{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 15.0, "global_step": 7494, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013344008540165466, "grad_norm": 4.080643084961509, "learning_rate": 8.88888888888889e-08, "loss": 1.8736, "step": 1 }, { "epoch": 0.0002668801708033093, "grad_norm": 5.08292352366484, "learning_rate": 1.777777777777778e-07, "loss": 1.9167, "step": 2 }, { "epoch": 0.00040032025620496394, "grad_norm": 4.314269479207965, "learning_rate": 2.666666666666667e-07, "loss": 1.868, "step": 3 }, { "epoch": 0.0005337603416066186, "grad_norm": 4.5997627346122325, "learning_rate": 3.555555555555556e-07, "loss": 1.8812, "step": 4 }, { "epoch": 0.0006672004270082733, "grad_norm": 4.366746923746356, "learning_rate": 4.444444444444445e-07, "loss": 1.8816, "step": 5 }, { "epoch": 0.0008006405124099279, "grad_norm": 6.417272891063272, "learning_rate": 5.333333333333335e-07, "loss": 1.9164, "step": 6 }, { "epoch": 0.0009340805978115826, "grad_norm": 4.276397654879367, "learning_rate": 6.222222222222223e-07, "loss": 1.8699, "step": 7 }, { "epoch": 0.0010675206832132373, "grad_norm": 40.21908327926398, "learning_rate": 7.111111111111112e-07, "loss": 1.9019, "step": 8 }, { "epoch": 0.0012009607686148918, "grad_norm": 4.191772462937108, "learning_rate": 8.000000000000001e-07, "loss": 1.8959, "step": 9 }, { "epoch": 0.0013344008540165466, "grad_norm": 4.39886202719947, "learning_rate": 8.88888888888889e-07, "loss": 1.8733, "step": 10 }, { "epoch": 0.0014678409394182012, "grad_norm": 3.872194087105484, "learning_rate": 9.77777777777778e-07, "loss": 1.8535, "step": 11 }, { "epoch": 0.0016012810248198558, "grad_norm": 3.738315413208318, "learning_rate": 1.066666666666667e-06, "loss": 1.8182, "step": 12 }, { "epoch": 0.0017347211102215106, "grad_norm": 5.806590893863183, "learning_rate": 1.1555555555555556e-06, "loss": 1.841, "step": 13 }, { "epoch": 0.0018681611956231651, "grad_norm": 3.9977833987634748, "learning_rate": 1.2444444444444445e-06, "loss": 1.8551, "step": 14 }, { "epoch": 0.0020016012810248197, "grad_norm": 3.6521269631944073, "learning_rate": 1.3333333333333334e-06, "loss": 1.8873, "step": 15 }, { "epoch": 0.0021350413664264745, "grad_norm": 2.9986950145773097, "learning_rate": 1.4222222222222223e-06, "loss": 1.8, "step": 16 }, { "epoch": 0.0022684814518281293, "grad_norm": 3.1157009896387544, "learning_rate": 1.5111111111111112e-06, "loss": 1.8309, "step": 17 }, { "epoch": 0.0024019215372297837, "grad_norm": 3.7767942592540855, "learning_rate": 1.6000000000000001e-06, "loss": 1.8749, "step": 18 }, { "epoch": 0.0025353616226314385, "grad_norm": 3.480294979458449, "learning_rate": 1.688888888888889e-06, "loss": 1.8126, "step": 19 }, { "epoch": 0.0026688017080330933, "grad_norm": 2.9012896594189366, "learning_rate": 1.777777777777778e-06, "loss": 1.7983, "step": 20 }, { "epoch": 0.0028022417934347476, "grad_norm": 4.745562235186556, "learning_rate": 1.8666666666666669e-06, "loss": 1.7862, "step": 21 }, { "epoch": 0.0029356818788364024, "grad_norm": 4.88191672119221, "learning_rate": 1.955555555555556e-06, "loss": 1.7786, "step": 22 }, { "epoch": 0.003069121964238057, "grad_norm": 41.74795779070356, "learning_rate": 2.0444444444444447e-06, "loss": 1.7671, "step": 23 }, { "epoch": 0.0032025620496397116, "grad_norm": 3.02827351605321, "learning_rate": 2.133333333333334e-06, "loss": 1.821, "step": 24 }, { "epoch": 0.0033360021350413663, "grad_norm": 2.7722420080290804, "learning_rate": 2.222222222222222e-06, "loss": 1.7749, "step": 25 }, { "epoch": 0.003469442220443021, "grad_norm": 3.669445898983361, "learning_rate": 2.311111111111111e-06, "loss": 1.8066, "step": 26 }, { "epoch": 0.003602882305844676, "grad_norm": 2.928887643981388, "learning_rate": 2.4000000000000003e-06, "loss": 1.7647, "step": 27 }, { "epoch": 0.0037363223912463303, "grad_norm": 3.7543465480647957, "learning_rate": 2.488888888888889e-06, "loss": 1.8005, "step": 28 }, { "epoch": 0.003869762476647985, "grad_norm": 3.251679830837901, "learning_rate": 2.577777777777778e-06, "loss": 1.7779, "step": 29 }, { "epoch": 0.0040032025620496394, "grad_norm": 21.468244588308337, "learning_rate": 2.666666666666667e-06, "loss": 1.7694, "step": 30 }, { "epoch": 0.004136642647451294, "grad_norm": 4.000265636508565, "learning_rate": 2.755555555555556e-06, "loss": 1.7384, "step": 31 }, { "epoch": 0.004270082732852949, "grad_norm": 3.0634285680156577, "learning_rate": 2.8444444444444446e-06, "loss": 1.7271, "step": 32 }, { "epoch": 0.004403522818254604, "grad_norm": 2.406246700648424, "learning_rate": 2.9333333333333338e-06, "loss": 1.7338, "step": 33 }, { "epoch": 0.004536962903656259, "grad_norm": 2.542980268594405, "learning_rate": 3.0222222222222225e-06, "loss": 1.7457, "step": 34 }, { "epoch": 0.004670402989057913, "grad_norm": 2.7984146843006226, "learning_rate": 3.1111111111111116e-06, "loss": 1.7539, "step": 35 }, { "epoch": 0.004803843074459567, "grad_norm": 3.013085830269592, "learning_rate": 3.2000000000000003e-06, "loss": 1.6781, "step": 36 }, { "epoch": 0.004937283159861222, "grad_norm": 2.153019671959159, "learning_rate": 3.2888888888888894e-06, "loss": 1.744, "step": 37 }, { "epoch": 0.005070723245262877, "grad_norm": 2.419384656819244, "learning_rate": 3.377777777777778e-06, "loss": 1.7563, "step": 38 }, { "epoch": 0.005204163330664532, "grad_norm": 2.431242306767295, "learning_rate": 3.4666666666666672e-06, "loss": 1.7474, "step": 39 }, { "epoch": 0.0053376034160661865, "grad_norm": 2.58042762585277, "learning_rate": 3.555555555555556e-06, "loss": 1.7052, "step": 40 }, { "epoch": 0.005471043501467841, "grad_norm": 2.903485309507233, "learning_rate": 3.644444444444445e-06, "loss": 1.7613, "step": 41 }, { "epoch": 0.005604483586869495, "grad_norm": 3.1612531609911123, "learning_rate": 3.7333333333333337e-06, "loss": 1.7141, "step": 42 }, { "epoch": 0.00573792367227115, "grad_norm": 2.060069555889511, "learning_rate": 3.8222222222222224e-06, "loss": 1.6964, "step": 43 }, { "epoch": 0.005871363757672805, "grad_norm": 2.0784087729038885, "learning_rate": 3.911111111111112e-06, "loss": 1.7272, "step": 44 }, { "epoch": 0.00600480384307446, "grad_norm": 2.046333038993166, "learning_rate": 4.000000000000001e-06, "loss": 1.7155, "step": 45 }, { "epoch": 0.006138243928476114, "grad_norm": 4.060174091987868, "learning_rate": 4.088888888888889e-06, "loss": 1.7394, "step": 46 }, { "epoch": 0.006271684013877769, "grad_norm": 2.194133473755285, "learning_rate": 4.177777777777778e-06, "loss": 1.7473, "step": 47 }, { "epoch": 0.006405124099279423, "grad_norm": 2.086936433450861, "learning_rate": 4.266666666666668e-06, "loss": 1.7482, "step": 48 }, { "epoch": 0.006538564184681078, "grad_norm": 2.055176025616284, "learning_rate": 4.3555555555555555e-06, "loss": 1.6989, "step": 49 }, { "epoch": 0.006672004270082733, "grad_norm": 4.2705916173620935, "learning_rate": 4.444444444444444e-06, "loss": 1.7113, "step": 50 }, { "epoch": 0.0068054443554843875, "grad_norm": 3.062440642397753, "learning_rate": 4.533333333333334e-06, "loss": 1.6994, "step": 51 }, { "epoch": 0.006938884440886042, "grad_norm": 3.6513027631899724, "learning_rate": 4.622222222222222e-06, "loss": 1.7435, "step": 52 }, { "epoch": 0.007072324526287697, "grad_norm": 4.339915391562868, "learning_rate": 4.711111111111111e-06, "loss": 1.7093, "step": 53 }, { "epoch": 0.007205764611689352, "grad_norm": 2.300952067433821, "learning_rate": 4.800000000000001e-06, "loss": 1.6687, "step": 54 }, { "epoch": 0.007339204697091006, "grad_norm": 2.3827551772304814, "learning_rate": 4.888888888888889e-06, "loss": 1.748, "step": 55 }, { "epoch": 0.007472644782492661, "grad_norm": 1.9554846454820922, "learning_rate": 4.977777777777778e-06, "loss": 1.6707, "step": 56 }, { "epoch": 0.007606084867894315, "grad_norm": 2.208610815386933, "learning_rate": 5.0666666666666676e-06, "loss": 1.6908, "step": 57 }, { "epoch": 0.00773952495329597, "grad_norm": 2.431678252426931, "learning_rate": 5.155555555555556e-06, "loss": 1.7041, "step": 58 }, { "epoch": 0.007872965038697624, "grad_norm": 1.926978176003599, "learning_rate": 5.244444444444445e-06, "loss": 1.6928, "step": 59 }, { "epoch": 0.008006405124099279, "grad_norm": 2.75648074491875, "learning_rate": 5.333333333333334e-06, "loss": 1.7009, "step": 60 }, { "epoch": 0.008139845209500934, "grad_norm": 1.9054991774370111, "learning_rate": 5.422222222222223e-06, "loss": 1.6937, "step": 61 }, { "epoch": 0.008273285294902588, "grad_norm": 2.7419834376889174, "learning_rate": 5.511111111111112e-06, "loss": 1.7218, "step": 62 }, { "epoch": 0.008406725380304243, "grad_norm": 1.8897989454015407, "learning_rate": 5.600000000000001e-06, "loss": 1.7069, "step": 63 }, { "epoch": 0.008540165465705898, "grad_norm": 2.159740439666748, "learning_rate": 5.688888888888889e-06, "loss": 1.7143, "step": 64 }, { "epoch": 0.008673605551107553, "grad_norm": 2.1199099802628396, "learning_rate": 5.777777777777778e-06, "loss": 1.6872, "step": 65 }, { "epoch": 0.008807045636509208, "grad_norm": 2.275963855081617, "learning_rate": 5.8666666666666675e-06, "loss": 1.8008, "step": 66 }, { "epoch": 0.008940485721910862, "grad_norm": 2.208827668777784, "learning_rate": 5.955555555555555e-06, "loss": 1.686, "step": 67 }, { "epoch": 0.009073925807312517, "grad_norm": 3.117561539877217, "learning_rate": 6.044444444444445e-06, "loss": 1.6589, "step": 68 }, { "epoch": 0.009207365892714172, "grad_norm": 2.3085392935053766, "learning_rate": 6.133333333333334e-06, "loss": 1.6787, "step": 69 }, { "epoch": 0.009340805978115827, "grad_norm": 2.1582405446873554, "learning_rate": 6.222222222222223e-06, "loss": 1.7265, "step": 70 }, { "epoch": 0.00947424606351748, "grad_norm": 2.0969226907995053, "learning_rate": 6.311111111111111e-06, "loss": 1.6737, "step": 71 }, { "epoch": 0.009607686148919135, "grad_norm": 4.200722941416422, "learning_rate": 6.4000000000000006e-06, "loss": 1.7188, "step": 72 }, { "epoch": 0.00974112623432079, "grad_norm": 2.2485101560203105, "learning_rate": 6.488888888888889e-06, "loss": 1.6912, "step": 73 }, { "epoch": 0.009874566319722444, "grad_norm": 2.763908223216233, "learning_rate": 6.577777777777779e-06, "loss": 1.6571, "step": 74 }, { "epoch": 0.010008006405124099, "grad_norm": 1.9040632627929543, "learning_rate": 6.666666666666667e-06, "loss": 1.6861, "step": 75 }, { "epoch": 0.010141446490525754, "grad_norm": 28.864482563980474, "learning_rate": 6.755555555555556e-06, "loss": 1.7206, "step": 76 }, { "epoch": 0.010274886575927409, "grad_norm": 2.8893679461430732, "learning_rate": 6.844444444444445e-06, "loss": 1.651, "step": 77 }, { "epoch": 0.010408326661329063, "grad_norm": 2.6807260850078705, "learning_rate": 6.9333333333333344e-06, "loss": 1.7404, "step": 78 }, { "epoch": 0.010541766746730718, "grad_norm": 2.616916412887755, "learning_rate": 7.022222222222222e-06, "loss": 1.7453, "step": 79 }, { "epoch": 0.010675206832132373, "grad_norm": 2.030396506610222, "learning_rate": 7.111111111111112e-06, "loss": 1.6951, "step": 80 }, { "epoch": 0.010808646917534028, "grad_norm": 2.276338612685183, "learning_rate": 7.2000000000000005e-06, "loss": 1.6665, "step": 81 }, { "epoch": 0.010942087002935683, "grad_norm": 4.873556416423791, "learning_rate": 7.28888888888889e-06, "loss": 1.6902, "step": 82 }, { "epoch": 0.011075527088337337, "grad_norm": 2.480438858137466, "learning_rate": 7.377777777777778e-06, "loss": 1.6992, "step": 83 }, { "epoch": 0.01120896717373899, "grad_norm": 2.0278767294473234, "learning_rate": 7.4666666666666675e-06, "loss": 1.6769, "step": 84 }, { "epoch": 0.011342407259140645, "grad_norm": 2.7060597428983937, "learning_rate": 7.555555555555556e-06, "loss": 1.6538, "step": 85 }, { "epoch": 0.0114758473445423, "grad_norm": 2.7423909405968963, "learning_rate": 7.644444444444445e-06, "loss": 1.7279, "step": 86 }, { "epoch": 0.011609287429943955, "grad_norm": 2.2930045021509446, "learning_rate": 7.733333333333334e-06, "loss": 1.6586, "step": 87 }, { "epoch": 0.01174272751534561, "grad_norm": 1.9855479277235186, "learning_rate": 7.822222222222224e-06, "loss": 1.7009, "step": 88 }, { "epoch": 0.011876167600747264, "grad_norm": 2.7213236928751554, "learning_rate": 7.911111111111112e-06, "loss": 1.6854, "step": 89 }, { "epoch": 0.01200960768614892, "grad_norm": 2.7306900665495757, "learning_rate": 8.000000000000001e-06, "loss": 1.7225, "step": 90 }, { "epoch": 0.012143047771550574, "grad_norm": 2.107447235822766, "learning_rate": 8.08888888888889e-06, "loss": 1.7019, "step": 91 }, { "epoch": 0.012276487856952229, "grad_norm": 2.570127439213928, "learning_rate": 8.177777777777779e-06, "loss": 1.6494, "step": 92 }, { "epoch": 0.012409927942353884, "grad_norm": 1.8760712243264193, "learning_rate": 8.266666666666667e-06, "loss": 1.6933, "step": 93 }, { "epoch": 0.012543368027755538, "grad_norm": 2.1290912423121253, "learning_rate": 8.355555555555556e-06, "loss": 1.6705, "step": 94 }, { "epoch": 0.012676808113157193, "grad_norm": 2.488673234475764, "learning_rate": 8.444444444444446e-06, "loss": 1.7204, "step": 95 }, { "epoch": 0.012810248198558846, "grad_norm": 2.1122071965215015, "learning_rate": 8.533333333333335e-06, "loss": 1.7021, "step": 96 }, { "epoch": 0.012943688283960501, "grad_norm": 2.200704929120579, "learning_rate": 8.622222222222223e-06, "loss": 1.6436, "step": 97 }, { "epoch": 0.013077128369362156, "grad_norm": 2.2007715935735606, "learning_rate": 8.711111111111111e-06, "loss": 1.6646, "step": 98 }, { "epoch": 0.01321056845476381, "grad_norm": 2.2699598373044383, "learning_rate": 8.8e-06, "loss": 1.6495, "step": 99 }, { "epoch": 0.013344008540165465, "grad_norm": 2.328713098955483, "learning_rate": 8.888888888888888e-06, "loss": 1.7009, "step": 100 }, { "epoch": 0.01347744862556712, "grad_norm": 2.218972713645272, "learning_rate": 8.977777777777778e-06, "loss": 1.7082, "step": 101 }, { "epoch": 0.013610888710968775, "grad_norm": 2.618934756244986, "learning_rate": 9.066666666666667e-06, "loss": 1.6985, "step": 102 }, { "epoch": 0.01374432879637043, "grad_norm": 3.2314521781408074, "learning_rate": 9.155555555555557e-06, "loss": 1.7344, "step": 103 }, { "epoch": 0.013877768881772085, "grad_norm": 3.697972156481625, "learning_rate": 9.244444444444445e-06, "loss": 1.683, "step": 104 }, { "epoch": 0.01401120896717374, "grad_norm": 2.8531262360061698, "learning_rate": 9.333333333333334e-06, "loss": 1.6776, "step": 105 }, { "epoch": 0.014144649052575394, "grad_norm": 39.794103633027675, "learning_rate": 9.422222222222222e-06, "loss": 1.7134, "step": 106 }, { "epoch": 0.014278089137977049, "grad_norm": 2.9264673716827954, "learning_rate": 9.511111111111112e-06, "loss": 1.6754, "step": 107 }, { "epoch": 0.014411529223378704, "grad_norm": 2.414594446024647, "learning_rate": 9.600000000000001e-06, "loss": 1.6917, "step": 108 }, { "epoch": 0.014544969308780357, "grad_norm": 2.792320442984017, "learning_rate": 9.688888888888889e-06, "loss": 1.6453, "step": 109 }, { "epoch": 0.014678409394182012, "grad_norm": 3.358788179969882, "learning_rate": 9.777777777777779e-06, "loss": 1.713, "step": 110 }, { "epoch": 0.014811849479583666, "grad_norm": 2.044760856305306, "learning_rate": 9.866666666666668e-06, "loss": 1.6851, "step": 111 }, { "epoch": 0.014945289564985321, "grad_norm": 2.4828242965545386, "learning_rate": 9.955555555555556e-06, "loss": 1.7159, "step": 112 }, { "epoch": 0.015078729650386976, "grad_norm": 46.56955438722204, "learning_rate": 1.0044444444444446e-05, "loss": 1.7581, "step": 113 }, { "epoch": 0.01521216973578863, "grad_norm": 4.705272454001238, "learning_rate": 1.0133333333333335e-05, "loss": 1.6542, "step": 114 }, { "epoch": 0.015345609821190286, "grad_norm": 7.73608658858232, "learning_rate": 1.0222222222222223e-05, "loss": 1.6793, "step": 115 }, { "epoch": 0.01547904990659194, "grad_norm": 5.229243538124936, "learning_rate": 1.0311111111111113e-05, "loss": 1.6866, "step": 116 }, { "epoch": 0.015612489991993595, "grad_norm": 4.340274190286637, "learning_rate": 1.04e-05, "loss": 1.7025, "step": 117 }, { "epoch": 0.015745930077395248, "grad_norm": 2.211270582628492, "learning_rate": 1.048888888888889e-05, "loss": 1.6903, "step": 118 }, { "epoch": 0.015879370162796905, "grad_norm": 2.2975450110438334, "learning_rate": 1.0577777777777778e-05, "loss": 1.7088, "step": 119 }, { "epoch": 0.016012810248198558, "grad_norm": 17.328579385026632, "learning_rate": 1.0666666666666667e-05, "loss": 1.7141, "step": 120 }, { "epoch": 0.016146250333600214, "grad_norm": 3.877950326292574, "learning_rate": 1.0755555555555557e-05, "loss": 1.6759, "step": 121 }, { "epoch": 0.016279690419001867, "grad_norm": 4.011366710334245, "learning_rate": 1.0844444444444446e-05, "loss": 1.699, "step": 122 }, { "epoch": 0.016413130504403524, "grad_norm": 3.2602156984936417, "learning_rate": 1.0933333333333334e-05, "loss": 1.7003, "step": 123 }, { "epoch": 0.016546570589805177, "grad_norm": 2.630659747532619, "learning_rate": 1.1022222222222224e-05, "loss": 1.6344, "step": 124 }, { "epoch": 0.016680010675206833, "grad_norm": 2.458919675792434, "learning_rate": 1.1111111111111113e-05, "loss": 1.6945, "step": 125 }, { "epoch": 0.016813450760608487, "grad_norm": 3.984975390451759, "learning_rate": 1.1200000000000001e-05, "loss": 1.7746, "step": 126 }, { "epoch": 0.016946890846010143, "grad_norm": 3.8315471470735787, "learning_rate": 1.1288888888888889e-05, "loss": 1.6974, "step": 127 }, { "epoch": 0.017080330931411796, "grad_norm": 2.499188895634502, "learning_rate": 1.1377777777777779e-05, "loss": 1.6892, "step": 128 }, { "epoch": 0.01721377101681345, "grad_norm": 2.0452097919611005, "learning_rate": 1.1466666666666668e-05, "loss": 1.7397, "step": 129 }, { "epoch": 0.017347211102215106, "grad_norm": 2.298966367562689, "learning_rate": 1.1555555555555556e-05, "loss": 1.6836, "step": 130 }, { "epoch": 0.01748065118761676, "grad_norm": 2.5420025536951942, "learning_rate": 1.1644444444444446e-05, "loss": 1.7454, "step": 131 }, { "epoch": 0.017614091273018415, "grad_norm": 13.604138953612503, "learning_rate": 1.1733333333333335e-05, "loss": 1.7288, "step": 132 }, { "epoch": 0.01774753135842007, "grad_norm": 2.3801527003336487, "learning_rate": 1.1822222222222225e-05, "loss": 1.6876, "step": 133 }, { "epoch": 0.017880971443821725, "grad_norm": 2.460244538839727, "learning_rate": 1.191111111111111e-05, "loss": 1.7061, "step": 134 }, { "epoch": 0.018014411529223378, "grad_norm": 2.4874810432246095, "learning_rate": 1.2e-05, "loss": 1.6963, "step": 135 }, { "epoch": 0.018147851614625034, "grad_norm": 2.409965122688259, "learning_rate": 1.208888888888889e-05, "loss": 1.7093, "step": 136 }, { "epoch": 0.018281291700026688, "grad_norm": 2.5548220927651393, "learning_rate": 1.217777777777778e-05, "loss": 1.6531, "step": 137 }, { "epoch": 0.018414731785428344, "grad_norm": 2.7637181173708676, "learning_rate": 1.2266666666666667e-05, "loss": 1.6484, "step": 138 }, { "epoch": 0.018548171870829997, "grad_norm": 2.108441938061504, "learning_rate": 1.2355555555555557e-05, "loss": 1.6831, "step": 139 }, { "epoch": 0.018681611956231654, "grad_norm": 1.843873869065762, "learning_rate": 1.2444444444444446e-05, "loss": 1.7209, "step": 140 }, { "epoch": 0.018815052041633307, "grad_norm": 1.852059082824261, "learning_rate": 1.2533333333333336e-05, "loss": 1.6356, "step": 141 }, { "epoch": 0.01894849212703496, "grad_norm": 1.8903070568688063, "learning_rate": 1.2622222222222222e-05, "loss": 1.666, "step": 142 }, { "epoch": 0.019081932212436616, "grad_norm": 2.7546690000389145, "learning_rate": 1.2711111111111112e-05, "loss": 1.6859, "step": 143 }, { "epoch": 0.01921537229783827, "grad_norm": 1.9120514050425437, "learning_rate": 1.2800000000000001e-05, "loss": 1.616, "step": 144 }, { "epoch": 0.019348812383239926, "grad_norm": 1.9059883225343819, "learning_rate": 1.288888888888889e-05, "loss": 1.7452, "step": 145 }, { "epoch": 0.01948225246864158, "grad_norm": 1.8505183157858194, "learning_rate": 1.2977777777777779e-05, "loss": 1.6603, "step": 146 }, { "epoch": 0.019615692554043235, "grad_norm": 3.2853784344984027, "learning_rate": 1.3066666666666668e-05, "loss": 1.6544, "step": 147 }, { "epoch": 0.01974913263944489, "grad_norm": 2.09698441082926, "learning_rate": 1.3155555555555558e-05, "loss": 1.725, "step": 148 }, { "epoch": 0.019882572724846545, "grad_norm": 2.244317832313048, "learning_rate": 1.3244444444444447e-05, "loss": 1.6843, "step": 149 }, { "epoch": 0.020016012810248198, "grad_norm": 1.906767252092642, "learning_rate": 1.3333333333333333e-05, "loss": 1.6561, "step": 150 }, { "epoch": 0.020149452895649855, "grad_norm": 13.009656729569588, "learning_rate": 1.3422222222222223e-05, "loss": 1.717, "step": 151 }, { "epoch": 0.020282892981051508, "grad_norm": 1.939458338988826, "learning_rate": 1.3511111111111112e-05, "loss": 1.6941, "step": 152 }, { "epoch": 0.020416333066453164, "grad_norm": 1.9081248684144936, "learning_rate": 1.3600000000000002e-05, "loss": 1.7335, "step": 153 }, { "epoch": 0.020549773151854817, "grad_norm": 3.46048722855863, "learning_rate": 1.368888888888889e-05, "loss": 1.6904, "step": 154 }, { "epoch": 0.02068321323725647, "grad_norm": 3.2150826776743093, "learning_rate": 1.377777777777778e-05, "loss": 1.7326, "step": 155 }, { "epoch": 0.020816653322658127, "grad_norm": 2.682239347552553, "learning_rate": 1.3866666666666669e-05, "loss": 1.6991, "step": 156 }, { "epoch": 0.02095009340805978, "grad_norm": 2.9892152568563337, "learning_rate": 1.3955555555555558e-05, "loss": 1.6674, "step": 157 }, { "epoch": 0.021083533493461436, "grad_norm": 1.9370449545076165, "learning_rate": 1.4044444444444445e-05, "loss": 1.7093, "step": 158 }, { "epoch": 0.02121697357886309, "grad_norm": 2.8348396699033085, "learning_rate": 1.4133333333333334e-05, "loss": 1.6561, "step": 159 }, { "epoch": 0.021350413664264746, "grad_norm": 5.362105984011789, "learning_rate": 1.4222222222222224e-05, "loss": 1.7225, "step": 160 }, { "epoch": 0.0214838537496664, "grad_norm": 2.2489346309856972, "learning_rate": 1.4311111111111111e-05, "loss": 1.7205, "step": 161 }, { "epoch": 0.021617293835068056, "grad_norm": 2.5792958960212813, "learning_rate": 1.4400000000000001e-05, "loss": 1.7106, "step": 162 }, { "epoch": 0.02175073392046971, "grad_norm": 2.799373167012452, "learning_rate": 1.448888888888889e-05, "loss": 1.7011, "step": 163 }, { "epoch": 0.021884174005871365, "grad_norm": 2.7270269575589587, "learning_rate": 1.457777777777778e-05, "loss": 1.7108, "step": 164 }, { "epoch": 0.022017614091273018, "grad_norm": 2.14261161418117, "learning_rate": 1.4666666666666666e-05, "loss": 1.6779, "step": 165 }, { "epoch": 0.022151054176674675, "grad_norm": 1.9467025797614437, "learning_rate": 1.4755555555555556e-05, "loss": 1.7194, "step": 166 }, { "epoch": 0.022284494262076328, "grad_norm": 2.0951052041172034, "learning_rate": 1.4844444444444445e-05, "loss": 1.7088, "step": 167 }, { "epoch": 0.02241793434747798, "grad_norm": 1.792897219038739, "learning_rate": 1.4933333333333335e-05, "loss": 1.6647, "step": 168 }, { "epoch": 0.022551374432879637, "grad_norm": 2.057396723030578, "learning_rate": 1.5022222222222223e-05, "loss": 1.6376, "step": 169 }, { "epoch": 0.02268481451828129, "grad_norm": 1.7777783661568858, "learning_rate": 1.5111111111111112e-05, "loss": 1.7434, "step": 170 }, { "epoch": 0.022818254603682947, "grad_norm": 2.583723235252364, "learning_rate": 1.5200000000000002e-05, "loss": 1.6701, "step": 171 }, { "epoch": 0.0229516946890846, "grad_norm": 2.708960473066722, "learning_rate": 1.528888888888889e-05, "loss": 1.7109, "step": 172 }, { "epoch": 0.023085134774486257, "grad_norm": 2.179043118704419, "learning_rate": 1.537777777777778e-05, "loss": 1.6601, "step": 173 }, { "epoch": 0.02321857485988791, "grad_norm": 30.22918083463127, "learning_rate": 1.546666666666667e-05, "loss": 1.8007, "step": 174 }, { "epoch": 0.023352014945289566, "grad_norm": 2.230452217009043, "learning_rate": 1.555555555555556e-05, "loss": 1.7362, "step": 175 }, { "epoch": 0.02348545503069122, "grad_norm": 2.9922017986999276, "learning_rate": 1.5644444444444448e-05, "loss": 1.7398, "step": 176 }, { "epoch": 0.023618895116092876, "grad_norm": 3.914186394323879, "learning_rate": 1.5733333333333334e-05, "loss": 1.7256, "step": 177 }, { "epoch": 0.02375233520149453, "grad_norm": 1.9015888556101328, "learning_rate": 1.5822222222222224e-05, "loss": 1.6762, "step": 178 }, { "epoch": 0.023885775286896185, "grad_norm": 1.7266594765283543, "learning_rate": 1.5911111111111113e-05, "loss": 1.672, "step": 179 }, { "epoch": 0.02401921537229784, "grad_norm": 2.070151343179939, "learning_rate": 1.6000000000000003e-05, "loss": 1.7933, "step": 180 }, { "epoch": 0.02415265545769949, "grad_norm": 2.51353756811448, "learning_rate": 1.608888888888889e-05, "loss": 1.6875, "step": 181 }, { "epoch": 0.024286095543101148, "grad_norm": 11.935615730367939, "learning_rate": 1.617777777777778e-05, "loss": 1.7512, "step": 182 }, { "epoch": 0.0244195356285028, "grad_norm": 3.9274893534399635, "learning_rate": 1.6266666666666668e-05, "loss": 1.6877, "step": 183 }, { "epoch": 0.024552975713904458, "grad_norm": 2.8888619941000377, "learning_rate": 1.6355555555555557e-05, "loss": 1.6988, "step": 184 }, { "epoch": 0.02468641579930611, "grad_norm": 2.335255800684397, "learning_rate": 1.6444444444444444e-05, "loss": 1.7197, "step": 185 }, { "epoch": 0.024819855884707767, "grad_norm": 2.132564305791294, "learning_rate": 1.6533333333333333e-05, "loss": 1.7189, "step": 186 }, { "epoch": 0.02495329597010942, "grad_norm": 2.344308980206617, "learning_rate": 1.6622222222222223e-05, "loss": 1.6521, "step": 187 }, { "epoch": 0.025086736055511077, "grad_norm": 1.7158444729386582, "learning_rate": 1.6711111111111112e-05, "loss": 1.7004, "step": 188 }, { "epoch": 0.02522017614091273, "grad_norm": 3.0317644581837344, "learning_rate": 1.6800000000000002e-05, "loss": 1.7271, "step": 189 }, { "epoch": 0.025353616226314386, "grad_norm": 1.9353267422280926, "learning_rate": 1.688888888888889e-05, "loss": 1.6795, "step": 190 }, { "epoch": 0.02548705631171604, "grad_norm": 1.7304826902324242, "learning_rate": 1.697777777777778e-05, "loss": 1.6588, "step": 191 }, { "epoch": 0.025620496397117692, "grad_norm": 1.9159549442018844, "learning_rate": 1.706666666666667e-05, "loss": 1.7432, "step": 192 }, { "epoch": 0.02575393648251935, "grad_norm": 1.8910667470096478, "learning_rate": 1.7155555555555557e-05, "loss": 1.7513, "step": 193 }, { "epoch": 0.025887376567921002, "grad_norm": 2.569829238717817, "learning_rate": 1.7244444444444446e-05, "loss": 1.6466, "step": 194 }, { "epoch": 0.02602081665332266, "grad_norm": 2.078018415198727, "learning_rate": 1.7333333333333336e-05, "loss": 1.7358, "step": 195 }, { "epoch": 0.02615425673872431, "grad_norm": 1.8817118359807499, "learning_rate": 1.7422222222222222e-05, "loss": 1.7516, "step": 196 }, { "epoch": 0.026287696824125968, "grad_norm": 1.77364702683218, "learning_rate": 1.751111111111111e-05, "loss": 1.689, "step": 197 }, { "epoch": 0.02642113690952762, "grad_norm": 2.2161414310718466, "learning_rate": 1.76e-05, "loss": 1.7242, "step": 198 }, { "epoch": 0.026554576994929278, "grad_norm": 2.0643613848156392, "learning_rate": 1.768888888888889e-05, "loss": 1.6685, "step": 199 }, { "epoch": 0.02668801708033093, "grad_norm": 14.87718153231451, "learning_rate": 1.7777777777777777e-05, "loss": 1.6533, "step": 200 }, { "epoch": 0.026821457165732587, "grad_norm": 2.272950745857619, "learning_rate": 1.7866666666666666e-05, "loss": 1.7366, "step": 201 }, { "epoch": 0.02695489725113424, "grad_norm": 2.8727770433260242, "learning_rate": 1.7955555555555556e-05, "loss": 1.7315, "step": 202 }, { "epoch": 0.027088337336535897, "grad_norm": 2.3209623388005958, "learning_rate": 1.8044444444444445e-05, "loss": 1.7321, "step": 203 }, { "epoch": 0.02722177742193755, "grad_norm": 1.897431656822105, "learning_rate": 1.8133333333333335e-05, "loss": 1.7697, "step": 204 }, { "epoch": 0.027355217507339203, "grad_norm": 2.083186577685706, "learning_rate": 1.8222222222222224e-05, "loss": 1.7103, "step": 205 }, { "epoch": 0.02748865759274086, "grad_norm": 2.4252062792709395, "learning_rate": 1.8311111111111114e-05, "loss": 1.7401, "step": 206 }, { "epoch": 0.027622097678142513, "grad_norm": 5.5398864573778726, "learning_rate": 1.8400000000000003e-05, "loss": 1.7618, "step": 207 }, { "epoch": 0.02775553776354417, "grad_norm": 2.12561692151565, "learning_rate": 1.848888888888889e-05, "loss": 1.698, "step": 208 }, { "epoch": 0.027888977848945822, "grad_norm": 2.122353896017312, "learning_rate": 1.857777777777778e-05, "loss": 1.7519, "step": 209 }, { "epoch": 0.02802241793434748, "grad_norm": 1.946448922291358, "learning_rate": 1.866666666666667e-05, "loss": 1.6719, "step": 210 }, { "epoch": 0.028155858019749132, "grad_norm": 2.4301306367972813, "learning_rate": 1.8755555555555558e-05, "loss": 1.7039, "step": 211 }, { "epoch": 0.02828929810515079, "grad_norm": 1.841018211290893, "learning_rate": 1.8844444444444444e-05, "loss": 1.7406, "step": 212 }, { "epoch": 0.02842273819055244, "grad_norm": 1.9703359299596948, "learning_rate": 1.8933333333333334e-05, "loss": 1.6828, "step": 213 }, { "epoch": 0.028556178275954098, "grad_norm": 2.0318091145704646, "learning_rate": 1.9022222222222223e-05, "loss": 1.6784, "step": 214 }, { "epoch": 0.02868961836135575, "grad_norm": 1.8820707489427722, "learning_rate": 1.9111111111111113e-05, "loss": 1.7024, "step": 215 }, { "epoch": 0.028823058446757407, "grad_norm": 2.4241888613922145, "learning_rate": 1.9200000000000003e-05, "loss": 1.685, "step": 216 }, { "epoch": 0.02895649853215906, "grad_norm": 2.2092388050397456, "learning_rate": 1.928888888888889e-05, "loss": 1.7553, "step": 217 }, { "epoch": 0.029089938617560714, "grad_norm": 2.165125467884484, "learning_rate": 1.9377777777777778e-05, "loss": 1.6919, "step": 218 }, { "epoch": 0.02922337870296237, "grad_norm": 2.4113038080825624, "learning_rate": 1.9466666666666668e-05, "loss": 1.6618, "step": 219 }, { "epoch": 0.029356818788364023, "grad_norm": 2.3316936837660576, "learning_rate": 1.9555555555555557e-05, "loss": 1.7472, "step": 220 }, { "epoch": 0.02949025887376568, "grad_norm": 3.2727415100596855, "learning_rate": 1.9644444444444447e-05, "loss": 1.6679, "step": 221 }, { "epoch": 0.029623698959167333, "grad_norm": 2.0492799095337784, "learning_rate": 1.9733333333333336e-05, "loss": 1.7506, "step": 222 }, { "epoch": 0.02975713904456899, "grad_norm": 2.576771291880995, "learning_rate": 1.9822222222222226e-05, "loss": 1.737, "step": 223 }, { "epoch": 0.029890579129970642, "grad_norm": 3.2741913701257257, "learning_rate": 1.9911111111111112e-05, "loss": 1.7066, "step": 224 }, { "epoch": 0.0300240192153723, "grad_norm": 2.001886242005137, "learning_rate": 2e-05, "loss": 1.7444, "step": 225 }, { "epoch": 0.030157459300773952, "grad_norm": 2.6698368689644334, "learning_rate": 1.9999999066056927e-05, "loss": 1.6765, "step": 226 }, { "epoch": 0.03029089938617561, "grad_norm": 3.3466160854176628, "learning_rate": 1.9999996264227883e-05, "loss": 1.7286, "step": 227 }, { "epoch": 0.03042433947157726, "grad_norm": 2.393225998646352, "learning_rate": 1.999999159451339e-05, "loss": 1.7539, "step": 228 }, { "epoch": 0.030557779556978918, "grad_norm": 2.5849019495304777, "learning_rate": 1.9999985056914326e-05, "loss": 1.7202, "step": 229 }, { "epoch": 0.03069121964238057, "grad_norm": 2.761192723900784, "learning_rate": 1.9999976651431904e-05, "loss": 1.7445, "step": 230 }, { "epoch": 0.030824659727782224, "grad_norm": 2.440029715237141, "learning_rate": 1.9999966378067696e-05, "loss": 1.7564, "step": 231 }, { "epoch": 0.03095809981318388, "grad_norm": 2.0847710036036307, "learning_rate": 1.9999954236823625e-05, "loss": 1.7053, "step": 232 }, { "epoch": 0.031091539898585534, "grad_norm": 2.097727759259449, "learning_rate": 1.9999940227701952e-05, "loss": 1.7045, "step": 233 }, { "epoch": 0.03122497998398719, "grad_norm": 15.81909104706428, "learning_rate": 1.99999243507053e-05, "loss": 1.8102, "step": 234 }, { "epoch": 0.03135842006938885, "grad_norm": 2.223134069922936, "learning_rate": 1.9999906605836637e-05, "loss": 1.738, "step": 235 }, { "epoch": 0.031491860154790496, "grad_norm": 2.3651382675222528, "learning_rate": 1.9999886993099273e-05, "loss": 1.7467, "step": 236 }, { "epoch": 0.03162530024019215, "grad_norm": 1.8263105543794043, "learning_rate": 1.9999865512496867e-05, "loss": 1.7548, "step": 237 }, { "epoch": 0.03175874032559381, "grad_norm": 3.9716970412217805, "learning_rate": 1.9999842164033435e-05, "loss": 1.7206, "step": 238 }, { "epoch": 0.031892180410995466, "grad_norm": 2.0120218395868634, "learning_rate": 1.9999816947713345e-05, "loss": 1.7401, "step": 239 }, { "epoch": 0.032025620496397116, "grad_norm": 2.3384151798445036, "learning_rate": 1.9999789863541295e-05, "loss": 1.7301, "step": 240 }, { "epoch": 0.03215906058179877, "grad_norm": 3.940580388558925, "learning_rate": 1.9999760911522355e-05, "loss": 1.6965, "step": 241 }, { "epoch": 0.03229250066720043, "grad_norm": 1.8224155291794617, "learning_rate": 1.9999730091661928e-05, "loss": 1.6705, "step": 242 }, { "epoch": 0.03242594075260208, "grad_norm": 1.9045524865773107, "learning_rate": 1.999969740396577e-05, "loss": 1.7543, "step": 243 }, { "epoch": 0.032559380838003735, "grad_norm": 1.6984600198344175, "learning_rate": 1.9999662848439988e-05, "loss": 1.7126, "step": 244 }, { "epoch": 0.03269282092340539, "grad_norm": 1.9269958388703856, "learning_rate": 1.9999626425091035e-05, "loss": 1.7016, "step": 245 }, { "epoch": 0.03282626100880705, "grad_norm": 2.4719605600430095, "learning_rate": 1.9999588133925715e-05, "loss": 1.733, "step": 246 }, { "epoch": 0.0329597010942087, "grad_norm": 2.129823068185752, "learning_rate": 1.9999547974951184e-05, "loss": 1.6882, "step": 247 }, { "epoch": 0.033093141179610354, "grad_norm": 2.0555832511145145, "learning_rate": 1.9999505948174943e-05, "loss": 1.7154, "step": 248 }, { "epoch": 0.03322658126501201, "grad_norm": 1.8053396394120758, "learning_rate": 1.9999462053604836e-05, "loss": 1.6796, "step": 249 }, { "epoch": 0.03336002135041367, "grad_norm": 5.575233121908451, "learning_rate": 1.9999416291249064e-05, "loss": 1.7884, "step": 250 }, { "epoch": 0.03349346143581532, "grad_norm": 2.2350909040289673, "learning_rate": 1.9999368661116177e-05, "loss": 1.7342, "step": 251 }, { "epoch": 0.03362690152121697, "grad_norm": 1.6497870642338468, "learning_rate": 1.9999319163215075e-05, "loss": 1.71, "step": 252 }, { "epoch": 0.03376034160661863, "grad_norm": 1.993739573845127, "learning_rate": 1.9999267797554995e-05, "loss": 1.7162, "step": 253 }, { "epoch": 0.033893781692020286, "grad_norm": 1.8123408280559632, "learning_rate": 1.999921456414554e-05, "loss": 1.7696, "step": 254 }, { "epoch": 0.034027221777421936, "grad_norm": 1.6594530678698969, "learning_rate": 1.9999159462996646e-05, "loss": 1.6971, "step": 255 }, { "epoch": 0.03416066186282359, "grad_norm": 1.7999533008081343, "learning_rate": 1.9999102494118613e-05, "loss": 1.7056, "step": 256 }, { "epoch": 0.03429410194822525, "grad_norm": 1.5795266580620457, "learning_rate": 1.999904365752208e-05, "loss": 1.7091, "step": 257 }, { "epoch": 0.0344275420336269, "grad_norm": 2.9298656896602355, "learning_rate": 1.9998982953218028e-05, "loss": 1.7193, "step": 258 }, { "epoch": 0.034560982119028555, "grad_norm": 1.764460603536837, "learning_rate": 1.9998920381217807e-05, "loss": 1.7333, "step": 259 }, { "epoch": 0.03469442220443021, "grad_norm": 2.3109901205414314, "learning_rate": 1.99988559415331e-05, "loss": 1.7506, "step": 260 }, { "epoch": 0.03482786228983187, "grad_norm": 10.94371168313979, "learning_rate": 1.9998789634175945e-05, "loss": 1.7554, "step": 261 }, { "epoch": 0.03496130237523352, "grad_norm": 1.8407021375815145, "learning_rate": 1.999872145915873e-05, "loss": 1.7242, "step": 262 }, { "epoch": 0.035094742460635174, "grad_norm": 2.974143978244417, "learning_rate": 1.999865141649418e-05, "loss": 1.7209, "step": 263 }, { "epoch": 0.03522818254603683, "grad_norm": 2.2722279313309475, "learning_rate": 1.9998579506195384e-05, "loss": 1.7143, "step": 264 }, { "epoch": 0.03536162263143849, "grad_norm": 1.6990878341793165, "learning_rate": 1.999850572827578e-05, "loss": 1.7197, "step": 265 }, { "epoch": 0.03549506271684014, "grad_norm": 1.6642794979330895, "learning_rate": 1.999843008274914e-05, "loss": 1.6542, "step": 266 }, { "epoch": 0.03562850280224179, "grad_norm": 2.007102879639598, "learning_rate": 1.9998352569629596e-05, "loss": 1.7283, "step": 267 }, { "epoch": 0.03576194288764345, "grad_norm": 1.603072629822543, "learning_rate": 1.9998273188931628e-05, "loss": 1.6958, "step": 268 }, { "epoch": 0.0358953829730451, "grad_norm": 1.83795235651287, "learning_rate": 1.9998191940670068e-05, "loss": 1.7116, "step": 269 }, { "epoch": 0.036028823058446756, "grad_norm": 1.9425709100774724, "learning_rate": 1.999810882486008e-05, "loss": 1.7268, "step": 270 }, { "epoch": 0.03616226314384841, "grad_norm": 1.6308935939887939, "learning_rate": 1.99980238415172e-05, "loss": 1.7157, "step": 271 }, { "epoch": 0.03629570322925007, "grad_norm": 1.668857008376373, "learning_rate": 1.99979369906573e-05, "loss": 1.7032, "step": 272 }, { "epoch": 0.03642914331465172, "grad_norm": 2.1917958281972925, "learning_rate": 1.9997848272296594e-05, "loss": 1.666, "step": 273 }, { "epoch": 0.036562583400053375, "grad_norm": 2.7045086446698567, "learning_rate": 1.9997757686451668e-05, "loss": 1.7166, "step": 274 }, { "epoch": 0.03669602348545503, "grad_norm": 1.776827654276239, "learning_rate": 1.9997665233139433e-05, "loss": 1.7162, "step": 275 }, { "epoch": 0.03682946357085669, "grad_norm": 1.8454111862717217, "learning_rate": 1.999757091237716e-05, "loss": 1.7304, "step": 276 }, { "epoch": 0.03696290365625834, "grad_norm": 1.7745973768422731, "learning_rate": 1.9997474724182465e-05, "loss": 1.7309, "step": 277 }, { "epoch": 0.037096343741659994, "grad_norm": 1.768404847121436, "learning_rate": 1.9997376668573317e-05, "loss": 1.7116, "step": 278 }, { "epoch": 0.03722978382706165, "grad_norm": 1.6641524050216268, "learning_rate": 1.9997276745568036e-05, "loss": 1.718, "step": 279 }, { "epoch": 0.03736322391246331, "grad_norm": 2.2160286554301094, "learning_rate": 1.9997174955185276e-05, "loss": 1.6983, "step": 280 }, { "epoch": 0.03749666399786496, "grad_norm": 1.830594152900142, "learning_rate": 1.9997071297444062e-05, "loss": 1.7126, "step": 281 }, { "epoch": 0.03763010408326661, "grad_norm": 1.9590317837706277, "learning_rate": 1.9996965772363747e-05, "loss": 1.7766, "step": 282 }, { "epoch": 0.03776354416866827, "grad_norm": 17.291654078400484, "learning_rate": 1.999685837996405e-05, "loss": 1.7718, "step": 283 }, { "epoch": 0.03789698425406992, "grad_norm": 1.9837536918624412, "learning_rate": 1.9996749120265023e-05, "loss": 1.7488, "step": 284 }, { "epoch": 0.038030424339471576, "grad_norm": 2.143999288457606, "learning_rate": 1.9996637993287072e-05, "loss": 1.756, "step": 285 }, { "epoch": 0.03816386442487323, "grad_norm": 2.0362899269691144, "learning_rate": 1.9996524999050966e-05, "loss": 1.7582, "step": 286 }, { "epoch": 0.03829730451027489, "grad_norm": 1.90769408022856, "learning_rate": 1.9996410137577806e-05, "loss": 1.7833, "step": 287 }, { "epoch": 0.03843074459567654, "grad_norm": 1.8427410428015334, "learning_rate": 1.9996293408889046e-05, "loss": 1.746, "step": 288 }, { "epoch": 0.038564184681078195, "grad_norm": 1.6678979095618587, "learning_rate": 1.9996174813006488e-05, "loss": 1.7362, "step": 289 }, { "epoch": 0.03869762476647985, "grad_norm": 1.8342284170270304, "learning_rate": 1.9996054349952283e-05, "loss": 1.7476, "step": 290 }, { "epoch": 0.03883106485188151, "grad_norm": 1.71509522487389, "learning_rate": 1.999593201974894e-05, "loss": 1.7033, "step": 291 }, { "epoch": 0.03896450493728316, "grad_norm": 1.6721230982503732, "learning_rate": 1.9995807822419296e-05, "loss": 1.7231, "step": 292 }, { "epoch": 0.039097945022684814, "grad_norm": 1.6080547725886836, "learning_rate": 1.9995681757986563e-05, "loss": 1.7517, "step": 293 }, { "epoch": 0.03923138510808647, "grad_norm": 3.219421965283889, "learning_rate": 1.9995553826474282e-05, "loss": 1.771, "step": 294 }, { "epoch": 0.03936482519348812, "grad_norm": 1.6715306645770436, "learning_rate": 1.9995424027906348e-05, "loss": 1.7727, "step": 295 }, { "epoch": 0.03949826527888978, "grad_norm": 2.0944536026816047, "learning_rate": 1.999529236230701e-05, "loss": 1.6944, "step": 296 }, { "epoch": 0.039631705364291434, "grad_norm": 1.8183245740053557, "learning_rate": 1.9995158829700857e-05, "loss": 1.7002, "step": 297 }, { "epoch": 0.03976514544969309, "grad_norm": 1.5947407157115723, "learning_rate": 1.9995023430112838e-05, "loss": 1.694, "step": 298 }, { "epoch": 0.03989858553509474, "grad_norm": 1.732145875042339, "learning_rate": 1.9994886163568234e-05, "loss": 1.7074, "step": 299 }, { "epoch": 0.040032025620496396, "grad_norm": 1.9283377201797984, "learning_rate": 1.9994747030092694e-05, "loss": 1.6803, "step": 300 }, { "epoch": 0.04016546570589805, "grad_norm": 2.0214640496563603, "learning_rate": 1.9994606029712204e-05, "loss": 1.6983, "step": 301 }, { "epoch": 0.04029890579129971, "grad_norm": 1.6234729327561015, "learning_rate": 1.9994463162453098e-05, "loss": 1.7016, "step": 302 }, { "epoch": 0.04043234587670136, "grad_norm": 1.9270631511486271, "learning_rate": 1.9994318428342066e-05, "loss": 1.7469, "step": 303 }, { "epoch": 0.040565785962103015, "grad_norm": 1.6310686042908182, "learning_rate": 1.9994171827406143e-05, "loss": 1.7147, "step": 304 }, { "epoch": 0.04069922604750467, "grad_norm": 1.6867380171369333, "learning_rate": 1.999402335967271e-05, "loss": 1.7216, "step": 305 }, { "epoch": 0.04083266613290633, "grad_norm": 2.1857500165121295, "learning_rate": 1.99938730251695e-05, "loss": 1.7171, "step": 306 }, { "epoch": 0.04096610621830798, "grad_norm": 2.3135110454799332, "learning_rate": 1.999372082392459e-05, "loss": 1.7334, "step": 307 }, { "epoch": 0.041099546303709635, "grad_norm": 1.589398670927812, "learning_rate": 1.9993566755966414e-05, "loss": 1.6869, "step": 308 }, { "epoch": 0.04123298638911129, "grad_norm": 2.4365266348869428, "learning_rate": 1.999341082132375e-05, "loss": 1.7227, "step": 309 }, { "epoch": 0.04136642647451294, "grad_norm": 1.6296732392377482, "learning_rate": 1.999325302002572e-05, "loss": 1.7589, "step": 310 }, { "epoch": 0.0414998665599146, "grad_norm": 2.305895115990403, "learning_rate": 1.999309335210181e-05, "loss": 1.6795, "step": 311 }, { "epoch": 0.041633306645316254, "grad_norm": 2.2267067402414136, "learning_rate": 1.9992931817581836e-05, "loss": 1.7257, "step": 312 }, { "epoch": 0.04176674673071791, "grad_norm": 2.64734565806814, "learning_rate": 1.999276841649597e-05, "loss": 1.6765, "step": 313 }, { "epoch": 0.04190018681611956, "grad_norm": 2.871860278561938, "learning_rate": 1.9992603148874735e-05, "loss": 1.7917, "step": 314 }, { "epoch": 0.042033626901521216, "grad_norm": 1.680479043931703, "learning_rate": 1.9992436014749002e-05, "loss": 1.7058, "step": 315 }, { "epoch": 0.04216706698692287, "grad_norm": 2.3517294932157986, "learning_rate": 1.9992267014149992e-05, "loss": 1.7037, "step": 316 }, { "epoch": 0.04230050707232453, "grad_norm": 2.3384834771184315, "learning_rate": 1.999209614710927e-05, "loss": 1.7229, "step": 317 }, { "epoch": 0.04243394715772618, "grad_norm": 3.0716993591543162, "learning_rate": 1.9991923413658752e-05, "loss": 1.7066, "step": 318 }, { "epoch": 0.042567387243127836, "grad_norm": 22.39020312724416, "learning_rate": 1.99917488138307e-05, "loss": 1.7471, "step": 319 }, { "epoch": 0.04270082732852949, "grad_norm": 4.513453647912413, "learning_rate": 1.999157234765773e-05, "loss": 1.7847, "step": 320 }, { "epoch": 0.04283426741393114, "grad_norm": 2.8952574314494406, "learning_rate": 1.9991394015172806e-05, "loss": 1.7865, "step": 321 }, { "epoch": 0.0429677074993328, "grad_norm": 2.3072174733434263, "learning_rate": 1.9991213816409235e-05, "loss": 1.6652, "step": 322 }, { "epoch": 0.043101147584734455, "grad_norm": 3.501051033962392, "learning_rate": 1.9991031751400678e-05, "loss": 1.7621, "step": 323 }, { "epoch": 0.04323458767013611, "grad_norm": 2.757581328302007, "learning_rate": 1.9990847820181143e-05, "loss": 1.7351, "step": 324 }, { "epoch": 0.04336802775553776, "grad_norm": 2.208520128826526, "learning_rate": 1.9990662022784984e-05, "loss": 1.7927, "step": 325 }, { "epoch": 0.04350146784093942, "grad_norm": 2.9115362978660366, "learning_rate": 1.9990474359246907e-05, "loss": 1.7272, "step": 326 }, { "epoch": 0.043634907926341074, "grad_norm": 2.2116484952074638, "learning_rate": 1.9990284829601965e-05, "loss": 1.7165, "step": 327 }, { "epoch": 0.04376834801174273, "grad_norm": 2.127585292605601, "learning_rate": 1.999009343388556e-05, "loss": 1.7696, "step": 328 }, { "epoch": 0.04390178809714438, "grad_norm": 2.1126461502594407, "learning_rate": 1.9989900172133446e-05, "loss": 1.7299, "step": 329 }, { "epoch": 0.044035228182546036, "grad_norm": 3.589659926709359, "learning_rate": 1.9989705044381717e-05, "loss": 1.7291, "step": 330 }, { "epoch": 0.04416866826794769, "grad_norm": 2.5882477625445977, "learning_rate": 1.998950805066682e-05, "loss": 1.74, "step": 331 }, { "epoch": 0.04430210835334935, "grad_norm": 2.102193237422039, "learning_rate": 1.998930919102556e-05, "loss": 1.7445, "step": 332 }, { "epoch": 0.044435548438751, "grad_norm": 1.5603391863630198, "learning_rate": 1.9989108465495074e-05, "loss": 1.7013, "step": 333 }, { "epoch": 0.044568988524152656, "grad_norm": 2.0093620838637096, "learning_rate": 1.9988905874112853e-05, "loss": 1.6947, "step": 334 }, { "epoch": 0.04470242860955431, "grad_norm": 2.0641135398132873, "learning_rate": 1.998870141691675e-05, "loss": 1.6937, "step": 335 }, { "epoch": 0.04483586869495596, "grad_norm": 1.9939169534113899, "learning_rate": 1.9988495093944942e-05, "loss": 1.7692, "step": 336 }, { "epoch": 0.04496930878035762, "grad_norm": 1.5839856511709431, "learning_rate": 1.998828690523597e-05, "loss": 1.6663, "step": 337 }, { "epoch": 0.045102748865759275, "grad_norm": 1.7578902574401223, "learning_rate": 1.9988076850828736e-05, "loss": 1.7076, "step": 338 }, { "epoch": 0.04523618895116093, "grad_norm": 1.8132930759338493, "learning_rate": 1.9987864930762456e-05, "loss": 1.673, "step": 339 }, { "epoch": 0.04536962903656258, "grad_norm": 1.4712573812370557, "learning_rate": 1.9987651145076724e-05, "loss": 1.7153, "step": 340 }, { "epoch": 0.04550306912196424, "grad_norm": 1.7206193008312878, "learning_rate": 1.9987435493811477e-05, "loss": 1.7383, "step": 341 }, { "epoch": 0.045636509207365894, "grad_norm": 1.7893043936529118, "learning_rate": 1.998721797700699e-05, "loss": 1.7391, "step": 342 }, { "epoch": 0.04576994929276755, "grad_norm": 1.6899544517809348, "learning_rate": 1.9986998594703887e-05, "loss": 1.7279, "step": 343 }, { "epoch": 0.0459033893781692, "grad_norm": 1.6139144085071184, "learning_rate": 1.9986777346943157e-05, "loss": 1.6811, "step": 344 }, { "epoch": 0.04603682946357086, "grad_norm": 1.7986249828416747, "learning_rate": 1.9986554233766125e-05, "loss": 1.7152, "step": 345 }, { "epoch": 0.04617026954897251, "grad_norm": 1.7071224522178132, "learning_rate": 1.9986329255214457e-05, "loss": 1.6777, "step": 346 }, { "epoch": 0.04630370963437416, "grad_norm": 1.726135209124258, "learning_rate": 1.9986102411330187e-05, "loss": 1.6931, "step": 347 }, { "epoch": 0.04643714971977582, "grad_norm": 1.640238498392757, "learning_rate": 1.9985873702155684e-05, "loss": 1.7056, "step": 348 }, { "epoch": 0.046570589805177476, "grad_norm": 1.4745370494240224, "learning_rate": 1.9985643127733664e-05, "loss": 1.7318, "step": 349 }, { "epoch": 0.04670402989057913, "grad_norm": 1.665538578112985, "learning_rate": 1.99854106881072e-05, "loss": 1.6866, "step": 350 }, { "epoch": 0.04683746997598078, "grad_norm": 1.7265148941792416, "learning_rate": 1.9985176383319706e-05, "loss": 1.7242, "step": 351 }, { "epoch": 0.04697091006138244, "grad_norm": 1.7372398018580704, "learning_rate": 1.998494021341495e-05, "loss": 1.6928, "step": 352 }, { "epoch": 0.047104350146784095, "grad_norm": 1.9848537709359042, "learning_rate": 1.998470217843705e-05, "loss": 1.7095, "step": 353 }, { "epoch": 0.04723779023218575, "grad_norm": 1.6273684847266405, "learning_rate": 1.9984462278430454e-05, "loss": 1.7052, "step": 354 }, { "epoch": 0.0473712303175874, "grad_norm": 2.691629432371725, "learning_rate": 1.9984220513439987e-05, "loss": 1.7216, "step": 355 }, { "epoch": 0.04750467040298906, "grad_norm": 3.0017537794303872, "learning_rate": 1.9983976883510802e-05, "loss": 1.6843, "step": 356 }, { "epoch": 0.047638110488390714, "grad_norm": 1.6968617974676807, "learning_rate": 1.9983731388688405e-05, "loss": 1.7386, "step": 357 }, { "epoch": 0.04777155057379237, "grad_norm": 1.7186160692409527, "learning_rate": 1.998348402901866e-05, "loss": 1.7086, "step": 358 }, { "epoch": 0.04790499065919402, "grad_norm": 1.6381474039682424, "learning_rate": 1.998323480454776e-05, "loss": 1.7258, "step": 359 }, { "epoch": 0.04803843074459568, "grad_norm": 1.5835971815447754, "learning_rate": 1.998298371532226e-05, "loss": 1.7738, "step": 360 }, { "epoch": 0.04817187082999733, "grad_norm": 1.5878715756664898, "learning_rate": 1.9982730761389063e-05, "loss": 1.7094, "step": 361 }, { "epoch": 0.04830531091539898, "grad_norm": 1.519758329640418, "learning_rate": 1.9982475942795418e-05, "loss": 1.7268, "step": 362 }, { "epoch": 0.04843875100080064, "grad_norm": 1.514307211931559, "learning_rate": 1.9982219259588925e-05, "loss": 1.6696, "step": 363 }, { "epoch": 0.048572191086202296, "grad_norm": 2.1724863692721352, "learning_rate": 1.9981960711817524e-05, "loss": 1.6908, "step": 364 }, { "epoch": 0.04870563117160395, "grad_norm": 1.56228704416919, "learning_rate": 1.998170029952951e-05, "loss": 1.7315, "step": 365 }, { "epoch": 0.0488390712570056, "grad_norm": 1.85473949983945, "learning_rate": 1.9981438022773526e-05, "loss": 1.7355, "step": 366 }, { "epoch": 0.04897251134240726, "grad_norm": 11.783330160673703, "learning_rate": 1.9981173881598563e-05, "loss": 1.6998, "step": 367 }, { "epoch": 0.049105951427808915, "grad_norm": 2.0242537102814415, "learning_rate": 1.998090787605396e-05, "loss": 1.6909, "step": 368 }, { "epoch": 0.04923939151321057, "grad_norm": 1.6717583123155606, "learning_rate": 1.99806400061894e-05, "loss": 1.7593, "step": 369 }, { "epoch": 0.04937283159861222, "grad_norm": 1.5943321940814934, "learning_rate": 1.998037027205492e-05, "loss": 1.7278, "step": 370 }, { "epoch": 0.04950627168401388, "grad_norm": 1.8339002788527259, "learning_rate": 1.998009867370091e-05, "loss": 1.7573, "step": 371 }, { "epoch": 0.049639711769415534, "grad_norm": 1.6495978655725259, "learning_rate": 1.997982521117809e-05, "loss": 1.7594, "step": 372 }, { "epoch": 0.049773151854817184, "grad_norm": 1.9817095312631359, "learning_rate": 1.9979549884537545e-05, "loss": 1.699, "step": 373 }, { "epoch": 0.04990659194021884, "grad_norm": 1.803977303801047, "learning_rate": 1.997927269383071e-05, "loss": 1.733, "step": 374 }, { "epoch": 0.0500400320256205, "grad_norm": 1.6078905730093185, "learning_rate": 1.9978993639109344e-05, "loss": 1.741, "step": 375 }, { "epoch": 0.050173472111022153, "grad_norm": 1.5995695562728156, "learning_rate": 1.997871272042559e-05, "loss": 1.7242, "step": 376 }, { "epoch": 0.0503069121964238, "grad_norm": 1.6340423898197851, "learning_rate": 1.9978429937831905e-05, "loss": 1.7679, "step": 377 }, { "epoch": 0.05044035228182546, "grad_norm": 1.6634270572428624, "learning_rate": 1.997814529138112e-05, "loss": 1.7056, "step": 378 }, { "epoch": 0.050573792367227116, "grad_norm": 1.7876625580664276, "learning_rate": 1.99778587811264e-05, "loss": 1.7285, "step": 379 }, { "epoch": 0.05070723245262877, "grad_norm": 1.5691636161666582, "learning_rate": 1.9977570407121258e-05, "loss": 1.7767, "step": 380 }, { "epoch": 0.05084067253803042, "grad_norm": 2.0869743704839894, "learning_rate": 1.9977280169419567e-05, "loss": 1.7575, "step": 381 }, { "epoch": 0.05097411262343208, "grad_norm": 1.5252100639836486, "learning_rate": 1.9976988068075536e-05, "loss": 1.8007, "step": 382 }, { "epoch": 0.051107552708833735, "grad_norm": 1.4730478361850414, "learning_rate": 1.9976694103143725e-05, "loss": 1.7411, "step": 383 }, { "epoch": 0.051240992794235385, "grad_norm": 2.0844587485090043, "learning_rate": 1.9976398274679044e-05, "loss": 1.7389, "step": 384 }, { "epoch": 0.05137443287963704, "grad_norm": 1.6654951502904705, "learning_rate": 1.9976100582736753e-05, "loss": 1.7117, "step": 385 }, { "epoch": 0.0515078729650387, "grad_norm": 1.4065668432108118, "learning_rate": 1.997580102737245e-05, "loss": 1.7272, "step": 386 }, { "epoch": 0.051641313050440354, "grad_norm": 1.6267242440943979, "learning_rate": 1.99754996086421e-05, "loss": 1.7481, "step": 387 }, { "epoch": 0.051774753135842004, "grad_norm": 2.2148633010781773, "learning_rate": 1.9975196326601997e-05, "loss": 1.7006, "step": 388 }, { "epoch": 0.05190819322124366, "grad_norm": 6.191778325835291, "learning_rate": 1.9974891181308788e-05, "loss": 1.7091, "step": 389 }, { "epoch": 0.05204163330664532, "grad_norm": 13.595285838337412, "learning_rate": 1.9974584172819478e-05, "loss": 1.7493, "step": 390 }, { "epoch": 0.052175073392046974, "grad_norm": 2.062696345964478, "learning_rate": 1.997427530119141e-05, "loss": 1.7039, "step": 391 }, { "epoch": 0.05230851347744862, "grad_norm": 2.0114191550940075, "learning_rate": 1.9973964566482276e-05, "loss": 1.7469, "step": 392 }, { "epoch": 0.05244195356285028, "grad_norm": 1.7826740746863319, "learning_rate": 1.997365196875012e-05, "loss": 1.6988, "step": 393 }, { "epoch": 0.052575393648251936, "grad_norm": 1.8065043559103795, "learning_rate": 1.9973337508053328e-05, "loss": 1.6763, "step": 394 }, { "epoch": 0.05270883373365359, "grad_norm": 1.618657676406062, "learning_rate": 1.9973021184450644e-05, "loss": 1.664, "step": 395 }, { "epoch": 0.05284227381905524, "grad_norm": 1.669337246084699, "learning_rate": 1.9972702998001145e-05, "loss": 1.6838, "step": 396 }, { "epoch": 0.0529757139044569, "grad_norm": 1.6318231642923446, "learning_rate": 1.9972382948764274e-05, "loss": 1.7319, "step": 397 }, { "epoch": 0.053109153989858555, "grad_norm": 1.68424778338556, "learning_rate": 1.9972061036799805e-05, "loss": 1.7526, "step": 398 }, { "epoch": 0.053242594075260205, "grad_norm": 10.099486327210181, "learning_rate": 1.9971737262167873e-05, "loss": 1.7284, "step": 399 }, { "epoch": 0.05337603416066186, "grad_norm": 2.4830108610902792, "learning_rate": 1.9971411624928952e-05, "loss": 1.6935, "step": 400 }, { "epoch": 0.05350947424606352, "grad_norm": 2.157249041077982, "learning_rate": 1.997108412514387e-05, "loss": 1.7532, "step": 401 }, { "epoch": 0.053642914331465175, "grad_norm": 17.029332948986212, "learning_rate": 1.9970754762873793e-05, "loss": 1.7565, "step": 402 }, { "epoch": 0.053776354416866824, "grad_norm": 2.2002999089958517, "learning_rate": 1.9970423538180253e-05, "loss": 1.7299, "step": 403 }, { "epoch": 0.05390979450226848, "grad_norm": 1.9526294546905574, "learning_rate": 1.997009045112511e-05, "loss": 1.7624, "step": 404 }, { "epoch": 0.05404323458767014, "grad_norm": 3.451322381710603, "learning_rate": 1.9969755501770588e-05, "loss": 1.7655, "step": 405 }, { "epoch": 0.054176674673071794, "grad_norm": 3.4396767676243116, "learning_rate": 1.9969418690179245e-05, "loss": 1.7311, "step": 406 }, { "epoch": 0.05431011475847344, "grad_norm": 2.047158976072317, "learning_rate": 1.9969080016413998e-05, "loss": 1.823, "step": 407 }, { "epoch": 0.0544435548438751, "grad_norm": 1.7003670198468013, "learning_rate": 1.9968739480538106e-05, "loss": 1.6803, "step": 408 }, { "epoch": 0.054576994929276756, "grad_norm": 1.5275767558995044, "learning_rate": 1.9968397082615177e-05, "loss": 1.6911, "step": 409 }, { "epoch": 0.054710435014678406, "grad_norm": 1.5021865116802673, "learning_rate": 1.9968052822709168e-05, "loss": 1.6727, "step": 410 }, { "epoch": 0.05484387510008006, "grad_norm": 1.6346663207881, "learning_rate": 1.9967706700884383e-05, "loss": 1.7605, "step": 411 }, { "epoch": 0.05497731518548172, "grad_norm": 1.4986173436934926, "learning_rate": 1.9967358717205473e-05, "loss": 1.7654, "step": 412 }, { "epoch": 0.055110755270883376, "grad_norm": 9.343403487653784, "learning_rate": 1.9967008871737435e-05, "loss": 1.7153, "step": 413 }, { "epoch": 0.055244195356285025, "grad_norm": 2.0824830761991167, "learning_rate": 1.996665716454562e-05, "loss": 1.738, "step": 414 }, { "epoch": 0.05537763544168668, "grad_norm": 2.05226788462729, "learning_rate": 1.996630359569572e-05, "loss": 1.6989, "step": 415 }, { "epoch": 0.05551107552708834, "grad_norm": 2.801402642121381, "learning_rate": 1.9965948165253783e-05, "loss": 1.7352, "step": 416 }, { "epoch": 0.055644515612489995, "grad_norm": 1.5863635183628055, "learning_rate": 1.996559087328619e-05, "loss": 1.7482, "step": 417 }, { "epoch": 0.055777955697891644, "grad_norm": 1.5347479681973688, "learning_rate": 1.9965231719859686e-05, "loss": 1.7804, "step": 418 }, { "epoch": 0.0559113957832933, "grad_norm": 5.941069172684277, "learning_rate": 1.9964870705041356e-05, "loss": 1.6768, "step": 419 }, { "epoch": 0.05604483586869496, "grad_norm": 1.9058886708572287, "learning_rate": 1.996450782889863e-05, "loss": 1.8138, "step": 420 }, { "epoch": 0.056178275954096614, "grad_norm": 1.83905231786784, "learning_rate": 1.9964143091499296e-05, "loss": 1.696, "step": 421 }, { "epoch": 0.056311716039498264, "grad_norm": 21.111853586038833, "learning_rate": 1.996377649291148e-05, "loss": 1.8305, "step": 422 }, { "epoch": 0.05644515612489992, "grad_norm": 2.0473455232249957, "learning_rate": 1.9963408033203652e-05, "loss": 1.6751, "step": 423 }, { "epoch": 0.05657859621030158, "grad_norm": 2.3640946700664305, "learning_rate": 1.9963037712444643e-05, "loss": 1.6767, "step": 424 }, { "epoch": 0.056712036295703226, "grad_norm": 2.639814143002119, "learning_rate": 1.9962665530703623e-05, "loss": 1.7202, "step": 425 }, { "epoch": 0.05684547638110488, "grad_norm": 2.035709803937704, "learning_rate": 1.996229148805011e-05, "loss": 1.7528, "step": 426 }, { "epoch": 0.05697891646650654, "grad_norm": 4.8723906405798605, "learning_rate": 1.996191558455397e-05, "loss": 1.7948, "step": 427 }, { "epoch": 0.057112356551908196, "grad_norm": 1.9755270680940058, "learning_rate": 1.9961537820285425e-05, "loss": 1.739, "step": 428 }, { "epoch": 0.057245796637309845, "grad_norm": 1.36715317359475, "learning_rate": 1.996115819531503e-05, "loss": 1.7144, "step": 429 }, { "epoch": 0.0573792367227115, "grad_norm": 2.099518986464087, "learning_rate": 1.9960776709713695e-05, "loss": 1.7191, "step": 430 }, { "epoch": 0.05751267680811316, "grad_norm": 1.5524531892573008, "learning_rate": 1.9960393363552677e-05, "loss": 1.6912, "step": 431 }, { "epoch": 0.057646116893514815, "grad_norm": 1.979055446110802, "learning_rate": 1.9960008156903584e-05, "loss": 1.6863, "step": 432 }, { "epoch": 0.057779556978916465, "grad_norm": 1.700547443693333, "learning_rate": 1.9959621089838363e-05, "loss": 1.6982, "step": 433 }, { "epoch": 0.05791299706431812, "grad_norm": 1.5225244263250537, "learning_rate": 1.995923216242932e-05, "loss": 1.7289, "step": 434 }, { "epoch": 0.05804643714971978, "grad_norm": 1.8075354741575524, "learning_rate": 1.9958841374749102e-05, "loss": 1.7704, "step": 435 }, { "epoch": 0.05817987723512143, "grad_norm": 1.5109107308638536, "learning_rate": 1.9958448726870695e-05, "loss": 1.7511, "step": 436 }, { "epoch": 0.058313317320523084, "grad_norm": 1.722880833771426, "learning_rate": 1.995805421886745e-05, "loss": 1.7224, "step": 437 }, { "epoch": 0.05844675740592474, "grad_norm": 1.507934531634378, "learning_rate": 1.995765785081305e-05, "loss": 1.7202, "step": 438 }, { "epoch": 0.0585801974913264, "grad_norm": 1.4464233282667935, "learning_rate": 1.995725962278154e-05, "loss": 1.6983, "step": 439 }, { "epoch": 0.058713637576728046, "grad_norm": 3.165760428882322, "learning_rate": 1.99568595348473e-05, "loss": 1.6845, "step": 440 }, { "epoch": 0.0588470776621297, "grad_norm": 10.453161040701552, "learning_rate": 1.995645758708506e-05, "loss": 1.7234, "step": 441 }, { "epoch": 0.05898051774753136, "grad_norm": 1.8410106428998834, "learning_rate": 1.9956053779569905e-05, "loss": 1.7281, "step": 442 }, { "epoch": 0.059113957832933016, "grad_norm": 1.442847521907269, "learning_rate": 1.9955648112377254e-05, "loss": 1.7504, "step": 443 }, { "epoch": 0.059247397918334666, "grad_norm": 1.4761935818962095, "learning_rate": 1.9955240585582887e-05, "loss": 1.6651, "step": 444 }, { "epoch": 0.05938083800373632, "grad_norm": 1.4515253586484216, "learning_rate": 1.995483119926292e-05, "loss": 1.7229, "step": 445 }, { "epoch": 0.05951427808913798, "grad_norm": 1.5540039321519692, "learning_rate": 1.9954419953493827e-05, "loss": 1.7037, "step": 446 }, { "epoch": 0.059647718174539635, "grad_norm": 1.5484860948366919, "learning_rate": 1.9954006848352423e-05, "loss": 1.7149, "step": 447 }, { "epoch": 0.059781158259941285, "grad_norm": 1.9290588477973933, "learning_rate": 1.995359188391587e-05, "loss": 1.7327, "step": 448 }, { "epoch": 0.05991459834534294, "grad_norm": 1.5388782815766935, "learning_rate": 1.9953175060261677e-05, "loss": 1.7035, "step": 449 }, { "epoch": 0.0600480384307446, "grad_norm": 1.4965138673427807, "learning_rate": 1.9952756377467706e-05, "loss": 1.7023, "step": 450 }, { "epoch": 0.06018147851614625, "grad_norm": 1.7236071176748005, "learning_rate": 1.995233583561216e-05, "loss": 1.7184, "step": 451 }, { "epoch": 0.060314918601547904, "grad_norm": 1.431141636104607, "learning_rate": 1.9951913434773592e-05, "loss": 1.7301, "step": 452 }, { "epoch": 0.06044835868694956, "grad_norm": 1.6645336690164663, "learning_rate": 1.9951489175030902e-05, "loss": 1.7039, "step": 453 }, { "epoch": 0.06058179877235122, "grad_norm": 2.517722128154126, "learning_rate": 1.995106305646333e-05, "loss": 1.7634, "step": 454 }, { "epoch": 0.060715238857752866, "grad_norm": 1.6794801287390608, "learning_rate": 1.9950635079150483e-05, "loss": 1.7285, "step": 455 }, { "epoch": 0.06084867894315452, "grad_norm": 1.7417705396361953, "learning_rate": 1.995020524317229e-05, "loss": 1.7344, "step": 456 }, { "epoch": 0.06098211902855618, "grad_norm": 1.6141113551164898, "learning_rate": 1.994977354860905e-05, "loss": 1.7246, "step": 457 }, { "epoch": 0.061115559113957836, "grad_norm": 1.468263394815004, "learning_rate": 1.9949339995541393e-05, "loss": 1.7539, "step": 458 }, { "epoch": 0.061248999199359486, "grad_norm": 1.665408795940273, "learning_rate": 1.99489045840503e-05, "loss": 1.6866, "step": 459 }, { "epoch": 0.06138243928476114, "grad_norm": 1.9816359801419392, "learning_rate": 1.9948467314217104e-05, "loss": 1.6644, "step": 460 }, { "epoch": 0.0615158793701628, "grad_norm": 1.5840211934856465, "learning_rate": 1.9948028186123482e-05, "loss": 1.6853, "step": 461 }, { "epoch": 0.06164931945556445, "grad_norm": 1.5472589180288636, "learning_rate": 1.9947587199851454e-05, "loss": 1.7171, "step": 462 }, { "epoch": 0.061782759540966105, "grad_norm": 1.531878299898292, "learning_rate": 1.99471443554834e-05, "loss": 1.7135, "step": 463 }, { "epoch": 0.06191619962636776, "grad_norm": 1.5698651966216552, "learning_rate": 1.9946699653102027e-05, "loss": 1.7386, "step": 464 }, { "epoch": 0.06204963971176942, "grad_norm": 1.3931585707012093, "learning_rate": 1.994625309279041e-05, "loss": 1.7277, "step": 465 }, { "epoch": 0.06218307979717107, "grad_norm": 1.360573940164653, "learning_rate": 1.994580467463196e-05, "loss": 1.7149, "step": 466 }, { "epoch": 0.062316519882572724, "grad_norm": 2.827001353046576, "learning_rate": 1.9945354398710427e-05, "loss": 1.7028, "step": 467 }, { "epoch": 0.06244995996797438, "grad_norm": 1.5514492612805253, "learning_rate": 1.994490226510993e-05, "loss": 1.75, "step": 468 }, { "epoch": 0.06258340005337604, "grad_norm": 1.3811202391730824, "learning_rate": 1.9944448273914917e-05, "loss": 1.6524, "step": 469 }, { "epoch": 0.0627168401387777, "grad_norm": 1.460693904274723, "learning_rate": 1.9943992425210186e-05, "loss": 1.7228, "step": 470 }, { "epoch": 0.06285028022417935, "grad_norm": 1.5719369846193096, "learning_rate": 1.9943534719080885e-05, "loss": 1.6843, "step": 471 }, { "epoch": 0.06298372030958099, "grad_norm": 1.4562562982659377, "learning_rate": 1.9943075155612513e-05, "loss": 1.7289, "step": 472 }, { "epoch": 0.06311716039498265, "grad_norm": 1.426871294088278, "learning_rate": 1.9942613734890908e-05, "loss": 1.7302, "step": 473 }, { "epoch": 0.0632506004803843, "grad_norm": 1.4065664674136416, "learning_rate": 1.9942150457002258e-05, "loss": 1.6727, "step": 474 }, { "epoch": 0.06338404056578596, "grad_norm": 1.4239048483373207, "learning_rate": 1.9941685322033095e-05, "loss": 1.7427, "step": 475 }, { "epoch": 0.06351748065118762, "grad_norm": 1.4154094770828864, "learning_rate": 1.9941218330070305e-05, "loss": 1.7506, "step": 476 }, { "epoch": 0.06365092073658928, "grad_norm": 1.3948618761716254, "learning_rate": 1.994074948120112e-05, "loss": 1.6968, "step": 477 }, { "epoch": 0.06378436082199093, "grad_norm": 1.4269960440031193, "learning_rate": 1.994027877551311e-05, "loss": 1.7036, "step": 478 }, { "epoch": 0.06391780090739257, "grad_norm": 1.561320058873614, "learning_rate": 1.9939806213094196e-05, "loss": 1.7169, "step": 479 }, { "epoch": 0.06405124099279423, "grad_norm": 1.3666224092243797, "learning_rate": 1.9939331794032655e-05, "loss": 1.6997, "step": 480 }, { "epoch": 0.06418468107819589, "grad_norm": 1.5591971808566425, "learning_rate": 1.9938855518417096e-05, "loss": 1.7198, "step": 481 }, { "epoch": 0.06431812116359754, "grad_norm": 1.436666270843483, "learning_rate": 1.9938377386336483e-05, "loss": 1.7764, "step": 482 }, { "epoch": 0.0644515612489992, "grad_norm": 2.5574706627508386, "learning_rate": 1.993789739788013e-05, "loss": 1.7151, "step": 483 }, { "epoch": 0.06458500133440086, "grad_norm": 1.682482407939873, "learning_rate": 1.9937415553137686e-05, "loss": 1.7123, "step": 484 }, { "epoch": 0.06471844141980251, "grad_norm": 1.4561970319846727, "learning_rate": 1.993693185219916e-05, "loss": 1.7404, "step": 485 }, { "epoch": 0.06485188150520416, "grad_norm": 1.503219984664679, "learning_rate": 1.9936446295154902e-05, "loss": 1.7044, "step": 486 }, { "epoch": 0.06498532159060581, "grad_norm": 1.4575867517583754, "learning_rate": 1.9935958882095607e-05, "loss": 1.6829, "step": 487 }, { "epoch": 0.06511876167600747, "grad_norm": 1.5458704457306156, "learning_rate": 1.9935469613112318e-05, "loss": 1.7147, "step": 488 }, { "epoch": 0.06525220176140913, "grad_norm": 1.4324636862239977, "learning_rate": 1.9934978488296423e-05, "loss": 1.699, "step": 489 }, { "epoch": 0.06538564184681078, "grad_norm": 1.609515083336598, "learning_rate": 1.993448550773966e-05, "loss": 1.7071, "step": 490 }, { "epoch": 0.06551908193221244, "grad_norm": 1.63784929721931, "learning_rate": 1.9933990671534116e-05, "loss": 1.7445, "step": 491 }, { "epoch": 0.0656525220176141, "grad_norm": 1.7557428701106073, "learning_rate": 1.9933493979772215e-05, "loss": 1.7638, "step": 492 }, { "epoch": 0.06578596210301575, "grad_norm": 1.6719589515914446, "learning_rate": 1.9932995432546733e-05, "loss": 1.6957, "step": 493 }, { "epoch": 0.0659194021884174, "grad_norm": 1.3831309866724475, "learning_rate": 1.99324950299508e-05, "loss": 1.7108, "step": 494 }, { "epoch": 0.06605284227381905, "grad_norm": 1.6438709703821648, "learning_rate": 1.9931992772077877e-05, "loss": 1.7459, "step": 495 }, { "epoch": 0.06618628235922071, "grad_norm": 1.556799980817237, "learning_rate": 1.993148865902179e-05, "loss": 1.7165, "step": 496 }, { "epoch": 0.06631972244462236, "grad_norm": 1.6005504681612104, "learning_rate": 1.993098269087669e-05, "loss": 1.6964, "step": 497 }, { "epoch": 0.06645316253002402, "grad_norm": 1.6767116156256117, "learning_rate": 1.9930474867737093e-05, "loss": 1.7101, "step": 498 }, { "epoch": 0.06658660261542568, "grad_norm": 1.437357072942104, "learning_rate": 1.9929965189697855e-05, "loss": 1.7503, "step": 499 }, { "epoch": 0.06672004270082733, "grad_norm": 1.2785056943422897, "learning_rate": 1.9929453656854177e-05, "loss": 1.7404, "step": 500 }, { "epoch": 0.06685348278622898, "grad_norm": 1.4081248663083363, "learning_rate": 1.9928940269301607e-05, "loss": 1.6389, "step": 501 }, { "epoch": 0.06698692287163063, "grad_norm": 1.2600630608207999, "learning_rate": 1.9928425027136038e-05, "loss": 1.6954, "step": 502 }, { "epoch": 0.06712036295703229, "grad_norm": 1.323168623645999, "learning_rate": 1.992790793045371e-05, "loss": 1.7456, "step": 503 }, { "epoch": 0.06725380304243395, "grad_norm": 1.337670730789513, "learning_rate": 1.9927388979351222e-05, "loss": 1.7088, "step": 504 }, { "epoch": 0.0673872431278356, "grad_norm": 1.3450109564421233, "learning_rate": 1.9926868173925496e-05, "loss": 1.7313, "step": 505 }, { "epoch": 0.06752068321323726, "grad_norm": 6.8172313878010415, "learning_rate": 1.9926345514273817e-05, "loss": 1.7266, "step": 506 }, { "epoch": 0.06765412329863892, "grad_norm": 1.4477937313346818, "learning_rate": 1.992582100049381e-05, "loss": 1.7154, "step": 507 }, { "epoch": 0.06778756338404057, "grad_norm": 1.4944076158515036, "learning_rate": 1.9925294632683454e-05, "loss": 1.681, "step": 508 }, { "epoch": 0.06792100346944221, "grad_norm": 1.4995222349055868, "learning_rate": 1.992476641094106e-05, "loss": 1.7149, "step": 509 }, { "epoch": 0.06805444355484387, "grad_norm": 1.489364590881723, "learning_rate": 1.99242363353653e-05, "loss": 1.7353, "step": 510 }, { "epoch": 0.06818788364024553, "grad_norm": 1.6809529621977222, "learning_rate": 1.9923704406055185e-05, "loss": 1.7274, "step": 511 }, { "epoch": 0.06832132372564718, "grad_norm": 3.583760089378779, "learning_rate": 1.992317062311007e-05, "loss": 1.7217, "step": 512 }, { "epoch": 0.06845476381104884, "grad_norm": 1.3874294319409661, "learning_rate": 1.9922634986629667e-05, "loss": 1.7285, "step": 513 }, { "epoch": 0.0685882038964505, "grad_norm": 1.3167043130129317, "learning_rate": 1.992209749671402e-05, "loss": 1.7086, "step": 514 }, { "epoch": 0.06872164398185215, "grad_norm": 1.6269952933086236, "learning_rate": 1.9921558153463526e-05, "loss": 1.7474, "step": 515 }, { "epoch": 0.0688550840672538, "grad_norm": 2.1627161830488304, "learning_rate": 1.992101695697893e-05, "loss": 1.7087, "step": 516 }, { "epoch": 0.06898852415265545, "grad_norm": 1.6239043871414123, "learning_rate": 1.9920473907361324e-05, "loss": 1.7406, "step": 517 }, { "epoch": 0.06912196423805711, "grad_norm": 1.3153898428508883, "learning_rate": 1.9919929004712137e-05, "loss": 1.7223, "step": 518 }, { "epoch": 0.06925540432345877, "grad_norm": 1.368634326229657, "learning_rate": 1.9919382249133158e-05, "loss": 1.6969, "step": 519 }, { "epoch": 0.06938884440886042, "grad_norm": 1.6237538448200741, "learning_rate": 1.991883364072651e-05, "loss": 1.724, "step": 520 }, { "epoch": 0.06952228449426208, "grad_norm": 1.3169440768502187, "learning_rate": 1.991828317959467e-05, "loss": 1.7785, "step": 521 }, { "epoch": 0.06965572457966374, "grad_norm": 1.3815810002503197, "learning_rate": 1.9917730865840453e-05, "loss": 1.737, "step": 522 }, { "epoch": 0.06978916466506539, "grad_norm": 19.153199421244327, "learning_rate": 1.991717669956703e-05, "loss": 1.6833, "step": 523 }, { "epoch": 0.06992260475046704, "grad_norm": 1.6039439835379967, "learning_rate": 1.991662068087791e-05, "loss": 1.7322, "step": 524 }, { "epoch": 0.07005604483586869, "grad_norm": 1.6305057820452133, "learning_rate": 1.991606280987695e-05, "loss": 1.7338, "step": 525 }, { "epoch": 0.07018948492127035, "grad_norm": 1.6126054306261002, "learning_rate": 1.9915503086668358e-05, "loss": 1.7227, "step": 526 }, { "epoch": 0.070322925006672, "grad_norm": 1.396267469940587, "learning_rate": 1.991494151135668e-05, "loss": 1.7336, "step": 527 }, { "epoch": 0.07045636509207366, "grad_norm": 1.3519617857019466, "learning_rate": 1.9914378084046814e-05, "loss": 1.685, "step": 528 }, { "epoch": 0.07058980517747532, "grad_norm": 1.369805017409349, "learning_rate": 1.9913812804844003e-05, "loss": 1.6849, "step": 529 }, { "epoch": 0.07072324526287697, "grad_norm": 1.3456126243717488, "learning_rate": 1.9913245673853833e-05, "loss": 1.6779, "step": 530 }, { "epoch": 0.07085668534827862, "grad_norm": 1.4115469932734985, "learning_rate": 1.9912676691182237e-05, "loss": 1.6661, "step": 531 }, { "epoch": 0.07099012543368027, "grad_norm": 1.452789685185474, "learning_rate": 1.9912105856935497e-05, "loss": 1.722, "step": 532 }, { "epoch": 0.07112356551908193, "grad_norm": 1.4137246122718905, "learning_rate": 1.9911533171220234e-05, "loss": 1.6956, "step": 533 }, { "epoch": 0.07125700560448359, "grad_norm": 1.657715031131543, "learning_rate": 1.991095863414342e-05, "loss": 1.7233, "step": 534 }, { "epoch": 0.07139044568988524, "grad_norm": 1.5491300325617336, "learning_rate": 1.9910382245812376e-05, "loss": 1.7357, "step": 535 }, { "epoch": 0.0715238857752869, "grad_norm": 1.3876706174173905, "learning_rate": 1.990980400633476e-05, "loss": 1.6843, "step": 536 }, { "epoch": 0.07165732586068856, "grad_norm": 1.5151426749768526, "learning_rate": 1.9909223915818584e-05, "loss": 1.7651, "step": 537 }, { "epoch": 0.0717907659460902, "grad_norm": 1.3575479856663701, "learning_rate": 1.99086419743722e-05, "loss": 1.6813, "step": 538 }, { "epoch": 0.07192420603149186, "grad_norm": 1.2693638194156773, "learning_rate": 1.9908058182104317e-05, "loss": 1.7248, "step": 539 }, { "epoch": 0.07205764611689351, "grad_norm": 1.6457462336156898, "learning_rate": 1.9907472539123968e-05, "loss": 1.752, "step": 540 }, { "epoch": 0.07219108620229517, "grad_norm": 1.678698655916284, "learning_rate": 1.9906885045540547e-05, "loss": 1.7071, "step": 541 }, { "epoch": 0.07232452628769682, "grad_norm": 1.650324255621724, "learning_rate": 1.99062957014638e-05, "loss": 1.7447, "step": 542 }, { "epoch": 0.07245796637309848, "grad_norm": 1.417893991120254, "learning_rate": 1.9905704507003794e-05, "loss": 1.6948, "step": 543 }, { "epoch": 0.07259140645850014, "grad_norm": 1.4999406881312796, "learning_rate": 1.9905111462270976e-05, "loss": 1.648, "step": 544 }, { "epoch": 0.0727248465439018, "grad_norm": 1.4453828984993826, "learning_rate": 1.9904516567376105e-05, "loss": 1.7441, "step": 545 }, { "epoch": 0.07285828662930344, "grad_norm": 1.466543506749187, "learning_rate": 1.990391982243031e-05, "loss": 1.736, "step": 546 }, { "epoch": 0.0729917267147051, "grad_norm": 1.2757799214271062, "learning_rate": 1.990332122754505e-05, "loss": 1.7273, "step": 547 }, { "epoch": 0.07312516680010675, "grad_norm": 1.786649049168531, "learning_rate": 1.9902720782832136e-05, "loss": 1.7035, "step": 548 }, { "epoch": 0.0732586068855084, "grad_norm": 1.6705192239448357, "learning_rate": 1.990211848840373e-05, "loss": 1.7395, "step": 549 }, { "epoch": 0.07339204697091006, "grad_norm": 1.4329159010944417, "learning_rate": 1.990151434437233e-05, "loss": 1.7018, "step": 550 }, { "epoch": 0.07352548705631172, "grad_norm": 1.3424218662116187, "learning_rate": 1.9900908350850784e-05, "loss": 1.6933, "step": 551 }, { "epoch": 0.07365892714171338, "grad_norm": 1.4293148650020853, "learning_rate": 1.9900300507952282e-05, "loss": 1.7374, "step": 552 }, { "epoch": 0.07379236722711502, "grad_norm": 1.3488966110143432, "learning_rate": 1.9899690815790365e-05, "loss": 1.72, "step": 553 }, { "epoch": 0.07392580731251668, "grad_norm": 1.2512462922247582, "learning_rate": 1.9899079274478916e-05, "loss": 1.6908, "step": 554 }, { "epoch": 0.07405924739791833, "grad_norm": 1.4908815449799042, "learning_rate": 1.9898465884132164e-05, "loss": 1.6776, "step": 555 }, { "epoch": 0.07419268748331999, "grad_norm": 1.340382784697227, "learning_rate": 1.9897850644864683e-05, "loss": 1.7624, "step": 556 }, { "epoch": 0.07432612756872164, "grad_norm": 1.5972162605012559, "learning_rate": 1.9897233556791392e-05, "loss": 1.7383, "step": 557 }, { "epoch": 0.0744595676541233, "grad_norm": 1.5896167899937395, "learning_rate": 1.989661462002756e-05, "loss": 1.7443, "step": 558 }, { "epoch": 0.07459300773952496, "grad_norm": 1.257607085158065, "learning_rate": 1.9895993834688792e-05, "loss": 1.6755, "step": 559 }, { "epoch": 0.07472644782492661, "grad_norm": 1.3197820287481992, "learning_rate": 1.9895371200891045e-05, "loss": 1.7056, "step": 560 }, { "epoch": 0.07485988791032826, "grad_norm": 1.2908766480278888, "learning_rate": 1.989474671875062e-05, "loss": 1.6912, "step": 561 }, { "epoch": 0.07499332799572991, "grad_norm": 1.3304422687884063, "learning_rate": 1.9894120388384167e-05, "loss": 1.6982, "step": 562 }, { "epoch": 0.07512676808113157, "grad_norm": 1.270386568576679, "learning_rate": 1.9893492209908673e-05, "loss": 1.6924, "step": 563 }, { "epoch": 0.07526020816653323, "grad_norm": 1.3181304828307967, "learning_rate": 1.9892862183441475e-05, "loss": 1.7527, "step": 564 }, { "epoch": 0.07539364825193488, "grad_norm": 1.2811143921204557, "learning_rate": 1.9892230309100257e-05, "loss": 1.6951, "step": 565 }, { "epoch": 0.07552708833733654, "grad_norm": 1.5238424880045898, "learning_rate": 1.989159658700304e-05, "loss": 1.7398, "step": 566 }, { "epoch": 0.0756605284227382, "grad_norm": 1.4366895400044768, "learning_rate": 1.9890961017268206e-05, "loss": 1.7202, "step": 567 }, { "epoch": 0.07579396850813984, "grad_norm": 1.3526495950339494, "learning_rate": 1.9890323600014465e-05, "loss": 1.732, "step": 568 }, { "epoch": 0.0759274085935415, "grad_norm": 1.8603030771660538, "learning_rate": 1.9889684335360883e-05, "loss": 1.7233, "step": 569 }, { "epoch": 0.07606084867894315, "grad_norm": 1.457863107278941, "learning_rate": 1.9889043223426864e-05, "loss": 1.6965, "step": 570 }, { "epoch": 0.07619428876434481, "grad_norm": 1.391003183808799, "learning_rate": 1.988840026433216e-05, "loss": 1.6628, "step": 571 }, { "epoch": 0.07632772884974647, "grad_norm": 1.5192166538443774, "learning_rate": 1.988775545819687e-05, "loss": 1.7319, "step": 572 }, { "epoch": 0.07646116893514812, "grad_norm": 1.349717213694597, "learning_rate": 1.988710880514144e-05, "loss": 1.7236, "step": 573 }, { "epoch": 0.07659460902054978, "grad_norm": 1.4799233730457397, "learning_rate": 1.9886460305286653e-05, "loss": 1.6907, "step": 574 }, { "epoch": 0.07672804910595143, "grad_norm": 1.3506939293859863, "learning_rate": 1.988580995875364e-05, "loss": 1.7699, "step": 575 }, { "epoch": 0.07686148919135308, "grad_norm": 1.407704376614465, "learning_rate": 1.9885157765663882e-05, "loss": 1.6905, "step": 576 }, { "epoch": 0.07699492927675473, "grad_norm": 1.3193501358588269, "learning_rate": 1.98845037261392e-05, "loss": 1.6843, "step": 577 }, { "epoch": 0.07712836936215639, "grad_norm": 1.4235546422730225, "learning_rate": 1.9883847840301766e-05, "loss": 1.6729, "step": 578 }, { "epoch": 0.07726180944755805, "grad_norm": 1.416179755704021, "learning_rate": 1.9883190108274083e-05, "loss": 1.6728, "step": 579 }, { "epoch": 0.0773952495329597, "grad_norm": 1.4443285178109353, "learning_rate": 1.9882530530179013e-05, "loss": 1.7048, "step": 580 }, { "epoch": 0.07752868961836136, "grad_norm": 1.5186108478360427, "learning_rate": 1.9881869106139756e-05, "loss": 1.7514, "step": 581 }, { "epoch": 0.07766212970376302, "grad_norm": 1.5545405165342625, "learning_rate": 1.988120583627986e-05, "loss": 1.7108, "step": 582 }, { "epoch": 0.07779556978916466, "grad_norm": 1.3900902877777435, "learning_rate": 1.9880540720723214e-05, "loss": 1.7202, "step": 583 }, { "epoch": 0.07792900987456632, "grad_norm": 1.5816584989791276, "learning_rate": 1.9879873759594057e-05, "loss": 1.6767, "step": 584 }, { "epoch": 0.07806244995996797, "grad_norm": 1.6976660554047953, "learning_rate": 1.9879204953016968e-05, "loss": 1.7432, "step": 585 }, { "epoch": 0.07819589004536963, "grad_norm": 1.3644155406532983, "learning_rate": 1.987853430111687e-05, "loss": 1.7208, "step": 586 }, { "epoch": 0.07832933013077129, "grad_norm": 1.7167546825301851, "learning_rate": 1.987786180401904e-05, "loss": 1.7147, "step": 587 }, { "epoch": 0.07846277021617294, "grad_norm": 1.3847591281625475, "learning_rate": 1.9877187461849086e-05, "loss": 1.7112, "step": 588 }, { "epoch": 0.0785962103015746, "grad_norm": 1.5571722725576893, "learning_rate": 1.9876511274732973e-05, "loss": 1.7638, "step": 589 }, { "epoch": 0.07872965038697624, "grad_norm": 1.394231398970201, "learning_rate": 1.9875833242797e-05, "loss": 1.7043, "step": 590 }, { "epoch": 0.0788630904723779, "grad_norm": 1.3846937701988455, "learning_rate": 1.987515336616782e-05, "loss": 1.7134, "step": 591 }, { "epoch": 0.07899653055777955, "grad_norm": 1.3032703638813365, "learning_rate": 1.9874471644972423e-05, "loss": 1.7126, "step": 592 }, { "epoch": 0.07912997064318121, "grad_norm": 1.4886448086335287, "learning_rate": 1.9873788079338147e-05, "loss": 1.7153, "step": 593 }, { "epoch": 0.07926341072858287, "grad_norm": 1.611970344713762, "learning_rate": 1.9873102669392676e-05, "loss": 1.7026, "step": 594 }, { "epoch": 0.07939685081398452, "grad_norm": 1.341383594822625, "learning_rate": 1.9872415415264036e-05, "loss": 1.6898, "step": 595 }, { "epoch": 0.07953029089938618, "grad_norm": 1.3360746005534239, "learning_rate": 1.9871726317080596e-05, "loss": 1.6745, "step": 596 }, { "epoch": 0.07966373098478784, "grad_norm": 8.410282418991848, "learning_rate": 1.9871035374971076e-05, "loss": 1.7583, "step": 597 }, { "epoch": 0.07979717107018948, "grad_norm": 1.6794637542120652, "learning_rate": 1.9870342589064533e-05, "loss": 1.6641, "step": 598 }, { "epoch": 0.07993061115559114, "grad_norm": 1.3453314375845957, "learning_rate": 1.9869647959490373e-05, "loss": 1.6974, "step": 599 }, { "epoch": 0.08006405124099279, "grad_norm": 1.6191652794215983, "learning_rate": 1.9868951486378344e-05, "loss": 1.7029, "step": 600 }, { "epoch": 0.08019749132639445, "grad_norm": 7.014227307878178, "learning_rate": 1.986825316985854e-05, "loss": 1.6979, "step": 601 }, { "epoch": 0.0803309314117961, "grad_norm": 1.666603092412851, "learning_rate": 1.9867553010061397e-05, "loss": 1.7212, "step": 602 }, { "epoch": 0.08046437149719776, "grad_norm": 6.777435475671393, "learning_rate": 1.98668510071177e-05, "loss": 1.6779, "step": 603 }, { "epoch": 0.08059781158259942, "grad_norm": 9.166357022667544, "learning_rate": 1.9866147161158574e-05, "loss": 1.7199, "step": 604 }, { "epoch": 0.08073125166800106, "grad_norm": 1.8917080711861438, "learning_rate": 1.9865441472315482e-05, "loss": 1.6838, "step": 605 }, { "epoch": 0.08086469175340272, "grad_norm": 1.5977719491010742, "learning_rate": 1.986473394072025e-05, "loss": 1.7179, "step": 606 }, { "epoch": 0.08099813183880437, "grad_norm": 1.658856522654762, "learning_rate": 1.9864024566505034e-05, "loss": 1.6835, "step": 607 }, { "epoch": 0.08113157192420603, "grad_norm": 1.499498737885197, "learning_rate": 1.9863313349802333e-05, "loss": 1.7349, "step": 608 }, { "epoch": 0.08126501200960769, "grad_norm": 13.15546017312421, "learning_rate": 1.9862600290744996e-05, "loss": 1.7886, "step": 609 }, { "epoch": 0.08139845209500934, "grad_norm": 1.9828316081894657, "learning_rate": 1.9861885389466216e-05, "loss": 1.731, "step": 610 }, { "epoch": 0.081531892180411, "grad_norm": 1.7448728734068772, "learning_rate": 1.9861168646099525e-05, "loss": 1.6969, "step": 611 }, { "epoch": 0.08166533226581266, "grad_norm": 1.6273856601272179, "learning_rate": 1.9860450060778806e-05, "loss": 1.7009, "step": 612 }, { "epoch": 0.0817987723512143, "grad_norm": 1.624049525128813, "learning_rate": 1.9859729633638278e-05, "loss": 1.762, "step": 613 }, { "epoch": 0.08193221243661596, "grad_norm": 2.6801910069846797, "learning_rate": 1.9859007364812516e-05, "loss": 1.64, "step": 614 }, { "epoch": 0.08206565252201761, "grad_norm": 1.905005831673537, "learning_rate": 1.9858283254436425e-05, "loss": 1.6968, "step": 615 }, { "epoch": 0.08219909260741927, "grad_norm": 1.7290318164845841, "learning_rate": 1.9857557302645265e-05, "loss": 1.669, "step": 616 }, { "epoch": 0.08233253269282093, "grad_norm": 1.7447056074174758, "learning_rate": 1.985682950957463e-05, "loss": 1.6803, "step": 617 }, { "epoch": 0.08246597277822258, "grad_norm": 1.7209313384190483, "learning_rate": 1.9856099875360472e-05, "loss": 1.7356, "step": 618 }, { "epoch": 0.08259941286362424, "grad_norm": 1.6574968534579264, "learning_rate": 1.9855368400139068e-05, "loss": 1.714, "step": 619 }, { "epoch": 0.08273285294902588, "grad_norm": 2.0519868971563344, "learning_rate": 1.9854635084047055e-05, "loss": 1.6875, "step": 620 }, { "epoch": 0.08286629303442754, "grad_norm": 1.5958780040536966, "learning_rate": 1.9853899927221408e-05, "loss": 1.7272, "step": 621 }, { "epoch": 0.0829997331198292, "grad_norm": 1.596271866444752, "learning_rate": 1.9853162929799445e-05, "loss": 1.6827, "step": 622 }, { "epoch": 0.08313317320523085, "grad_norm": 1.7231315613385811, "learning_rate": 1.9852424091918832e-05, "loss": 1.7274, "step": 623 }, { "epoch": 0.08326661329063251, "grad_norm": 1.4156414113003977, "learning_rate": 1.985168341371757e-05, "loss": 1.7028, "step": 624 }, { "epoch": 0.08340005337603416, "grad_norm": 1.6053274294337398, "learning_rate": 1.9850940895334014e-05, "loss": 1.7042, "step": 625 }, { "epoch": 0.08353349346143582, "grad_norm": 1.4585415436887006, "learning_rate": 1.9850196536906857e-05, "loss": 1.7034, "step": 626 }, { "epoch": 0.08366693354683746, "grad_norm": 1.7610381200893763, "learning_rate": 1.9849450338575132e-05, "loss": 1.6512, "step": 627 }, { "epoch": 0.08380037363223912, "grad_norm": 1.3605669698961698, "learning_rate": 1.9848702300478227e-05, "loss": 1.7041, "step": 628 }, { "epoch": 0.08393381371764078, "grad_norm": 1.3831476932394524, "learning_rate": 1.984795242275586e-05, "loss": 1.6914, "step": 629 }, { "epoch": 0.08406725380304243, "grad_norm": 1.3145903110830626, "learning_rate": 1.9847200705548106e-05, "loss": 1.7033, "step": 630 }, { "epoch": 0.08420069388844409, "grad_norm": 1.509116622712505, "learning_rate": 1.9846447148995374e-05, "loss": 1.7358, "step": 631 }, { "epoch": 0.08433413397384575, "grad_norm": 1.3781652631062695, "learning_rate": 1.984569175323842e-05, "loss": 1.6917, "step": 632 }, { "epoch": 0.0844675740592474, "grad_norm": 1.264221299881922, "learning_rate": 1.984493451841835e-05, "loss": 1.6757, "step": 633 }, { "epoch": 0.08460101414464906, "grad_norm": 1.4764352016979319, "learning_rate": 1.9844175444676594e-05, "loss": 1.6922, "step": 634 }, { "epoch": 0.0847344542300507, "grad_norm": 1.2723999017947936, "learning_rate": 1.9843414532154946e-05, "loss": 1.6704, "step": 635 }, { "epoch": 0.08486789431545236, "grad_norm": 1.3038264727029132, "learning_rate": 1.9842651780995536e-05, "loss": 1.7411, "step": 636 }, { "epoch": 0.08500133440085401, "grad_norm": 1.2103071746870917, "learning_rate": 1.9841887191340835e-05, "loss": 1.7046, "step": 637 }, { "epoch": 0.08513477448625567, "grad_norm": 1.4950179854172028, "learning_rate": 1.9841120763333665e-05, "loss": 1.6348, "step": 638 }, { "epoch": 0.08526821457165733, "grad_norm": 1.3121278424473823, "learning_rate": 1.9840352497117178e-05, "loss": 1.6354, "step": 639 }, { "epoch": 0.08540165465705898, "grad_norm": 1.2723164770543802, "learning_rate": 1.9839582392834883e-05, "loss": 1.7162, "step": 640 }, { "epoch": 0.08553509474246064, "grad_norm": 1.2860827518610605, "learning_rate": 1.983881045063062e-05, "loss": 1.7034, "step": 641 }, { "epoch": 0.08566853482786228, "grad_norm": 1.2951379952738893, "learning_rate": 1.983803667064859e-05, "loss": 1.6595, "step": 642 }, { "epoch": 0.08580197491326394, "grad_norm": 1.3206243247305416, "learning_rate": 1.9837261053033316e-05, "loss": 1.6181, "step": 643 }, { "epoch": 0.0859354149986656, "grad_norm": 1.2669833972811808, "learning_rate": 1.983648359792968e-05, "loss": 1.7101, "step": 644 }, { "epoch": 0.08606885508406725, "grad_norm": 1.29975526714487, "learning_rate": 1.9835704305482905e-05, "loss": 1.7718, "step": 645 }, { "epoch": 0.08620229516946891, "grad_norm": 1.292410669128173, "learning_rate": 1.9834923175838543e-05, "loss": 1.6673, "step": 646 }, { "epoch": 0.08633573525487057, "grad_norm": 1.4355885884004973, "learning_rate": 1.9834140209142507e-05, "loss": 1.7111, "step": 647 }, { "epoch": 0.08646917534027222, "grad_norm": 1.3556914239382971, "learning_rate": 1.983335540554105e-05, "loss": 1.7227, "step": 648 }, { "epoch": 0.08660261542567388, "grad_norm": 1.3219454699436908, "learning_rate": 1.9832568765180758e-05, "loss": 1.6788, "step": 649 }, { "epoch": 0.08673605551107552, "grad_norm": 1.3046640649028514, "learning_rate": 1.9831780288208565e-05, "loss": 1.7202, "step": 650 }, { "epoch": 0.08686949559647718, "grad_norm": 1.2956980030658956, "learning_rate": 1.983098997477176e-05, "loss": 1.6821, "step": 651 }, { "epoch": 0.08700293568187883, "grad_norm": 1.4884991397141505, "learning_rate": 1.9830197825017955e-05, "loss": 1.6731, "step": 652 }, { "epoch": 0.08713637576728049, "grad_norm": 1.2808830107084233, "learning_rate": 1.9829403839095115e-05, "loss": 1.6643, "step": 653 }, { "epoch": 0.08726981585268215, "grad_norm": 1.3169596863329627, "learning_rate": 1.9828608017151554e-05, "loss": 1.6755, "step": 654 }, { "epoch": 0.0874032559380838, "grad_norm": 1.4063600492133352, "learning_rate": 1.9827810359335914e-05, "loss": 1.6977, "step": 655 }, { "epoch": 0.08753669602348546, "grad_norm": 2.4592773035398823, "learning_rate": 1.9827010865797196e-05, "loss": 1.6971, "step": 656 }, { "epoch": 0.0876701361088871, "grad_norm": 1.5217703143835495, "learning_rate": 1.9826209536684732e-05, "loss": 1.747, "step": 657 }, { "epoch": 0.08780357619428876, "grad_norm": 1.2927816468462434, "learning_rate": 1.98254063721482e-05, "loss": 1.671, "step": 658 }, { "epoch": 0.08793701627969042, "grad_norm": 1.4773678075247743, "learning_rate": 1.9824601372337628e-05, "loss": 1.7076, "step": 659 }, { "epoch": 0.08807045636509207, "grad_norm": 1.4581860403342026, "learning_rate": 1.9823794537403372e-05, "loss": 1.7272, "step": 660 }, { "epoch": 0.08820389645049373, "grad_norm": 1.3752337806151618, "learning_rate": 1.982298586749615e-05, "loss": 1.6682, "step": 661 }, { "epoch": 0.08833733653589539, "grad_norm": 1.353951137249054, "learning_rate": 1.9822175362767006e-05, "loss": 1.6528, "step": 662 }, { "epoch": 0.08847077662129704, "grad_norm": 1.3149849437010714, "learning_rate": 1.9821363023367327e-05, "loss": 1.7095, "step": 663 }, { "epoch": 0.0886042167066987, "grad_norm": 1.3521374496959613, "learning_rate": 1.982054884944886e-05, "loss": 1.7091, "step": 664 }, { "epoch": 0.08873765679210034, "grad_norm": 1.2635125106435583, "learning_rate": 1.9819732841163683e-05, "loss": 1.6917, "step": 665 }, { "epoch": 0.088871096877502, "grad_norm": 1.4576045027521718, "learning_rate": 1.981891499866421e-05, "loss": 1.7307, "step": 666 }, { "epoch": 0.08900453696290365, "grad_norm": 2.0342924247731333, "learning_rate": 1.9818095322103207e-05, "loss": 1.7208, "step": 667 }, { "epoch": 0.08913797704830531, "grad_norm": 1.3227265586786, "learning_rate": 1.981727381163378e-05, "loss": 1.7141, "step": 668 }, { "epoch": 0.08927141713370697, "grad_norm": 1.3798866135669063, "learning_rate": 1.981645046740938e-05, "loss": 1.7157, "step": 669 }, { "epoch": 0.08940485721910862, "grad_norm": 1.399678731841314, "learning_rate": 1.9815625289583802e-05, "loss": 1.7082, "step": 670 }, { "epoch": 0.08953829730451028, "grad_norm": 1.4295883763194557, "learning_rate": 1.981479827831117e-05, "loss": 1.7054, "step": 671 }, { "epoch": 0.08967173738991192, "grad_norm": 1.2635430082079968, "learning_rate": 1.9813969433745964e-05, "loss": 1.7389, "step": 672 }, { "epoch": 0.08980517747531358, "grad_norm": 1.2907473612580835, "learning_rate": 1.981313875604301e-05, "loss": 1.7277, "step": 673 }, { "epoch": 0.08993861756071524, "grad_norm": 1.237003971445107, "learning_rate": 1.981230624535746e-05, "loss": 1.7385, "step": 674 }, { "epoch": 0.0900720576461169, "grad_norm": 1.3068726757473064, "learning_rate": 1.9811471901844818e-05, "loss": 1.7012, "step": 675 }, { "epoch": 0.09020549773151855, "grad_norm": 1.3009066481680596, "learning_rate": 1.9810635725660934e-05, "loss": 1.7133, "step": 676 }, { "epoch": 0.0903389378169202, "grad_norm": 1.4485758632883985, "learning_rate": 1.9809797716961995e-05, "loss": 1.704, "step": 677 }, { "epoch": 0.09047237790232186, "grad_norm": 1.2649229686508479, "learning_rate": 1.980895787590453e-05, "loss": 1.7537, "step": 678 }, { "epoch": 0.0906058179877235, "grad_norm": 1.2115856539500358, "learning_rate": 1.9808116202645414e-05, "loss": 1.7245, "step": 679 }, { "epoch": 0.09073925807312516, "grad_norm": 1.8074140919241495, "learning_rate": 1.9807272697341862e-05, "loss": 1.7238, "step": 680 }, { "epoch": 0.09087269815852682, "grad_norm": 1.3795215862127743, "learning_rate": 1.980642736015143e-05, "loss": 1.7037, "step": 681 }, { "epoch": 0.09100613824392847, "grad_norm": 1.2851711329581033, "learning_rate": 1.9805580191232017e-05, "loss": 1.6901, "step": 682 }, { "epoch": 0.09113957832933013, "grad_norm": 1.2298242189849842, "learning_rate": 1.9804731190741867e-05, "loss": 1.6914, "step": 683 }, { "epoch": 0.09127301841473179, "grad_norm": 1.2359913396040505, "learning_rate": 1.980388035883956e-05, "loss": 1.6764, "step": 684 }, { "epoch": 0.09140645850013344, "grad_norm": 1.4724399232032126, "learning_rate": 1.980302769568402e-05, "loss": 1.6784, "step": 685 }, { "epoch": 0.0915398985855351, "grad_norm": 1.4540529296606115, "learning_rate": 1.9802173201434522e-05, "loss": 1.726, "step": 686 }, { "epoch": 0.09167333867093674, "grad_norm": 1.2409303185307805, "learning_rate": 1.980131687625067e-05, "loss": 1.7201, "step": 687 }, { "epoch": 0.0918067787563384, "grad_norm": 1.2194142955503393, "learning_rate": 1.980045872029242e-05, "loss": 1.7108, "step": 688 }, { "epoch": 0.09194021884174006, "grad_norm": 1.39809489648676, "learning_rate": 1.979959873372006e-05, "loss": 1.6765, "step": 689 }, { "epoch": 0.09207365892714171, "grad_norm": 1.253703018642756, "learning_rate": 1.9798736916694234e-05, "loss": 1.7405, "step": 690 }, { "epoch": 0.09220709901254337, "grad_norm": 1.4561652380463412, "learning_rate": 1.979787326937591e-05, "loss": 1.7281, "step": 691 }, { "epoch": 0.09234053909794503, "grad_norm": 1.2475596641603122, "learning_rate": 1.9797007791926416e-05, "loss": 1.7424, "step": 692 }, { "epoch": 0.09247397918334668, "grad_norm": 1.2389551154520175, "learning_rate": 1.9796140484507407e-05, "loss": 1.64, "step": 693 }, { "epoch": 0.09260741926874833, "grad_norm": 1.2409009102177742, "learning_rate": 1.979527134728089e-05, "loss": 1.7022, "step": 694 }, { "epoch": 0.09274085935414998, "grad_norm": 3.070113056380779, "learning_rate": 1.979440038040921e-05, "loss": 1.7089, "step": 695 }, { "epoch": 0.09287429943955164, "grad_norm": 1.2847037546220312, "learning_rate": 1.9793527584055048e-05, "loss": 1.6702, "step": 696 }, { "epoch": 0.0930077395249533, "grad_norm": 1.342430254516785, "learning_rate": 1.9792652958381442e-05, "loss": 1.6981, "step": 697 }, { "epoch": 0.09314117961035495, "grad_norm": 1.285114160550707, "learning_rate": 1.9791776503551753e-05, "loss": 1.7194, "step": 698 }, { "epoch": 0.09327461969575661, "grad_norm": 1.384740422514496, "learning_rate": 1.97908982197297e-05, "loss": 1.6947, "step": 699 }, { "epoch": 0.09340805978115826, "grad_norm": 1.317742692774045, "learning_rate": 1.9790018107079328e-05, "loss": 1.7059, "step": 700 }, { "epoch": 0.09354149986655992, "grad_norm": 1.3948823280878986, "learning_rate": 1.978913616576504e-05, "loss": 1.6838, "step": 701 }, { "epoch": 0.09367493995196156, "grad_norm": 1.5694970220793347, "learning_rate": 1.9788252395951572e-05, "loss": 1.7468, "step": 702 }, { "epoch": 0.09380838003736322, "grad_norm": 1.3817257421047109, "learning_rate": 1.9787366797804e-05, "loss": 1.7176, "step": 703 }, { "epoch": 0.09394182012276488, "grad_norm": 1.6513313977834363, "learning_rate": 1.978647937148774e-05, "loss": 1.6944, "step": 704 }, { "epoch": 0.09407526020816653, "grad_norm": 1.405768726367186, "learning_rate": 1.9785590117168558e-05, "loss": 1.6983, "step": 705 }, { "epoch": 0.09420870029356819, "grad_norm": 1.3377994406213816, "learning_rate": 1.9784699035012552e-05, "loss": 1.668, "step": 706 }, { "epoch": 0.09434214037896985, "grad_norm": 1.4374634387262166, "learning_rate": 1.9783806125186176e-05, "loss": 1.725, "step": 707 }, { "epoch": 0.0944755804643715, "grad_norm": 1.2800462634526275, "learning_rate": 1.9782911387856204e-05, "loss": 1.6953, "step": 708 }, { "epoch": 0.09460902054977315, "grad_norm": 1.364118854409637, "learning_rate": 1.978201482318977e-05, "loss": 1.7155, "step": 709 }, { "epoch": 0.0947424606351748, "grad_norm": 1.2401749198659822, "learning_rate": 1.9781116431354337e-05, "loss": 1.7057, "step": 710 }, { "epoch": 0.09487590072057646, "grad_norm": 1.8122777264311412, "learning_rate": 1.9780216212517718e-05, "loss": 1.7083, "step": 711 }, { "epoch": 0.09500934080597812, "grad_norm": 1.3875021914221024, "learning_rate": 1.9779314166848063e-05, "loss": 1.6533, "step": 712 }, { "epoch": 0.09514278089137977, "grad_norm": 1.2772834218394613, "learning_rate": 1.9778410294513865e-05, "loss": 1.6915, "step": 713 }, { "epoch": 0.09527622097678143, "grad_norm": 1.4645542214685092, "learning_rate": 1.977750459568395e-05, "loss": 1.7061, "step": 714 }, { "epoch": 0.09540966106218308, "grad_norm": 1.38108304312693, "learning_rate": 1.9776597070527502e-05, "loss": 1.721, "step": 715 }, { "epoch": 0.09554310114758474, "grad_norm": 1.382167340023687, "learning_rate": 1.977568771921403e-05, "loss": 1.7469, "step": 716 }, { "epoch": 0.09567654123298638, "grad_norm": 1.4198404214818579, "learning_rate": 1.9774776541913394e-05, "loss": 1.6709, "step": 717 }, { "epoch": 0.09580998131838804, "grad_norm": 1.3599396711566136, "learning_rate": 1.9773863538795787e-05, "loss": 1.7425, "step": 718 }, { "epoch": 0.0959434214037897, "grad_norm": 1.2499141359082184, "learning_rate": 1.9772948710031754e-05, "loss": 1.6758, "step": 719 }, { "epoch": 0.09607686148919135, "grad_norm": 1.3452952575533974, "learning_rate": 1.977203205579217e-05, "loss": 1.6976, "step": 720 }, { "epoch": 0.09621030157459301, "grad_norm": 1.5618388812813502, "learning_rate": 1.9771113576248257e-05, "loss": 1.7143, "step": 721 }, { "epoch": 0.09634374165999467, "grad_norm": 1.4099658140052458, "learning_rate": 1.9770193271571573e-05, "loss": 1.704, "step": 722 }, { "epoch": 0.09647718174539632, "grad_norm": 1.4101573255249529, "learning_rate": 1.976927114193403e-05, "loss": 1.7099, "step": 723 }, { "epoch": 0.09661062183079797, "grad_norm": 2.4533761018966898, "learning_rate": 1.976834718750786e-05, "loss": 1.7659, "step": 724 }, { "epoch": 0.09674406191619962, "grad_norm": 1.450517529596779, "learning_rate": 1.9767421408465654e-05, "loss": 1.7009, "step": 725 }, { "epoch": 0.09687750200160128, "grad_norm": 1.268810752392761, "learning_rate": 1.9766493804980335e-05, "loss": 1.7101, "step": 726 }, { "epoch": 0.09701094208700294, "grad_norm": 1.3332261405385217, "learning_rate": 1.976556437722517e-05, "loss": 1.7127, "step": 727 }, { "epoch": 0.09714438217240459, "grad_norm": 1.2579594665184823, "learning_rate": 1.976463312537376e-05, "loss": 1.7202, "step": 728 }, { "epoch": 0.09727782225780625, "grad_norm": 1.5697939160123118, "learning_rate": 1.976370004960006e-05, "loss": 1.7365, "step": 729 }, { "epoch": 0.0974112623432079, "grad_norm": 1.437009988573765, "learning_rate": 1.9762765150078356e-05, "loss": 1.7322, "step": 730 }, { "epoch": 0.09754470242860955, "grad_norm": 1.3169792642285323, "learning_rate": 1.9761828426983275e-05, "loss": 1.7273, "step": 731 }, { "epoch": 0.0976781425140112, "grad_norm": 1.5676941938167916, "learning_rate": 1.9760889880489785e-05, "loss": 1.6898, "step": 732 }, { "epoch": 0.09781158259941286, "grad_norm": 1.4697881996182383, "learning_rate": 1.9759949510773198e-05, "loss": 1.7192, "step": 733 }, { "epoch": 0.09794502268481452, "grad_norm": 1.2684891400607772, "learning_rate": 1.9759007318009163e-05, "loss": 1.7186, "step": 734 }, { "epoch": 0.09807846277021617, "grad_norm": 1.2364993695259308, "learning_rate": 1.9758063302373668e-05, "loss": 1.7161, "step": 735 }, { "epoch": 0.09821190285561783, "grad_norm": 1.2322024949845318, "learning_rate": 1.9757117464043054e-05, "loss": 1.6399, "step": 736 }, { "epoch": 0.09834534294101949, "grad_norm": 1.2787037328868474, "learning_rate": 1.9756169803193982e-05, "loss": 1.6938, "step": 737 }, { "epoch": 0.09847878302642114, "grad_norm": 1.4067488547142601, "learning_rate": 1.9755220320003472e-05, "loss": 1.6728, "step": 738 }, { "epoch": 0.09861222311182279, "grad_norm": 1.2315172135495314, "learning_rate": 1.9754269014648876e-05, "loss": 1.6648, "step": 739 }, { "epoch": 0.09874566319722444, "grad_norm": 1.267447733362674, "learning_rate": 1.975331588730788e-05, "loss": 1.706, "step": 740 }, { "epoch": 0.0988791032826261, "grad_norm": 1.2625669177056562, "learning_rate": 1.9752360938158522e-05, "loss": 1.6826, "step": 741 }, { "epoch": 0.09901254336802776, "grad_norm": 1.3204723066933017, "learning_rate": 1.9751404167379175e-05, "loss": 1.7471, "step": 742 }, { "epoch": 0.09914598345342941, "grad_norm": 1.2156415623133245, "learning_rate": 1.9750445575148557e-05, "loss": 1.7028, "step": 743 }, { "epoch": 0.09927942353883107, "grad_norm": 1.3803504882683986, "learning_rate": 1.9749485161645715e-05, "loss": 1.5917, "step": 744 }, { "epoch": 0.09941286362423273, "grad_norm": 1.3249632567906648, "learning_rate": 1.974852292705005e-05, "loss": 1.7628, "step": 745 }, { "epoch": 0.09954630370963437, "grad_norm": 1.2675581593990985, "learning_rate": 1.974755887154129e-05, "loss": 1.7161, "step": 746 }, { "epoch": 0.09967974379503602, "grad_norm": 1.1665077868327602, "learning_rate": 1.9746592995299515e-05, "loss": 1.6748, "step": 747 }, { "epoch": 0.09981318388043768, "grad_norm": 1.8639201171370587, "learning_rate": 1.974562529850514e-05, "loss": 1.697, "step": 748 }, { "epoch": 0.09994662396583934, "grad_norm": 1.3196170477492044, "learning_rate": 1.9744655781338913e-05, "loss": 1.7133, "step": 749 }, { "epoch": 0.100080064051241, "grad_norm": 1.2684243723887993, "learning_rate": 1.9743684443981933e-05, "loss": 1.7135, "step": 750 }, { "epoch": 0.10021350413664265, "grad_norm": 1.1693884140497548, "learning_rate": 1.9742711286615637e-05, "loss": 1.7565, "step": 751 }, { "epoch": 0.10034694422204431, "grad_norm": 1.2151625452030927, "learning_rate": 1.97417363094218e-05, "loss": 1.6717, "step": 752 }, { "epoch": 0.10048038430744596, "grad_norm": 1.205035641499454, "learning_rate": 1.974075951258253e-05, "loss": 1.699, "step": 753 }, { "epoch": 0.1006138243928476, "grad_norm": 1.547680875790279, "learning_rate": 1.973978089628029e-05, "loss": 1.7031, "step": 754 }, { "epoch": 0.10074726447824926, "grad_norm": 1.5859192028100046, "learning_rate": 1.9738800460697864e-05, "loss": 1.7173, "step": 755 }, { "epoch": 0.10088070456365092, "grad_norm": 1.5322336200687723, "learning_rate": 1.9737818206018398e-05, "loss": 1.6959, "step": 756 }, { "epoch": 0.10101414464905258, "grad_norm": 1.2626433527759493, "learning_rate": 1.9736834132425358e-05, "loss": 1.7239, "step": 757 }, { "epoch": 0.10114758473445423, "grad_norm": 1.505323027317607, "learning_rate": 1.973584824010256e-05, "loss": 1.6621, "step": 758 }, { "epoch": 0.10128102481985589, "grad_norm": 1.286494859170549, "learning_rate": 1.9734860529234158e-05, "loss": 1.7242, "step": 759 }, { "epoch": 0.10141446490525755, "grad_norm": 1.2306017189093845, "learning_rate": 1.9733871000004643e-05, "loss": 1.7357, "step": 760 }, { "epoch": 0.10154790499065919, "grad_norm": 1.2061936583163555, "learning_rate": 1.973287965259885e-05, "loss": 1.7067, "step": 761 }, { "epoch": 0.10168134507606084, "grad_norm": 1.2179611482678936, "learning_rate": 1.9731886487201955e-05, "loss": 1.685, "step": 762 }, { "epoch": 0.1018147851614625, "grad_norm": 1.2683345257288643, "learning_rate": 1.973089150399946e-05, "loss": 1.6885, "step": 763 }, { "epoch": 0.10194822524686416, "grad_norm": 1.2219892409057462, "learning_rate": 1.9729894703177224e-05, "loss": 1.6881, "step": 764 }, { "epoch": 0.10208166533226581, "grad_norm": 1.458449697810168, "learning_rate": 1.972889608492144e-05, "loss": 1.7057, "step": 765 }, { "epoch": 0.10221510541766747, "grad_norm": 1.3400740587143003, "learning_rate": 1.972789564941863e-05, "loss": 1.7171, "step": 766 }, { "epoch": 0.10234854550306913, "grad_norm": 1.2400559343919726, "learning_rate": 1.9726893396855675e-05, "loss": 1.6838, "step": 767 }, { "epoch": 0.10248198558847077, "grad_norm": 1.2051072425315403, "learning_rate": 1.9725889327419773e-05, "loss": 1.7223, "step": 768 }, { "epoch": 0.10261542567387243, "grad_norm": 1.2802866462485152, "learning_rate": 1.9724883441298482e-05, "loss": 1.6311, "step": 769 }, { "epoch": 0.10274886575927408, "grad_norm": 1.2328867984674214, "learning_rate": 1.9723875738679684e-05, "loss": 1.7446, "step": 770 }, { "epoch": 0.10288230584467574, "grad_norm": 1.2341749669672337, "learning_rate": 1.9722866219751606e-05, "loss": 1.7008, "step": 771 }, { "epoch": 0.1030157459300774, "grad_norm": 1.236991722122687, "learning_rate": 1.972185488470282e-05, "loss": 1.7141, "step": 772 }, { "epoch": 0.10314918601547905, "grad_norm": 1.577844605519268, "learning_rate": 1.9720841733722228e-05, "loss": 1.7455, "step": 773 }, { "epoch": 0.10328262610088071, "grad_norm": 1.3705708996585753, "learning_rate": 1.9719826766999076e-05, "loss": 1.7616, "step": 774 }, { "epoch": 0.10341606618628237, "grad_norm": 1.221952982391811, "learning_rate": 1.971880998472295e-05, "loss": 1.7413, "step": 775 }, { "epoch": 0.10354950627168401, "grad_norm": 1.537181900251082, "learning_rate": 1.9717791387083772e-05, "loss": 1.7011, "step": 776 }, { "epoch": 0.10368294635708566, "grad_norm": 1.2027484662414312, "learning_rate": 1.9716770974271803e-05, "loss": 1.6332, "step": 777 }, { "epoch": 0.10381638644248732, "grad_norm": 1.253964108899184, "learning_rate": 1.9715748746477644e-05, "loss": 1.7075, "step": 778 }, { "epoch": 0.10394982652788898, "grad_norm": 1.309242214190543, "learning_rate": 1.9714724703892238e-05, "loss": 1.7086, "step": 779 }, { "epoch": 0.10408326661329063, "grad_norm": 1.1977182401997006, "learning_rate": 1.9713698846706865e-05, "loss": 1.6517, "step": 780 }, { "epoch": 0.10421670669869229, "grad_norm": 6.937492769576936, "learning_rate": 1.971267117511314e-05, "loss": 1.7718, "step": 781 }, { "epoch": 0.10435014678409395, "grad_norm": 1.7070387484387968, "learning_rate": 1.9711641689303024e-05, "loss": 1.6522, "step": 782 }, { "epoch": 0.10448358686949559, "grad_norm": 1.407052136214357, "learning_rate": 1.971061038946881e-05, "loss": 1.6851, "step": 783 }, { "epoch": 0.10461702695489725, "grad_norm": 1.5040283593232824, "learning_rate": 1.970957727580314e-05, "loss": 1.64, "step": 784 }, { "epoch": 0.1047504670402989, "grad_norm": 1.3258063153724626, "learning_rate": 1.9708542348498975e-05, "loss": 1.6948, "step": 785 }, { "epoch": 0.10488390712570056, "grad_norm": 1.288600009166478, "learning_rate": 1.970750560774964e-05, "loss": 1.7193, "step": 786 }, { "epoch": 0.10501734721110222, "grad_norm": 1.4250574606731519, "learning_rate": 1.9706467053748782e-05, "loss": 1.7096, "step": 787 }, { "epoch": 0.10515078729650387, "grad_norm": 1.5215309159514565, "learning_rate": 1.9705426686690387e-05, "loss": 1.6971, "step": 788 }, { "epoch": 0.10528422738190553, "grad_norm": 1.3671118437746754, "learning_rate": 1.9704384506768788e-05, "loss": 1.7509, "step": 789 }, { "epoch": 0.10541766746730719, "grad_norm": 1.2902319309239332, "learning_rate": 1.9703340514178656e-05, "loss": 1.7539, "step": 790 }, { "epoch": 0.10555110755270883, "grad_norm": 1.2681335491850911, "learning_rate": 1.9702294709114987e-05, "loss": 1.7117, "step": 791 }, { "epoch": 0.10568454763811048, "grad_norm": 1.2418617832342838, "learning_rate": 1.9701247091773135e-05, "loss": 1.7345, "step": 792 }, { "epoch": 0.10581798772351214, "grad_norm": 1.2895790697871072, "learning_rate": 1.9700197662348777e-05, "loss": 1.6623, "step": 793 }, { "epoch": 0.1059514278089138, "grad_norm": 1.2684357393175132, "learning_rate": 1.9699146421037935e-05, "loss": 1.6487, "step": 794 }, { "epoch": 0.10608486789431545, "grad_norm": 1.2423605419736183, "learning_rate": 1.9698093368036976e-05, "loss": 1.7129, "step": 795 }, { "epoch": 0.10621830797971711, "grad_norm": 1.4752327188086216, "learning_rate": 1.969703850354259e-05, "loss": 1.6892, "step": 796 }, { "epoch": 0.10635174806511877, "grad_norm": 1.4526839181300373, "learning_rate": 1.9695981827751815e-05, "loss": 1.7072, "step": 797 }, { "epoch": 0.10648518815052041, "grad_norm": 1.285791119441606, "learning_rate": 1.969492334086203e-05, "loss": 1.7122, "step": 798 }, { "epoch": 0.10661862823592207, "grad_norm": 1.2256787041306045, "learning_rate": 1.9693863043070944e-05, "loss": 1.6888, "step": 799 }, { "epoch": 0.10675206832132372, "grad_norm": 1.228951068308044, "learning_rate": 1.9692800934576607e-05, "loss": 1.727, "step": 800 }, { "epoch": 0.10688550840672538, "grad_norm": 1.352118087145811, "learning_rate": 1.9691737015577418e-05, "loss": 1.7125, "step": 801 }, { "epoch": 0.10701894849212704, "grad_norm": 1.21157400011724, "learning_rate": 1.96906712862721e-05, "loss": 1.6923, "step": 802 }, { "epoch": 0.10715238857752869, "grad_norm": 1.3003550717703736, "learning_rate": 1.9689603746859714e-05, "loss": 1.6511, "step": 803 }, { "epoch": 0.10728582866293035, "grad_norm": 1.1564673283657656, "learning_rate": 1.9688534397539666e-05, "loss": 1.6726, "step": 804 }, { "epoch": 0.107419268748332, "grad_norm": 1.2730332287589288, "learning_rate": 1.9687463238511704e-05, "loss": 1.6618, "step": 805 }, { "epoch": 0.10755270883373365, "grad_norm": 1.3800666144026246, "learning_rate": 1.9686390269975907e-05, "loss": 1.7154, "step": 806 }, { "epoch": 0.1076861489191353, "grad_norm": 1.2048204552645487, "learning_rate": 1.968531549213269e-05, "loss": 1.6448, "step": 807 }, { "epoch": 0.10781958900453696, "grad_norm": 1.3937316868921632, "learning_rate": 1.9684238905182807e-05, "loss": 1.729, "step": 808 }, { "epoch": 0.10795302908993862, "grad_norm": 1.3900561665275548, "learning_rate": 1.968316050932736e-05, "loss": 1.6828, "step": 809 }, { "epoch": 0.10808646917534027, "grad_norm": 1.6114246988977743, "learning_rate": 1.9682080304767775e-05, "loss": 1.6792, "step": 810 }, { "epoch": 0.10821990926074193, "grad_norm": 1.197150037360335, "learning_rate": 1.9680998291705822e-05, "loss": 1.6617, "step": 811 }, { "epoch": 0.10835334934614359, "grad_norm": 1.2177958885989686, "learning_rate": 1.9679914470343615e-05, "loss": 1.7133, "step": 812 }, { "epoch": 0.10848678943154523, "grad_norm": 1.494320786513006, "learning_rate": 1.9678828840883592e-05, "loss": 1.7004, "step": 813 }, { "epoch": 0.10862022951694689, "grad_norm": 1.1694521285820114, "learning_rate": 1.9677741403528538e-05, "loss": 1.6699, "step": 814 }, { "epoch": 0.10875366960234854, "grad_norm": 1.1962398552992284, "learning_rate": 1.967665215848158e-05, "loss": 1.6874, "step": 815 }, { "epoch": 0.1088871096877502, "grad_norm": 1.5912303268847496, "learning_rate": 1.9675561105946165e-05, "loss": 1.6643, "step": 816 }, { "epoch": 0.10902054977315186, "grad_norm": 1.2975078209899873, "learning_rate": 1.96744682461261e-05, "loss": 1.6789, "step": 817 }, { "epoch": 0.10915398985855351, "grad_norm": 1.2145763409844237, "learning_rate": 1.9673373579225514e-05, "loss": 1.6771, "step": 818 }, { "epoch": 0.10928742994395517, "grad_norm": 1.493883539659001, "learning_rate": 1.9672277105448878e-05, "loss": 1.6724, "step": 819 }, { "epoch": 0.10942087002935681, "grad_norm": 1.644683081505485, "learning_rate": 1.9671178825001002e-05, "loss": 1.6167, "step": 820 }, { "epoch": 0.10955431011475847, "grad_norm": 1.2719900635243897, "learning_rate": 1.967007873808703e-05, "loss": 1.6765, "step": 821 }, { "epoch": 0.10968775020016013, "grad_norm": 1.2528047543957326, "learning_rate": 1.966897684491245e-05, "loss": 1.7102, "step": 822 }, { "epoch": 0.10982119028556178, "grad_norm": 1.2419647024115608, "learning_rate": 1.9667873145683082e-05, "loss": 1.7077, "step": 823 }, { "epoch": 0.10995463037096344, "grad_norm": 1.4501286698451352, "learning_rate": 1.966676764060508e-05, "loss": 1.6994, "step": 824 }, { "epoch": 0.1100880704563651, "grad_norm": 1.3630120265958166, "learning_rate": 1.9665660329884944e-05, "loss": 1.6926, "step": 825 }, { "epoch": 0.11022151054176675, "grad_norm": 1.460903039980999, "learning_rate": 1.966455121372951e-05, "loss": 1.7103, "step": 826 }, { "epoch": 0.11035495062716841, "grad_norm": 1.5274078789068972, "learning_rate": 1.9663440292345942e-05, "loss": 1.6988, "step": 827 }, { "epoch": 0.11048839071257005, "grad_norm": 1.2090669145048123, "learning_rate": 1.9662327565941747e-05, "loss": 1.6496, "step": 828 }, { "epoch": 0.1106218307979717, "grad_norm": 1.2269483421344651, "learning_rate": 1.9661213034724776e-05, "loss": 1.7122, "step": 829 }, { "epoch": 0.11075527088337336, "grad_norm": 1.4069578940447711, "learning_rate": 1.9660096698903203e-05, "loss": 1.7269, "step": 830 }, { "epoch": 0.11088871096877502, "grad_norm": 1.2037399973430059, "learning_rate": 1.9658978558685557e-05, "loss": 1.7083, "step": 831 }, { "epoch": 0.11102215105417668, "grad_norm": 1.2631008462452196, "learning_rate": 1.9657858614280682e-05, "loss": 1.6926, "step": 832 }, { "epoch": 0.11115559113957833, "grad_norm": 1.3642794324735983, "learning_rate": 1.965673686589778e-05, "loss": 1.7298, "step": 833 }, { "epoch": 0.11128903122497999, "grad_norm": 1.7518036471850427, "learning_rate": 1.9655613313746378e-05, "loss": 1.669, "step": 834 }, { "epoch": 0.11142247131038163, "grad_norm": 1.3194015458910031, "learning_rate": 1.965448795803634e-05, "loss": 1.6799, "step": 835 }, { "epoch": 0.11155591139578329, "grad_norm": 1.2408206570528904, "learning_rate": 1.9653360798977872e-05, "loss": 1.7045, "step": 836 }, { "epoch": 0.11168935148118495, "grad_norm": 1.255528423044181, "learning_rate": 1.9652231836781514e-05, "loss": 1.6847, "step": 837 }, { "epoch": 0.1118227915665866, "grad_norm": 1.5892490809881583, "learning_rate": 1.965110107165815e-05, "loss": 1.6491, "step": 838 }, { "epoch": 0.11195623165198826, "grad_norm": 1.117194657692538, "learning_rate": 1.964996850381898e-05, "loss": 1.6832, "step": 839 }, { "epoch": 0.11208967173738991, "grad_norm": 1.3292775964235752, "learning_rate": 1.9648834133475564e-05, "loss": 1.7325, "step": 840 }, { "epoch": 0.11222311182279157, "grad_norm": 1.2706968194762298, "learning_rate": 1.964769796083979e-05, "loss": 1.7105, "step": 841 }, { "epoch": 0.11235655190819323, "grad_norm": 1.1622186343437864, "learning_rate": 1.964655998612388e-05, "loss": 1.6583, "step": 842 }, { "epoch": 0.11248999199359487, "grad_norm": 1.2930179210511994, "learning_rate": 1.9645420209540394e-05, "loss": 1.728, "step": 843 }, { "epoch": 0.11262343207899653, "grad_norm": 1.3384278372678646, "learning_rate": 1.964427863130223e-05, "loss": 1.7889, "step": 844 }, { "epoch": 0.11275687216439818, "grad_norm": 1.3828255437162351, "learning_rate": 1.964313525162262e-05, "loss": 1.6834, "step": 845 }, { "epoch": 0.11289031224979984, "grad_norm": 1.151410588671105, "learning_rate": 1.964199007071514e-05, "loss": 1.7299, "step": 846 }, { "epoch": 0.1130237523352015, "grad_norm": 1.1440279928005437, "learning_rate": 1.9640843088793692e-05, "loss": 1.7109, "step": 847 }, { "epoch": 0.11315719242060315, "grad_norm": 1.4236141452119535, "learning_rate": 1.9639694306072518e-05, "loss": 1.7074, "step": 848 }, { "epoch": 0.11329063250600481, "grad_norm": 1.2106461953286103, "learning_rate": 1.96385437227662e-05, "loss": 1.7274, "step": 849 }, { "epoch": 0.11342407259140645, "grad_norm": 1.1556803406122926, "learning_rate": 1.9637391339089655e-05, "loss": 1.6884, "step": 850 }, { "epoch": 0.11355751267680811, "grad_norm": 4.314358844478156, "learning_rate": 1.9636237155258132e-05, "loss": 1.7116, "step": 851 }, { "epoch": 0.11369095276220977, "grad_norm": 1.3854953599475233, "learning_rate": 1.9635081171487223e-05, "loss": 1.7502, "step": 852 }, { "epoch": 0.11382439284761142, "grad_norm": 1.3815136446017762, "learning_rate": 1.9633923387992852e-05, "loss": 1.6506, "step": 853 }, { "epoch": 0.11395783293301308, "grad_norm": 1.2593646207844973, "learning_rate": 1.9632763804991275e-05, "loss": 1.6633, "step": 854 }, { "epoch": 0.11409127301841474, "grad_norm": 1.1318090116685733, "learning_rate": 1.9631602422699093e-05, "loss": 1.6382, "step": 855 }, { "epoch": 0.11422471310381639, "grad_norm": 1.523834563895417, "learning_rate": 1.963043924133324e-05, "loss": 1.7173, "step": 856 }, { "epoch": 0.11435815318921803, "grad_norm": 1.369606548099637, "learning_rate": 1.962927426111098e-05, "loss": 1.7182, "step": 857 }, { "epoch": 0.11449159327461969, "grad_norm": 1.2489405584762354, "learning_rate": 1.9628107482249926e-05, "loss": 1.6701, "step": 858 }, { "epoch": 0.11462503336002135, "grad_norm": 1.3750560913985916, "learning_rate": 1.9626938904968013e-05, "loss": 1.725, "step": 859 }, { "epoch": 0.114758473445423, "grad_norm": 1.3689472702896512, "learning_rate": 1.962576852948352e-05, "loss": 1.6338, "step": 860 }, { "epoch": 0.11489191353082466, "grad_norm": 1.5090068885201111, "learning_rate": 1.9624596356015057e-05, "loss": 1.7569, "step": 861 }, { "epoch": 0.11502535361622632, "grad_norm": 1.1857397963469596, "learning_rate": 1.9623422384781575e-05, "loss": 1.689, "step": 862 }, { "epoch": 0.11515879370162797, "grad_norm": 4.383590101850042, "learning_rate": 1.9622246616002362e-05, "loss": 1.7869, "step": 863 }, { "epoch": 0.11529223378702963, "grad_norm": 1.6301360882052789, "learning_rate": 1.9621069049897026e-05, "loss": 1.7169, "step": 864 }, { "epoch": 0.11542567387243127, "grad_norm": 1.7846551885225532, "learning_rate": 1.961988968668554e-05, "loss": 1.731, "step": 865 }, { "epoch": 0.11555911395783293, "grad_norm": 1.576969047359226, "learning_rate": 1.9618708526588187e-05, "loss": 1.7073, "step": 866 }, { "epoch": 0.11569255404323459, "grad_norm": 1.4423014187494878, "learning_rate": 1.961752556982559e-05, "loss": 1.6984, "step": 867 }, { "epoch": 0.11582599412863624, "grad_norm": 1.411158314477759, "learning_rate": 1.9616340816618718e-05, "loss": 1.6689, "step": 868 }, { "epoch": 0.1159594342140379, "grad_norm": 1.2615885604648809, "learning_rate": 1.9615154267188865e-05, "loss": 1.7033, "step": 869 }, { "epoch": 0.11609287429943956, "grad_norm": 1.3497153430677546, "learning_rate": 1.9613965921757672e-05, "loss": 1.7052, "step": 870 }, { "epoch": 0.11622631438484121, "grad_norm": 1.3238657405928431, "learning_rate": 1.96127757805471e-05, "loss": 1.6803, "step": 871 }, { "epoch": 0.11635975447024285, "grad_norm": 1.4113872862910883, "learning_rate": 1.961158384377946e-05, "loss": 1.6832, "step": 872 }, { "epoch": 0.11649319455564451, "grad_norm": 1.1657735990335347, "learning_rate": 1.9610390111677388e-05, "loss": 1.6761, "step": 873 }, { "epoch": 0.11662663464104617, "grad_norm": 1.4288925931679666, "learning_rate": 1.9609194584463866e-05, "loss": 1.7142, "step": 874 }, { "epoch": 0.11676007472644782, "grad_norm": 1.3482735605608565, "learning_rate": 1.9607997262362195e-05, "loss": 1.7664, "step": 875 }, { "epoch": 0.11689351481184948, "grad_norm": 1.2758965299420557, "learning_rate": 1.960679814559603e-05, "loss": 1.7643, "step": 876 }, { "epoch": 0.11702695489725114, "grad_norm": 2.091988370893863, "learning_rate": 1.9605597234389347e-05, "loss": 1.6968, "step": 877 }, { "epoch": 0.1171603949826528, "grad_norm": 1.7213048905302604, "learning_rate": 1.9604394528966467e-05, "loss": 1.6905, "step": 878 }, { "epoch": 0.11729383506805445, "grad_norm": 1.3791112926314404, "learning_rate": 1.9603190029552036e-05, "loss": 1.6883, "step": 879 }, { "epoch": 0.11742727515345609, "grad_norm": 1.3014168835710302, "learning_rate": 1.9601983736371047e-05, "loss": 1.747, "step": 880 }, { "epoch": 0.11756071523885775, "grad_norm": 1.2352469046555101, "learning_rate": 1.9600775649648818e-05, "loss": 1.663, "step": 881 }, { "epoch": 0.1176941553242594, "grad_norm": 1.3042902670559662, "learning_rate": 1.9599565769611004e-05, "loss": 1.6927, "step": 882 }, { "epoch": 0.11782759540966106, "grad_norm": 1.3814487920433733, "learning_rate": 1.95983540964836e-05, "loss": 1.6648, "step": 883 }, { "epoch": 0.11796103549506272, "grad_norm": 1.192223516720673, "learning_rate": 1.9597140630492934e-05, "loss": 1.6921, "step": 884 }, { "epoch": 0.11809447558046438, "grad_norm": 1.2034614588136634, "learning_rate": 1.9595925371865667e-05, "loss": 1.6936, "step": 885 }, { "epoch": 0.11822791566586603, "grad_norm": 1.1538622984795832, "learning_rate": 1.959470832082879e-05, "loss": 1.6821, "step": 886 }, { "epoch": 0.11836135575126767, "grad_norm": 1.2580439984544938, "learning_rate": 1.9593489477609646e-05, "loss": 1.7528, "step": 887 }, { "epoch": 0.11849479583666933, "grad_norm": 1.4294495361334751, "learning_rate": 1.9592268842435884e-05, "loss": 1.7465, "step": 888 }, { "epoch": 0.11862823592207099, "grad_norm": 1.257681366416578, "learning_rate": 1.959104641553552e-05, "loss": 1.6859, "step": 889 }, { "epoch": 0.11876167600747264, "grad_norm": 1.2143693832991094, "learning_rate": 1.958982219713688e-05, "loss": 1.7008, "step": 890 }, { "epoch": 0.1188951160928743, "grad_norm": 1.2891059442807018, "learning_rate": 1.9588596187468642e-05, "loss": 1.7103, "step": 891 }, { "epoch": 0.11902855617827596, "grad_norm": 1.2311129858964251, "learning_rate": 1.9587368386759805e-05, "loss": 1.714, "step": 892 }, { "epoch": 0.11916199626367761, "grad_norm": 1.2118946144194225, "learning_rate": 1.9586138795239708e-05, "loss": 1.6704, "step": 893 }, { "epoch": 0.11929543634907927, "grad_norm": 1.159008481780571, "learning_rate": 1.9584907413138028e-05, "loss": 1.6918, "step": 894 }, { "epoch": 0.11942887643448091, "grad_norm": 1.2289280001259455, "learning_rate": 1.9583674240684774e-05, "loss": 1.7155, "step": 895 }, { "epoch": 0.11956231651988257, "grad_norm": 1.2482207680648996, "learning_rate": 1.9582439278110282e-05, "loss": 1.6881, "step": 896 }, { "epoch": 0.11969575660528423, "grad_norm": 1.435525566789562, "learning_rate": 1.9581202525645232e-05, "loss": 1.6873, "step": 897 }, { "epoch": 0.11982919669068588, "grad_norm": 1.3060438169865851, "learning_rate": 1.957996398352064e-05, "loss": 1.6797, "step": 898 }, { "epoch": 0.11996263677608754, "grad_norm": 1.2737667444400789, "learning_rate": 1.9578723651967845e-05, "loss": 1.6942, "step": 899 }, { "epoch": 0.1200960768614892, "grad_norm": 1.6807278602860825, "learning_rate": 1.957748153121853e-05, "loss": 1.693, "step": 900 }, { "epoch": 0.12022951694689085, "grad_norm": 1.2626260309636503, "learning_rate": 1.957623762150471e-05, "loss": 1.6766, "step": 901 }, { "epoch": 0.1203629570322925, "grad_norm": 1.6130682344451919, "learning_rate": 1.9574991923058735e-05, "loss": 1.7133, "step": 902 }, { "epoch": 0.12049639711769415, "grad_norm": 1.1868637434354896, "learning_rate": 1.957374443611328e-05, "loss": 1.719, "step": 903 }, { "epoch": 0.12062983720309581, "grad_norm": 1.1629455637439938, "learning_rate": 1.9572495160901365e-05, "loss": 1.6718, "step": 904 }, { "epoch": 0.12076327728849746, "grad_norm": 1.2126155193246908, "learning_rate": 1.9571244097656343e-05, "loss": 1.6731, "step": 905 }, { "epoch": 0.12089671737389912, "grad_norm": 1.3426037774841502, "learning_rate": 1.9569991246611897e-05, "loss": 1.7082, "step": 906 }, { "epoch": 0.12103015745930078, "grad_norm": 1.237669168766994, "learning_rate": 1.9568736608002045e-05, "loss": 1.698, "step": 907 }, { "epoch": 0.12116359754470243, "grad_norm": 1.2019255673482718, "learning_rate": 1.9567480182061134e-05, "loss": 1.7086, "step": 908 }, { "epoch": 0.12129703763010408, "grad_norm": 1.4253857823504195, "learning_rate": 1.956622196902386e-05, "loss": 1.7049, "step": 909 }, { "epoch": 0.12143047771550573, "grad_norm": 1.681519114291726, "learning_rate": 1.9564961969125235e-05, "loss": 1.6735, "step": 910 }, { "epoch": 0.12156391780090739, "grad_norm": 1.2989313363354473, "learning_rate": 1.9563700182600612e-05, "loss": 1.7477, "step": 911 }, { "epoch": 0.12169735788630905, "grad_norm": 1.1509458671490507, "learning_rate": 1.9562436609685685e-05, "loss": 1.7121, "step": 912 }, { "epoch": 0.1218307979717107, "grad_norm": 1.234465309616413, "learning_rate": 1.9561171250616472e-05, "loss": 1.7032, "step": 913 }, { "epoch": 0.12196423805711236, "grad_norm": 1.286630945624901, "learning_rate": 1.955990410562933e-05, "loss": 1.6834, "step": 914 }, { "epoch": 0.12209767814251402, "grad_norm": 1.2427074764646417, "learning_rate": 1.9558635174960942e-05, "loss": 1.6784, "step": 915 }, { "epoch": 0.12223111822791567, "grad_norm": 1.2023142218836853, "learning_rate": 1.9557364458848334e-05, "loss": 1.6907, "step": 916 }, { "epoch": 0.12236455831331731, "grad_norm": 1.1692669558611244, "learning_rate": 1.955609195752886e-05, "loss": 1.6604, "step": 917 }, { "epoch": 0.12249799839871897, "grad_norm": 1.441083824408625, "learning_rate": 1.9554817671240205e-05, "loss": 1.7268, "step": 918 }, { "epoch": 0.12263143848412063, "grad_norm": 1.1637128804063606, "learning_rate": 1.9553541600220395e-05, "loss": 1.7105, "step": 919 }, { "epoch": 0.12276487856952228, "grad_norm": 1.1948097801622213, "learning_rate": 1.955226374470779e-05, "loss": 1.6994, "step": 920 }, { "epoch": 0.12289831865492394, "grad_norm": 1.19037198249763, "learning_rate": 1.955098410494107e-05, "loss": 1.6805, "step": 921 }, { "epoch": 0.1230317587403256, "grad_norm": 1.1965278157578167, "learning_rate": 1.954970268115926e-05, "loss": 1.7126, "step": 922 }, { "epoch": 0.12316519882572725, "grad_norm": 12.043177426547418, "learning_rate": 1.954841947360172e-05, "loss": 1.6847, "step": 923 }, { "epoch": 0.1232986389111289, "grad_norm": 1.4171849613590173, "learning_rate": 1.9547134482508135e-05, "loss": 1.6773, "step": 924 }, { "epoch": 0.12343207899653055, "grad_norm": 1.277439153377438, "learning_rate": 1.9545847708118524e-05, "loss": 1.654, "step": 925 }, { "epoch": 0.12356551908193221, "grad_norm": 1.5344343949821269, "learning_rate": 1.954455915067325e-05, "loss": 1.714, "step": 926 }, { "epoch": 0.12369895916733387, "grad_norm": 1.3273336556361204, "learning_rate": 1.954326881041299e-05, "loss": 1.7272, "step": 927 }, { "epoch": 0.12383239925273552, "grad_norm": 1.2408609710996044, "learning_rate": 1.9541976687578773e-05, "loss": 1.7073, "step": 928 }, { "epoch": 0.12396583933813718, "grad_norm": 1.3534841024628121, "learning_rate": 1.954068278241195e-05, "loss": 1.7464, "step": 929 }, { "epoch": 0.12409927942353884, "grad_norm": 1.8710842856544097, "learning_rate": 1.9539387095154207e-05, "loss": 1.6445, "step": 930 }, { "epoch": 0.12423271950894049, "grad_norm": 1.3643772964473504, "learning_rate": 1.9538089626047566e-05, "loss": 1.7005, "step": 931 }, { "epoch": 0.12436615959434213, "grad_norm": 1.2248623778543628, "learning_rate": 1.953679037533438e-05, "loss": 1.6574, "step": 932 }, { "epoch": 0.12449959967974379, "grad_norm": 1.3022000288924167, "learning_rate": 1.9535489343257326e-05, "loss": 1.7176, "step": 933 }, { "epoch": 0.12463303976514545, "grad_norm": 1.4725976531996385, "learning_rate": 1.9534186530059434e-05, "loss": 1.6851, "step": 934 }, { "epoch": 0.1247664798505471, "grad_norm": 1.2400512724415127, "learning_rate": 1.9532881935984046e-05, "loss": 1.688, "step": 935 }, { "epoch": 0.12489991993594876, "grad_norm": 1.4145930106236353, "learning_rate": 1.9531575561274852e-05, "loss": 1.6835, "step": 936 }, { "epoch": 0.1250333600213504, "grad_norm": 1.3793980244786415, "learning_rate": 1.953026740617586e-05, "loss": 1.7196, "step": 937 }, { "epoch": 0.12516680010675207, "grad_norm": 13.216726991500538, "learning_rate": 1.9528957470931424e-05, "loss": 1.7525, "step": 938 }, { "epoch": 0.12530024019215372, "grad_norm": 1.3237742593449273, "learning_rate": 1.9527645755786224e-05, "loss": 1.6883, "step": 939 }, { "epoch": 0.1254336802775554, "grad_norm": 1.445191972771836, "learning_rate": 1.9526332260985275e-05, "loss": 1.6686, "step": 940 }, { "epoch": 0.12556712036295703, "grad_norm": 1.5185282218871532, "learning_rate": 1.952501698677392e-05, "loss": 1.6581, "step": 941 }, { "epoch": 0.1257005604483587, "grad_norm": 1.3124983698296924, "learning_rate": 1.9523699933397834e-05, "loss": 1.6544, "step": 942 }, { "epoch": 0.12583400053376034, "grad_norm": 1.183777382042402, "learning_rate": 1.9522381101103038e-05, "loss": 1.7612, "step": 943 }, { "epoch": 0.12596744061916199, "grad_norm": 1.2415393099346195, "learning_rate": 1.9521060490135865e-05, "loss": 1.7291, "step": 944 }, { "epoch": 0.12610088070456366, "grad_norm": 1.176856792014906, "learning_rate": 1.9519738100742995e-05, "loss": 1.6638, "step": 945 }, { "epoch": 0.1262343207899653, "grad_norm": 1.1487711372941813, "learning_rate": 1.9518413933171432e-05, "loss": 1.6464, "step": 946 }, { "epoch": 0.12636776087536697, "grad_norm": 1.4297554387238383, "learning_rate": 1.9517087987668522e-05, "loss": 1.6069, "step": 947 }, { "epoch": 0.1265012009607686, "grad_norm": 1.31215843034171, "learning_rate": 1.951576026448193e-05, "loss": 1.689, "step": 948 }, { "epoch": 0.12663464104617028, "grad_norm": 1.1689420027444986, "learning_rate": 1.951443076385966e-05, "loss": 1.6715, "step": 949 }, { "epoch": 0.12676808113157192, "grad_norm": 1.680542791799617, "learning_rate": 1.951309948605005e-05, "loss": 1.6546, "step": 950 }, { "epoch": 0.12690152121697357, "grad_norm": 1.1797862555326082, "learning_rate": 1.9511766431301766e-05, "loss": 1.7215, "step": 951 }, { "epoch": 0.12703496130237524, "grad_norm": 1.5427071574060558, "learning_rate": 1.951043159986381e-05, "loss": 1.6322, "step": 952 }, { "epoch": 0.12716840138777688, "grad_norm": 1.1779500188493681, "learning_rate": 1.950909499198551e-05, "loss": 1.6812, "step": 953 }, { "epoch": 0.12730184147317855, "grad_norm": 1.1913258200020012, "learning_rate": 1.950775660791653e-05, "loss": 1.6755, "step": 954 }, { "epoch": 0.1274352815585802, "grad_norm": 1.232490173323732, "learning_rate": 1.950641644790687e-05, "loss": 1.7031, "step": 955 }, { "epoch": 0.12756872164398186, "grad_norm": 1.327617648590312, "learning_rate": 1.9505074512206847e-05, "loss": 1.6455, "step": 956 }, { "epoch": 0.1277021617293835, "grad_norm": 1.1740821011806148, "learning_rate": 1.9503730801067125e-05, "loss": 1.6913, "step": 957 }, { "epoch": 0.12783560181478515, "grad_norm": 1.3812646322593256, "learning_rate": 1.9502385314738697e-05, "loss": 1.6215, "step": 958 }, { "epoch": 0.12796904190018682, "grad_norm": 1.3155996801138305, "learning_rate": 1.9501038053472877e-05, "loss": 1.6518, "step": 959 }, { "epoch": 0.12810248198558846, "grad_norm": 1.3349048294702728, "learning_rate": 1.9499689017521323e-05, "loss": 1.6609, "step": 960 }, { "epoch": 0.12823592207099013, "grad_norm": 1.2671901915804749, "learning_rate": 1.9498338207136022e-05, "loss": 1.725, "step": 961 }, { "epoch": 0.12836936215639178, "grad_norm": 1.1359327570816697, "learning_rate": 1.949698562256928e-05, "loss": 1.6597, "step": 962 }, { "epoch": 0.12850280224179345, "grad_norm": 1.2333203786654017, "learning_rate": 1.9495631264073758e-05, "loss": 1.6659, "step": 963 }, { "epoch": 0.1286362423271951, "grad_norm": 1.2554849794053347, "learning_rate": 1.9494275131902423e-05, "loss": 1.6749, "step": 964 }, { "epoch": 0.12876968241259673, "grad_norm": 1.3230800031319025, "learning_rate": 1.949291722630859e-05, "loss": 1.6923, "step": 965 }, { "epoch": 0.1289031224979984, "grad_norm": 2.1623618851588327, "learning_rate": 1.9491557547545903e-05, "loss": 1.6905, "step": 966 }, { "epoch": 0.12903656258340004, "grad_norm": 1.3456119660809565, "learning_rate": 1.9490196095868328e-05, "loss": 1.7063, "step": 967 }, { "epoch": 0.12917000266880171, "grad_norm": 1.600288717290206, "learning_rate": 1.9488832871530173e-05, "loss": 1.7006, "step": 968 }, { "epoch": 0.12930344275420336, "grad_norm": 1.3672986818133261, "learning_rate": 1.9487467874786076e-05, "loss": 1.7197, "step": 969 }, { "epoch": 0.12943688283960503, "grad_norm": 1.2760213827762075, "learning_rate": 1.9486101105890993e-05, "loss": 1.6708, "step": 970 }, { "epoch": 0.12957032292500667, "grad_norm": 1.393135659234334, "learning_rate": 1.948473256510023e-05, "loss": 1.6924, "step": 971 }, { "epoch": 0.1297037630104083, "grad_norm": 1.367984927030445, "learning_rate": 1.948336225266941e-05, "loss": 1.7087, "step": 972 }, { "epoch": 0.12983720309580998, "grad_norm": 1.2953603648125673, "learning_rate": 1.9481990168854494e-05, "loss": 1.7151, "step": 973 }, { "epoch": 0.12997064318121163, "grad_norm": 1.167269554903983, "learning_rate": 1.9480616313911774e-05, "loss": 1.6787, "step": 974 }, { "epoch": 0.1301040832666133, "grad_norm": 1.246260379852551, "learning_rate": 1.9479240688097864e-05, "loss": 1.7456, "step": 975 }, { "epoch": 0.13023752335201494, "grad_norm": 1.2725941500644802, "learning_rate": 1.9477863291669718e-05, "loss": 1.6552, "step": 976 }, { "epoch": 0.1303709634374166, "grad_norm": 1.2084106713506604, "learning_rate": 1.9476484124884622e-05, "loss": 1.6639, "step": 977 }, { "epoch": 0.13050440352281825, "grad_norm": 1.194547259427426, "learning_rate": 1.9475103188000183e-05, "loss": 1.6877, "step": 978 }, { "epoch": 0.13063784360821992, "grad_norm": 1.2780352392355023, "learning_rate": 1.9473720481274347e-05, "loss": 1.6605, "step": 979 }, { "epoch": 0.13077128369362157, "grad_norm": 1.5045389202002308, "learning_rate": 1.9472336004965387e-05, "loss": 1.7074, "step": 980 }, { "epoch": 0.1309047237790232, "grad_norm": 1.2421804342522536, "learning_rate": 1.947094975933191e-05, "loss": 1.686, "step": 981 }, { "epoch": 0.13103816386442488, "grad_norm": 1.1426071394085213, "learning_rate": 1.9469561744632845e-05, "loss": 1.6592, "step": 982 }, { "epoch": 0.13117160394982652, "grad_norm": 1.6999618031943038, "learning_rate": 1.9468171961127464e-05, "loss": 1.7108, "step": 983 }, { "epoch": 0.1313050440352282, "grad_norm": 1.2427085932088913, "learning_rate": 1.946678040907536e-05, "loss": 1.657, "step": 984 }, { "epoch": 0.13143848412062983, "grad_norm": 8.676443998708113, "learning_rate": 1.9465387088736455e-05, "loss": 1.6552, "step": 985 }, { "epoch": 0.1315719242060315, "grad_norm": 1.6685163662088924, "learning_rate": 1.9463992000371014e-05, "loss": 1.6877, "step": 986 }, { "epoch": 0.13170536429143315, "grad_norm": 1.4363009160454192, "learning_rate": 1.9462595144239616e-05, "loss": 1.7549, "step": 987 }, { "epoch": 0.1318388043768348, "grad_norm": 1.4849169479024855, "learning_rate": 1.946119652060318e-05, "loss": 1.6377, "step": 988 }, { "epoch": 0.13197224446223646, "grad_norm": 1.384439488826471, "learning_rate": 1.9459796129722962e-05, "loss": 1.69, "step": 989 }, { "epoch": 0.1321056845476381, "grad_norm": 1.486350400208871, "learning_rate": 1.9458393971860522e-05, "loss": 1.6643, "step": 990 }, { "epoch": 0.13223912463303977, "grad_norm": 1.4827478161059948, "learning_rate": 1.9456990047277777e-05, "loss": 1.6915, "step": 991 }, { "epoch": 0.13237256471844142, "grad_norm": 1.2049573199404926, "learning_rate": 1.9455584356236965e-05, "loss": 1.6686, "step": 992 }, { "epoch": 0.13250600480384309, "grad_norm": 1.490030676305235, "learning_rate": 1.9454176899000653e-05, "loss": 1.7358, "step": 993 }, { "epoch": 0.13263944488924473, "grad_norm": 1.1805251966858894, "learning_rate": 1.9452767675831733e-05, "loss": 1.6745, "step": 994 }, { "epoch": 0.13277288497464637, "grad_norm": 1.4646671341281234, "learning_rate": 1.9451356686993435e-05, "loss": 1.671, "step": 995 }, { "epoch": 0.13290632506004804, "grad_norm": 1.2502496308048068, "learning_rate": 1.9449943932749316e-05, "loss": 1.6041, "step": 996 }, { "epoch": 0.13303976514544968, "grad_norm": 1.3302442965706283, "learning_rate": 1.9448529413363264e-05, "loss": 1.692, "step": 997 }, { "epoch": 0.13317320523085135, "grad_norm": 1.5733523617366565, "learning_rate": 1.944711312909949e-05, "loss": 1.6779, "step": 998 }, { "epoch": 0.133306645316253, "grad_norm": 1.2021852120833718, "learning_rate": 1.9445695080222543e-05, "loss": 1.688, "step": 999 }, { "epoch": 0.13344008540165467, "grad_norm": 1.3704807104442995, "learning_rate": 1.9444275266997302e-05, "loss": 1.6618, "step": 1000 }, { "epoch": 0.1335735254870563, "grad_norm": 1.1311995894978009, "learning_rate": 1.9442853689688965e-05, "loss": 1.6144, "step": 1001 }, { "epoch": 0.13370696557245795, "grad_norm": 1.5372526742454895, "learning_rate": 1.9441430348563072e-05, "loss": 1.7336, "step": 1002 }, { "epoch": 0.13384040565785962, "grad_norm": 1.3090351266009799, "learning_rate": 1.9440005243885482e-05, "loss": 1.6752, "step": 1003 }, { "epoch": 0.13397384574326127, "grad_norm": 1.313411350120398, "learning_rate": 1.943857837592239e-05, "loss": 1.6838, "step": 1004 }, { "epoch": 0.13410728582866294, "grad_norm": 1.3955032805375227, "learning_rate": 1.943714974494032e-05, "loss": 1.7377, "step": 1005 }, { "epoch": 0.13424072591406458, "grad_norm": 1.1613148391890242, "learning_rate": 1.943571935120613e-05, "loss": 1.6595, "step": 1006 }, { "epoch": 0.13437416599946625, "grad_norm": 1.5018393485227763, "learning_rate": 1.943428719498699e-05, "loss": 1.7533, "step": 1007 }, { "epoch": 0.1345076060848679, "grad_norm": 1.1758602862709078, "learning_rate": 1.9432853276550412e-05, "loss": 1.6804, "step": 1008 }, { "epoch": 0.13464104617026956, "grad_norm": 2.1854421038226075, "learning_rate": 1.9431417596164246e-05, "loss": 1.7315, "step": 1009 }, { "epoch": 0.1347744862556712, "grad_norm": 1.3381407737075066, "learning_rate": 1.942998015409665e-05, "loss": 1.6721, "step": 1010 }, { "epoch": 0.13490792634107285, "grad_norm": 1.2547528294212573, "learning_rate": 1.9428540950616127e-05, "loss": 1.6782, "step": 1011 }, { "epoch": 0.13504136642647452, "grad_norm": 5.939952891644821, "learning_rate": 1.94270999859915e-05, "loss": 1.6811, "step": 1012 }, { "epoch": 0.13517480651187616, "grad_norm": 1.3380178004751162, "learning_rate": 1.942565726049193e-05, "loss": 1.6712, "step": 1013 }, { "epoch": 0.13530824659727783, "grad_norm": 1.2408694717484239, "learning_rate": 1.9424212774386894e-05, "loss": 1.7259, "step": 1014 }, { "epoch": 0.13544168668267947, "grad_norm": 1.2756143544585534, "learning_rate": 1.9422766527946217e-05, "loss": 1.6631, "step": 1015 }, { "epoch": 0.13557512676808114, "grad_norm": 1.4810902782091733, "learning_rate": 1.942131852144003e-05, "loss": 1.6927, "step": 1016 }, { "epoch": 0.1357085668534828, "grad_norm": 1.2419032087935538, "learning_rate": 1.941986875513881e-05, "loss": 1.6898, "step": 1017 }, { "epoch": 0.13584200693888443, "grad_norm": 1.2663899524703148, "learning_rate": 1.9418417229313357e-05, "loss": 1.679, "step": 1018 }, { "epoch": 0.1359754470242861, "grad_norm": 1.6857267169996077, "learning_rate": 1.9416963944234795e-05, "loss": 1.6354, "step": 1019 }, { "epoch": 0.13610888710968774, "grad_norm": 1.4554351634221496, "learning_rate": 1.9415508900174587e-05, "loss": 1.7462, "step": 1020 }, { "epoch": 0.1362423271950894, "grad_norm": 1.2229508649241836, "learning_rate": 1.941405209740452e-05, "loss": 1.6975, "step": 1021 }, { "epoch": 0.13637576728049106, "grad_norm": 1.8919520969087211, "learning_rate": 1.94125935361967e-05, "loss": 1.7258, "step": 1022 }, { "epoch": 0.13650920736589273, "grad_norm": 1.2669785367995678, "learning_rate": 1.9411133216823573e-05, "loss": 1.7269, "step": 1023 }, { "epoch": 0.13664264745129437, "grad_norm": 1.394093842588581, "learning_rate": 1.9409671139557913e-05, "loss": 1.6975, "step": 1024 }, { "epoch": 0.136776087536696, "grad_norm": 1.382684928389381, "learning_rate": 1.9408207304672815e-05, "loss": 1.667, "step": 1025 }, { "epoch": 0.13690952762209768, "grad_norm": 1.1652111588255187, "learning_rate": 1.940674171244171e-05, "loss": 1.6379, "step": 1026 }, { "epoch": 0.13704296770749932, "grad_norm": 1.3038078568540548, "learning_rate": 1.9405274363138353e-05, "loss": 1.6447, "step": 1027 }, { "epoch": 0.137176407792901, "grad_norm": 1.1387426323073633, "learning_rate": 1.940380525703683e-05, "loss": 1.6675, "step": 1028 }, { "epoch": 0.13730984787830264, "grad_norm": 1.474771378157035, "learning_rate": 1.940233439441155e-05, "loss": 1.7026, "step": 1029 }, { "epoch": 0.1374432879637043, "grad_norm": 1.3414858835240848, "learning_rate": 1.9400861775537253e-05, "loss": 1.6212, "step": 1030 }, { "epoch": 0.13757672804910595, "grad_norm": 1.4550778747404767, "learning_rate": 1.939938740068901e-05, "loss": 1.6785, "step": 1031 }, { "epoch": 0.1377101681345076, "grad_norm": 1.3556680615235186, "learning_rate": 1.939791127014222e-05, "loss": 1.6597, "step": 1032 }, { "epoch": 0.13784360821990926, "grad_norm": 1.4224278910538175, "learning_rate": 1.93964333841726e-05, "loss": 1.6917, "step": 1033 }, { "epoch": 0.1379770483053109, "grad_norm": 2.2986502944753275, "learning_rate": 1.9394953743056205e-05, "loss": 1.6871, "step": 1034 }, { "epoch": 0.13811048839071258, "grad_norm": 2.0365277465356924, "learning_rate": 1.939347234706942e-05, "loss": 1.7383, "step": 1035 }, { "epoch": 0.13824392847611422, "grad_norm": 1.4908556334356526, "learning_rate": 1.9391989196488947e-05, "loss": 1.6577, "step": 1036 }, { "epoch": 0.1383773685615159, "grad_norm": 1.2426015406392708, "learning_rate": 1.9390504291591825e-05, "loss": 1.6762, "step": 1037 }, { "epoch": 0.13851080864691753, "grad_norm": 1.1622345855922083, "learning_rate": 1.9389017632655417e-05, "loss": 1.6452, "step": 1038 }, { "epoch": 0.13864424873231918, "grad_norm": 1.2036449724168792, "learning_rate": 1.938752921995741e-05, "loss": 1.6652, "step": 1039 }, { "epoch": 0.13877768881772085, "grad_norm": 1.3193413331431836, "learning_rate": 1.938603905377583e-05, "loss": 1.7153, "step": 1040 }, { "epoch": 0.1389111289031225, "grad_norm": 3.9652856357976005, "learning_rate": 1.9384547134389017e-05, "loss": 1.6998, "step": 1041 }, { "epoch": 0.13904456898852416, "grad_norm": 1.2832120473397637, "learning_rate": 1.9383053462075645e-05, "loss": 1.6865, "step": 1042 }, { "epoch": 0.1391780090739258, "grad_norm": 1.9488157590841544, "learning_rate": 1.938155803711472e-05, "loss": 1.7429, "step": 1043 }, { "epoch": 0.13931144915932747, "grad_norm": 1.334661146454789, "learning_rate": 1.9380060859785562e-05, "loss": 1.687, "step": 1044 }, { "epoch": 0.13944488924472911, "grad_norm": 1.254965676826134, "learning_rate": 1.937856193036783e-05, "loss": 1.647, "step": 1045 }, { "epoch": 0.13957832933013078, "grad_norm": 1.3924358256427258, "learning_rate": 1.9377061249141515e-05, "loss": 1.7171, "step": 1046 }, { "epoch": 0.13971176941553243, "grad_norm": 1.3956910901280772, "learning_rate": 1.9375558816386915e-05, "loss": 1.743, "step": 1047 }, { "epoch": 0.13984520950093407, "grad_norm": 1.2701133772552229, "learning_rate": 1.9374054632384677e-05, "loss": 1.7041, "step": 1048 }, { "epoch": 0.13997864958633574, "grad_norm": 1.3350393841513681, "learning_rate": 1.937254869741576e-05, "loss": 1.6846, "step": 1049 }, { "epoch": 0.14011208967173738, "grad_norm": 1.4627388869419586, "learning_rate": 1.9371041011761456e-05, "loss": 1.6207, "step": 1050 }, { "epoch": 0.14024552975713905, "grad_norm": 1.4866992503394356, "learning_rate": 1.9369531575703388e-05, "loss": 1.7129, "step": 1051 }, { "epoch": 0.1403789698425407, "grad_norm": 1.1685028747662984, "learning_rate": 1.9368020389523493e-05, "loss": 1.668, "step": 1052 }, { "epoch": 0.14051240992794237, "grad_norm": 1.3215015297643653, "learning_rate": 1.936650745350405e-05, "loss": 1.7145, "step": 1053 }, { "epoch": 0.140645850013344, "grad_norm": 1.3693292355410813, "learning_rate": 1.9364992767927656e-05, "loss": 1.701, "step": 1054 }, { "epoch": 0.14077929009874565, "grad_norm": 1.2258997785581536, "learning_rate": 1.9363476333077237e-05, "loss": 1.7085, "step": 1055 }, { "epoch": 0.14091273018414732, "grad_norm": 1.2455180226057498, "learning_rate": 1.9361958149236045e-05, "loss": 1.7652, "step": 1056 }, { "epoch": 0.14104617026954896, "grad_norm": 1.1398881313468456, "learning_rate": 1.9360438216687663e-05, "loss": 1.6521, "step": 1057 }, { "epoch": 0.14117961035495064, "grad_norm": 1.4032373423726727, "learning_rate": 1.9358916535715995e-05, "loss": 1.6822, "step": 1058 }, { "epoch": 0.14131305044035228, "grad_norm": 1.2386258022282814, "learning_rate": 1.9357393106605273e-05, "loss": 1.6565, "step": 1059 }, { "epoch": 0.14144649052575395, "grad_norm": 1.2563917204753323, "learning_rate": 1.9355867929640054e-05, "loss": 1.7224, "step": 1060 }, { "epoch": 0.1415799306111556, "grad_norm": 1.1600878491451077, "learning_rate": 1.9354341005105228e-05, "loss": 1.6831, "step": 1061 }, { "epoch": 0.14171337069655723, "grad_norm": 1.1898921958138153, "learning_rate": 1.9352812333286006e-05, "loss": 1.7034, "step": 1062 }, { "epoch": 0.1418468107819589, "grad_norm": 1.3776077175515267, "learning_rate": 1.9351281914467924e-05, "loss": 1.6558, "step": 1063 }, { "epoch": 0.14198025086736055, "grad_norm": 1.4565706718782248, "learning_rate": 1.934974974893685e-05, "loss": 1.7167, "step": 1064 }, { "epoch": 0.14211369095276222, "grad_norm": 1.2442621324452396, "learning_rate": 1.9348215836978978e-05, "loss": 1.6459, "step": 1065 }, { "epoch": 0.14224713103816386, "grad_norm": 1.1267333234718482, "learning_rate": 1.9346680178880813e-05, "loss": 1.7133, "step": 1066 }, { "epoch": 0.14238057112356553, "grad_norm": 1.1628045406596315, "learning_rate": 1.9345142774929214e-05, "loss": 1.6475, "step": 1067 }, { "epoch": 0.14251401120896717, "grad_norm": 1.1018713280842254, "learning_rate": 1.9343603625411338e-05, "loss": 1.5936, "step": 1068 }, { "epoch": 0.14264745129436882, "grad_norm": 1.193003889508144, "learning_rate": 1.9342062730614688e-05, "loss": 1.6788, "step": 1069 }, { "epoch": 0.14278089137977049, "grad_norm": 1.1343045579344455, "learning_rate": 1.9340520090827086e-05, "loss": 1.6561, "step": 1070 }, { "epoch": 0.14291433146517213, "grad_norm": 1.1710425209498292, "learning_rate": 1.9338975706336672e-05, "loss": 1.5907, "step": 1071 }, { "epoch": 0.1430477715505738, "grad_norm": 1.3817597541347761, "learning_rate": 1.9337429577431926e-05, "loss": 1.7147, "step": 1072 }, { "epoch": 0.14318121163597544, "grad_norm": 1.1573508951337832, "learning_rate": 1.9335881704401642e-05, "loss": 1.698, "step": 1073 }, { "epoch": 0.1433146517213771, "grad_norm": 1.0912396515600804, "learning_rate": 1.9334332087534956e-05, "loss": 1.6756, "step": 1074 }, { "epoch": 0.14344809180677875, "grad_norm": 1.1837140980523393, "learning_rate": 1.9332780727121306e-05, "loss": 1.6908, "step": 1075 }, { "epoch": 0.1435815318921804, "grad_norm": 9.673515773086278, "learning_rate": 1.9331227623450475e-05, "loss": 1.71, "step": 1076 }, { "epoch": 0.14371497197758207, "grad_norm": 1.3759562691897052, "learning_rate": 1.9329672776812563e-05, "loss": 1.7094, "step": 1077 }, { "epoch": 0.1438484120629837, "grad_norm": 1.1597684961716581, "learning_rate": 1.9328116187498e-05, "loss": 1.6868, "step": 1078 }, { "epoch": 0.14398185214838538, "grad_norm": 1.861180144739059, "learning_rate": 1.9326557855797537e-05, "loss": 1.695, "step": 1079 }, { "epoch": 0.14411529223378702, "grad_norm": 1.3971061309442314, "learning_rate": 1.9324997782002258e-05, "loss": 1.6394, "step": 1080 }, { "epoch": 0.1442487323191887, "grad_norm": 1.1696642529732444, "learning_rate": 1.9323435966403557e-05, "loss": 1.6628, "step": 1081 }, { "epoch": 0.14438217240459034, "grad_norm": 1.4363997803769488, "learning_rate": 1.932187240929317e-05, "loss": 1.6358, "step": 1082 }, { "epoch": 0.144515612489992, "grad_norm": 1.2009780630510838, "learning_rate": 1.932030711096315e-05, "loss": 1.6941, "step": 1083 }, { "epoch": 0.14464905257539365, "grad_norm": 1.36326083414618, "learning_rate": 1.931874007170588e-05, "loss": 1.7041, "step": 1084 }, { "epoch": 0.1447824926607953, "grad_norm": 1.1982580377129664, "learning_rate": 1.9317171291814058e-05, "loss": 1.7533, "step": 1085 }, { "epoch": 0.14491593274619696, "grad_norm": 1.2862961234292616, "learning_rate": 1.931560077158072e-05, "loss": 1.6702, "step": 1086 }, { "epoch": 0.1450493728315986, "grad_norm": 1.1631924404688276, "learning_rate": 1.9314028511299223e-05, "loss": 1.6285, "step": 1087 }, { "epoch": 0.14518281291700028, "grad_norm": 1.3994096815725487, "learning_rate": 1.9312454511263242e-05, "loss": 1.6882, "step": 1088 }, { "epoch": 0.14531625300240192, "grad_norm": 1.2308714130213054, "learning_rate": 1.9310878771766787e-05, "loss": 1.662, "step": 1089 }, { "epoch": 0.1454496930878036, "grad_norm": 1.2400995841145075, "learning_rate": 1.9309301293104182e-05, "loss": 1.6559, "step": 1090 }, { "epoch": 0.14558313317320523, "grad_norm": 1.1648127201282061, "learning_rate": 1.930772207557009e-05, "loss": 1.697, "step": 1091 }, { "epoch": 0.14571657325860687, "grad_norm": 1.1588422983053825, "learning_rate": 1.9306141119459483e-05, "loss": 1.7384, "step": 1092 }, { "epoch": 0.14585001334400854, "grad_norm": 1.504230986251467, "learning_rate": 1.930455842506767e-05, "loss": 1.6645, "step": 1093 }, { "epoch": 0.1459834534294102, "grad_norm": 1.4254729141660931, "learning_rate": 1.9302973992690282e-05, "loss": 1.7063, "step": 1094 }, { "epoch": 0.14611689351481186, "grad_norm": 3.5366147232314296, "learning_rate": 1.9301387822623266e-05, "loss": 1.7281, "step": 1095 }, { "epoch": 0.1462503336002135, "grad_norm": 1.2361609720127364, "learning_rate": 1.929979991516291e-05, "loss": 1.7209, "step": 1096 }, { "epoch": 0.14638377368561517, "grad_norm": 1.095253097518337, "learning_rate": 1.929821027060581e-05, "loss": 1.63, "step": 1097 }, { "epoch": 0.1465172137710168, "grad_norm": 1.2870911756015357, "learning_rate": 1.9296618889248893e-05, "loss": 1.6939, "step": 1098 }, { "epoch": 0.14665065385641846, "grad_norm": 1.3716292424936098, "learning_rate": 1.9295025771389414e-05, "loss": 1.7245, "step": 1099 }, { "epoch": 0.14678409394182013, "grad_norm": 1.2427470679061952, "learning_rate": 1.9293430917324955e-05, "loss": 1.666, "step": 1100 }, { "epoch": 0.14691753402722177, "grad_norm": 1.460158973358874, "learning_rate": 1.9291834327353403e-05, "loss": 1.7147, "step": 1101 }, { "epoch": 0.14705097411262344, "grad_norm": 1.184155803132356, "learning_rate": 1.9290236001772995e-05, "loss": 1.6566, "step": 1102 }, { "epoch": 0.14718441419802508, "grad_norm": 1.2799912701156315, "learning_rate": 1.9288635940882273e-05, "loss": 1.6627, "step": 1103 }, { "epoch": 0.14731785428342675, "grad_norm": 1.4406914311548737, "learning_rate": 1.9287034144980114e-05, "loss": 1.6758, "step": 1104 }, { "epoch": 0.1474512943688284, "grad_norm": 1.2219976061823485, "learning_rate": 1.928543061436571e-05, "loss": 1.71, "step": 1105 }, { "epoch": 0.14758473445423004, "grad_norm": 1.2519035119374464, "learning_rate": 1.9283825349338588e-05, "loss": 1.6538, "step": 1106 }, { "epoch": 0.1477181745396317, "grad_norm": 1.3524232905207045, "learning_rate": 1.9282218350198588e-05, "loss": 1.6776, "step": 1107 }, { "epoch": 0.14785161462503335, "grad_norm": 1.30724045787273, "learning_rate": 1.928060961724589e-05, "loss": 1.6482, "step": 1108 }, { "epoch": 0.14798505471043502, "grad_norm": 1.1368808570323825, "learning_rate": 1.9278999150780972e-05, "loss": 1.6194, "step": 1109 }, { "epoch": 0.14811849479583666, "grad_norm": 1.300916828823837, "learning_rate": 1.927738695110466e-05, "loss": 1.6947, "step": 1110 }, { "epoch": 0.14825193488123833, "grad_norm": 1.1726520891396903, "learning_rate": 1.9275773018518094e-05, "loss": 1.6945, "step": 1111 }, { "epoch": 0.14838537496663998, "grad_norm": 1.4312765994187449, "learning_rate": 1.9274157353322736e-05, "loss": 1.7079, "step": 1112 }, { "epoch": 0.14851881505204162, "grad_norm": 1.3042905231751305, "learning_rate": 1.9272539955820372e-05, "loss": 1.6697, "step": 1113 }, { "epoch": 0.1486522551374433, "grad_norm": 1.2109176431266062, "learning_rate": 1.9270920826313122e-05, "loss": 1.7066, "step": 1114 }, { "epoch": 0.14878569522284493, "grad_norm": 1.269814239652846, "learning_rate": 1.926929996510341e-05, "loss": 1.6679, "step": 1115 }, { "epoch": 0.1489191353082466, "grad_norm": 1.26285461824445, "learning_rate": 1.9267677372494003e-05, "loss": 1.6651, "step": 1116 }, { "epoch": 0.14905257539364825, "grad_norm": 1.4162229382915708, "learning_rate": 1.9266053048787973e-05, "loss": 1.7093, "step": 1117 }, { "epoch": 0.14918601547904992, "grad_norm": 1.1695719882927538, "learning_rate": 1.926442699428874e-05, "loss": 1.6775, "step": 1118 }, { "epoch": 0.14931945556445156, "grad_norm": 1.5241355299411088, "learning_rate": 1.9262799209300017e-05, "loss": 1.7078, "step": 1119 }, { "epoch": 0.14945289564985323, "grad_norm": 1.3679819271720925, "learning_rate": 1.9261169694125868e-05, "loss": 1.6512, "step": 1120 }, { "epoch": 0.14958633573525487, "grad_norm": 1.0852517422618726, "learning_rate": 1.925953844907066e-05, "loss": 1.6732, "step": 1121 }, { "epoch": 0.14971977582065651, "grad_norm": 1.1505718677390935, "learning_rate": 1.9257905474439093e-05, "loss": 1.6627, "step": 1122 }, { "epoch": 0.14985321590605818, "grad_norm": 1.366145668087986, "learning_rate": 1.9256270770536187e-05, "loss": 1.6224, "step": 1123 }, { "epoch": 0.14998665599145983, "grad_norm": 1.2009763385012844, "learning_rate": 1.925463433766729e-05, "loss": 1.667, "step": 1124 }, { "epoch": 0.1501200960768615, "grad_norm": 1.1495050946339627, "learning_rate": 1.9252996176138065e-05, "loss": 1.7039, "step": 1125 }, { "epoch": 0.15025353616226314, "grad_norm": 1.202004713006955, "learning_rate": 1.9251356286254506e-05, "loss": 1.6953, "step": 1126 }, { "epoch": 0.1503869762476648, "grad_norm": 1.2049847095982797, "learning_rate": 1.9249714668322922e-05, "loss": 1.7301, "step": 1127 }, { "epoch": 0.15052041633306645, "grad_norm": 1.3586596709801637, "learning_rate": 1.924807132264995e-05, "loss": 1.6488, "step": 1128 }, { "epoch": 0.1506538564184681, "grad_norm": 1.304495283882212, "learning_rate": 1.924642624954255e-05, "loss": 1.6784, "step": 1129 }, { "epoch": 0.15078729650386977, "grad_norm": 1.2254989980261268, "learning_rate": 1.9244779449308e-05, "loss": 1.6712, "step": 1130 }, { "epoch": 0.1509207365892714, "grad_norm": 1.131251283705494, "learning_rate": 1.9243130922253902e-05, "loss": 1.6639, "step": 1131 }, { "epoch": 0.15105417667467308, "grad_norm": 1.0999764886521537, "learning_rate": 1.924148066868819e-05, "loss": 1.7247, "step": 1132 }, { "epoch": 0.15118761676007472, "grad_norm": 1.1537923655335296, "learning_rate": 1.92398286889191e-05, "loss": 1.6669, "step": 1133 }, { "epoch": 0.1513210568454764, "grad_norm": 1.2811987513376073, "learning_rate": 1.9238174983255216e-05, "loss": 1.6873, "step": 1134 }, { "epoch": 0.15145449693087804, "grad_norm": 1.189456464225642, "learning_rate": 1.9236519552005425e-05, "loss": 1.7032, "step": 1135 }, { "epoch": 0.15158793701627968, "grad_norm": 1.226791373769935, "learning_rate": 1.923486239547894e-05, "loss": 1.7184, "step": 1136 }, { "epoch": 0.15172137710168135, "grad_norm": 1.103146464311287, "learning_rate": 1.9233203513985307e-05, "loss": 1.6491, "step": 1137 }, { "epoch": 0.151854817187083, "grad_norm": 1.1779372105249897, "learning_rate": 1.923154290783438e-05, "loss": 1.6474, "step": 1138 }, { "epoch": 0.15198825727248466, "grad_norm": 1.2505895966166218, "learning_rate": 1.922988057733634e-05, "loss": 1.7153, "step": 1139 }, { "epoch": 0.1521216973578863, "grad_norm": 1.2586045407401536, "learning_rate": 1.92282165228017e-05, "loss": 1.6305, "step": 1140 }, { "epoch": 0.15225513744328797, "grad_norm": 1.1801506753191604, "learning_rate": 1.9226550744541276e-05, "loss": 1.6803, "step": 1141 }, { "epoch": 0.15238857752868962, "grad_norm": 1.2155598704280677, "learning_rate": 1.9224883242866223e-05, "loss": 1.6921, "step": 1142 }, { "epoch": 0.15252201761409126, "grad_norm": 1.1553853036033908, "learning_rate": 1.9223214018088007e-05, "loss": 1.6683, "step": 1143 }, { "epoch": 0.15265545769949293, "grad_norm": 1.7355840451920468, "learning_rate": 1.9221543070518427e-05, "loss": 1.6839, "step": 1144 }, { "epoch": 0.15278889778489457, "grad_norm": 10.109110380656796, "learning_rate": 1.9219870400469588e-05, "loss": 1.762, "step": 1145 }, { "epoch": 0.15292233787029624, "grad_norm": 1.7615641665674961, "learning_rate": 1.9218196008253934e-05, "loss": 1.7248, "step": 1146 }, { "epoch": 0.15305577795569789, "grad_norm": 15.239890369804119, "learning_rate": 1.921651989418422e-05, "loss": 1.7259, "step": 1147 }, { "epoch": 0.15318921804109956, "grad_norm": 1.2991891405839884, "learning_rate": 1.9214842058573517e-05, "loss": 1.6952, "step": 1148 }, { "epoch": 0.1533226581265012, "grad_norm": 1.2912291596013608, "learning_rate": 1.921316250173524e-05, "loss": 1.6993, "step": 1149 }, { "epoch": 0.15345609821190287, "grad_norm": 1.1663630774969274, "learning_rate": 1.92114812239831e-05, "loss": 1.6719, "step": 1150 }, { "epoch": 0.1535895382973045, "grad_norm": 1.2285813852625158, "learning_rate": 1.9209798225631144e-05, "loss": 1.7126, "step": 1151 }, { "epoch": 0.15372297838270615, "grad_norm": 1.1467685561512206, "learning_rate": 1.9208113506993734e-05, "loss": 1.6242, "step": 1152 }, { "epoch": 0.15385641846810783, "grad_norm": 1.1465340851099293, "learning_rate": 1.920642706838556e-05, "loss": 1.7279, "step": 1153 }, { "epoch": 0.15398985855350947, "grad_norm": 1.1578599709330035, "learning_rate": 1.9204738910121635e-05, "loss": 1.6549, "step": 1154 }, { "epoch": 0.15412329863891114, "grad_norm": 1.1683154746708295, "learning_rate": 1.9203049032517276e-05, "loss": 1.6429, "step": 1155 }, { "epoch": 0.15425673872431278, "grad_norm": 1.20845397374898, "learning_rate": 1.920135743588814e-05, "loss": 1.681, "step": 1156 }, { "epoch": 0.15439017880971445, "grad_norm": 1.3494049767476677, "learning_rate": 1.9199664120550197e-05, "loss": 1.7222, "step": 1157 }, { "epoch": 0.1545236188951161, "grad_norm": 1.1544440730765555, "learning_rate": 1.9197969086819735e-05, "loss": 1.6895, "step": 1158 }, { "epoch": 0.15465705898051774, "grad_norm": 1.1304557747461206, "learning_rate": 1.9196272335013373e-05, "loss": 1.6369, "step": 1159 }, { "epoch": 0.1547904990659194, "grad_norm": 1.119564016408627, "learning_rate": 1.9194573865448044e-05, "loss": 1.6653, "step": 1160 }, { "epoch": 0.15492393915132105, "grad_norm": 1.1506348346262587, "learning_rate": 1.9192873678441e-05, "loss": 1.6596, "step": 1161 }, { "epoch": 0.15505737923672272, "grad_norm": 1.197324233503071, "learning_rate": 1.9191171774309816e-05, "loss": 1.7255, "step": 1162 }, { "epoch": 0.15519081932212436, "grad_norm": 1.1674782781588378, "learning_rate": 1.9189468153372394e-05, "loss": 1.6745, "step": 1163 }, { "epoch": 0.15532425940752603, "grad_norm": 1.308070450830591, "learning_rate": 1.9187762815946942e-05, "loss": 1.6307, "step": 1164 }, { "epoch": 0.15545769949292768, "grad_norm": 1.1249631316465178, "learning_rate": 1.9186055762352003e-05, "loss": 1.6749, "step": 1165 }, { "epoch": 0.15559113957832932, "grad_norm": 1.22808803451986, "learning_rate": 1.918434699290644e-05, "loss": 1.7167, "step": 1166 }, { "epoch": 0.155724579663731, "grad_norm": 1.142395151395811, "learning_rate": 1.918263650792942e-05, "loss": 1.7143, "step": 1167 }, { "epoch": 0.15585801974913263, "grad_norm": 1.1288423274679555, "learning_rate": 1.9180924307740453e-05, "loss": 1.6465, "step": 1168 }, { "epoch": 0.1559914598345343, "grad_norm": 1.4078641204542017, "learning_rate": 1.9179210392659353e-05, "loss": 1.7182, "step": 1169 }, { "epoch": 0.15612489991993594, "grad_norm": 1.1847205713407944, "learning_rate": 1.917749476300626e-05, "loss": 1.6681, "step": 1170 }, { "epoch": 0.15625834000533761, "grad_norm": 1.096423039092125, "learning_rate": 1.9175777419101634e-05, "loss": 1.644, "step": 1171 }, { "epoch": 0.15639178009073926, "grad_norm": 1.115302048331846, "learning_rate": 1.917405836126626e-05, "loss": 1.7129, "step": 1172 }, { "epoch": 0.1565252201761409, "grad_norm": 1.1932528993802167, "learning_rate": 1.917233758982123e-05, "loss": 1.6633, "step": 1173 }, { "epoch": 0.15665866026154257, "grad_norm": 1.4039420662328945, "learning_rate": 1.917061510508797e-05, "loss": 1.7039, "step": 1174 }, { "epoch": 0.1567921003469442, "grad_norm": 1.3133488883749793, "learning_rate": 1.9168890907388224e-05, "loss": 1.639, "step": 1175 }, { "epoch": 0.15692554043234588, "grad_norm": 1.1512042689466013, "learning_rate": 1.9167164997044044e-05, "loss": 1.6891, "step": 1176 }, { "epoch": 0.15705898051774753, "grad_norm": 1.4318667585471574, "learning_rate": 1.9165437374377815e-05, "loss": 1.653, "step": 1177 }, { "epoch": 0.1571924206031492, "grad_norm": 1.5436421619414216, "learning_rate": 1.9163708039712238e-05, "loss": 1.6979, "step": 1178 }, { "epoch": 0.15732586068855084, "grad_norm": 1.163504109244437, "learning_rate": 1.916197699337033e-05, "loss": 1.6458, "step": 1179 }, { "epoch": 0.15745930077395248, "grad_norm": 1.139437842953167, "learning_rate": 1.9160244235675436e-05, "loss": 1.7182, "step": 1180 }, { "epoch": 0.15759274085935415, "grad_norm": 1.206983035678986, "learning_rate": 1.915850976695121e-05, "loss": 1.6761, "step": 1181 }, { "epoch": 0.1577261809447558, "grad_norm": 1.1218445645631256, "learning_rate": 1.9156773587521634e-05, "loss": 1.691, "step": 1182 }, { "epoch": 0.15785962103015747, "grad_norm": 15.435206717839069, "learning_rate": 1.9155035697711008e-05, "loss": 1.6704, "step": 1183 }, { "epoch": 0.1579930611155591, "grad_norm": 1.253164673663483, "learning_rate": 1.9153296097843944e-05, "loss": 1.6793, "step": 1184 }, { "epoch": 0.15812650120096078, "grad_norm": 1.2264750200407253, "learning_rate": 1.9151554788245383e-05, "loss": 1.6929, "step": 1185 }, { "epoch": 0.15825994128636242, "grad_norm": 1.4772217364726286, "learning_rate": 1.9149811769240585e-05, "loss": 1.6452, "step": 1186 }, { "epoch": 0.1583933813717641, "grad_norm": 1.1489987221515146, "learning_rate": 1.9148067041155122e-05, "loss": 1.6701, "step": 1187 }, { "epoch": 0.15852682145716573, "grad_norm": 1.2914670100647596, "learning_rate": 1.914632060431489e-05, "loss": 1.7303, "step": 1188 }, { "epoch": 0.15866026154256738, "grad_norm": 1.299350514801071, "learning_rate": 1.9144572459046104e-05, "loss": 1.681, "step": 1189 }, { "epoch": 0.15879370162796905, "grad_norm": 1.3119727658820086, "learning_rate": 1.9142822605675296e-05, "loss": 1.696, "step": 1190 }, { "epoch": 0.1589271417133707, "grad_norm": 1.1081403990699985, "learning_rate": 1.9141071044529318e-05, "loss": 1.6713, "step": 1191 }, { "epoch": 0.15906058179877236, "grad_norm": 1.1284776489853792, "learning_rate": 1.9139317775935348e-05, "loss": 1.6738, "step": 1192 }, { "epoch": 0.159194021884174, "grad_norm": 1.1261514805847666, "learning_rate": 1.9137562800220872e-05, "loss": 1.7054, "step": 1193 }, { "epoch": 0.15932746196957567, "grad_norm": 1.313484879072088, "learning_rate": 1.9135806117713697e-05, "loss": 1.6716, "step": 1194 }, { "epoch": 0.15946090205497732, "grad_norm": 1.278083810674143, "learning_rate": 1.913404772874196e-05, "loss": 1.6833, "step": 1195 }, { "epoch": 0.15959434214037896, "grad_norm": 1.4428806272343444, "learning_rate": 1.9132287633634097e-05, "loss": 1.6512, "step": 1196 }, { "epoch": 0.15972778222578063, "grad_norm": 1.209556177929987, "learning_rate": 1.9130525832718884e-05, "loss": 1.6942, "step": 1197 }, { "epoch": 0.15986122231118227, "grad_norm": 1.1106358013243507, "learning_rate": 1.9128762326325394e-05, "loss": 1.6982, "step": 1198 }, { "epoch": 0.15999466239658394, "grad_norm": 1.0994258199335178, "learning_rate": 1.912699711478304e-05, "loss": 1.6675, "step": 1199 }, { "epoch": 0.16012810248198558, "grad_norm": 1.1185448399034177, "learning_rate": 1.912523019842154e-05, "loss": 1.6926, "step": 1200 }, { "epoch": 0.16026154256738726, "grad_norm": 1.1152656846403104, "learning_rate": 1.912346157757093e-05, "loss": 1.7078, "step": 1201 }, { "epoch": 0.1603949826527889, "grad_norm": 1.2924058078691643, "learning_rate": 1.9121691252561578e-05, "loss": 1.7084, "step": 1202 }, { "epoch": 0.16052842273819054, "grad_norm": 1.0611065471561838, "learning_rate": 1.911991922372415e-05, "loss": 1.6611, "step": 1203 }, { "epoch": 0.1606618628235922, "grad_norm": 1.3699736830557303, "learning_rate": 1.911814549138965e-05, "loss": 1.6897, "step": 1204 }, { "epoch": 0.16079530290899385, "grad_norm": 1.4751343067321943, "learning_rate": 1.9116370055889382e-05, "loss": 1.6932, "step": 1205 }, { "epoch": 0.16092874299439552, "grad_norm": 1.1716253181544283, "learning_rate": 1.911459291755498e-05, "loss": 1.7116, "step": 1206 }, { "epoch": 0.16106218307979717, "grad_norm": 1.1114747666351001, "learning_rate": 1.91128140767184e-05, "loss": 1.6428, "step": 1207 }, { "epoch": 0.16119562316519884, "grad_norm": 1.0879838952081482, "learning_rate": 1.9111033533711897e-05, "loss": 1.6373, "step": 1208 }, { "epoch": 0.16132906325060048, "grad_norm": 1.1094725515932415, "learning_rate": 1.9109251288868064e-05, "loss": 1.6457, "step": 1209 }, { "epoch": 0.16146250333600212, "grad_norm": 1.1303873737596453, "learning_rate": 1.9107467342519808e-05, "loss": 1.6839, "step": 1210 }, { "epoch": 0.1615959434214038, "grad_norm": 1.4781044833796941, "learning_rate": 1.9105681695000342e-05, "loss": 1.6406, "step": 1211 }, { "epoch": 0.16172938350680544, "grad_norm": 1.1657729793459108, "learning_rate": 1.9103894346643204e-05, "loss": 1.6726, "step": 1212 }, { "epoch": 0.1618628235922071, "grad_norm": 1.2198288103403852, "learning_rate": 1.910210529778226e-05, "loss": 1.6929, "step": 1213 }, { "epoch": 0.16199626367760875, "grad_norm": 1.1871995635063328, "learning_rate": 1.9100314548751676e-05, "loss": 1.6907, "step": 1214 }, { "epoch": 0.16212970376301042, "grad_norm": 1.1864807109439603, "learning_rate": 1.9098522099885944e-05, "loss": 1.653, "step": 1215 }, { "epoch": 0.16226314384841206, "grad_norm": 1.2368576392699981, "learning_rate": 1.9096727951519872e-05, "loss": 1.7201, "step": 1216 }, { "epoch": 0.1623965839338137, "grad_norm": 1.1260231051664356, "learning_rate": 1.909493210398859e-05, "loss": 1.6123, "step": 1217 }, { "epoch": 0.16253002401921537, "grad_norm": 1.4256559162950146, "learning_rate": 1.9093134557627544e-05, "loss": 1.666, "step": 1218 }, { "epoch": 0.16266346410461702, "grad_norm": 1.4887124369987752, "learning_rate": 1.9091335312772493e-05, "loss": 1.7597, "step": 1219 }, { "epoch": 0.1627969041900187, "grad_norm": 1.1294991712339444, "learning_rate": 1.9089534369759508e-05, "loss": 1.688, "step": 1220 }, { "epoch": 0.16293034427542033, "grad_norm": 1.177420926926422, "learning_rate": 1.9087731728924996e-05, "loss": 1.7286, "step": 1221 }, { "epoch": 0.163063784360822, "grad_norm": 1.6913536181421298, "learning_rate": 1.9085927390605663e-05, "loss": 1.6913, "step": 1222 }, { "epoch": 0.16319722444622364, "grad_norm": 1.2163160869711371, "learning_rate": 1.908412135513854e-05, "loss": 1.6669, "step": 1223 }, { "epoch": 0.1633306645316253, "grad_norm": 1.3781638643041434, "learning_rate": 1.9082313622860976e-05, "loss": 1.657, "step": 1224 }, { "epoch": 0.16346410461702696, "grad_norm": 1.121301734458933, "learning_rate": 1.9080504194110633e-05, "loss": 1.7017, "step": 1225 }, { "epoch": 0.1635975447024286, "grad_norm": 1.2118234782577832, "learning_rate": 1.907869306922549e-05, "loss": 1.7203, "step": 1226 }, { "epoch": 0.16373098478783027, "grad_norm": 1.2074642428560403, "learning_rate": 1.9076880248543847e-05, "loss": 1.6558, "step": 1227 }, { "epoch": 0.1638644248732319, "grad_norm": 1.119322476739159, "learning_rate": 1.9075065732404318e-05, "loss": 1.6947, "step": 1228 }, { "epoch": 0.16399786495863358, "grad_norm": 1.1009776042368575, "learning_rate": 1.9073249521145833e-05, "loss": 1.6984, "step": 1229 }, { "epoch": 0.16413130504403523, "grad_norm": 1.3636116015946005, "learning_rate": 1.9071431615107642e-05, "loss": 1.6266, "step": 1230 }, { "epoch": 0.1642647451294369, "grad_norm": 1.2193076150679258, "learning_rate": 1.9069612014629307e-05, "loss": 1.6548, "step": 1231 }, { "epoch": 0.16439818521483854, "grad_norm": 1.108647716162107, "learning_rate": 1.9067790720050708e-05, "loss": 1.6949, "step": 1232 }, { "epoch": 0.16453162530024018, "grad_norm": 1.3428440762492813, "learning_rate": 1.9065967731712044e-05, "loss": 1.628, "step": 1233 }, { "epoch": 0.16466506538564185, "grad_norm": 1.1264569960102961, "learning_rate": 1.9064143049953826e-05, "loss": 1.69, "step": 1234 }, { "epoch": 0.1647985054710435, "grad_norm": 1.2129394646481007, "learning_rate": 1.9062316675116886e-05, "loss": 1.7039, "step": 1235 }, { "epoch": 0.16493194555644516, "grad_norm": 1.3168167400509623, "learning_rate": 1.9060488607542373e-05, "loss": 1.6854, "step": 1236 }, { "epoch": 0.1650653856418468, "grad_norm": 1.172892361541946, "learning_rate": 1.9058658847571744e-05, "loss": 1.6908, "step": 1237 }, { "epoch": 0.16519882572724848, "grad_norm": 1.8095333564387064, "learning_rate": 1.905682739554678e-05, "loss": 1.6951, "step": 1238 }, { "epoch": 0.16533226581265012, "grad_norm": 1.3485139476585741, "learning_rate": 1.905499425180957e-05, "loss": 1.6748, "step": 1239 }, { "epoch": 0.16546570589805176, "grad_norm": 1.2787725949509623, "learning_rate": 1.905315941670253e-05, "loss": 1.6657, "step": 1240 }, { "epoch": 0.16559914598345343, "grad_norm": 1.2224431058019596, "learning_rate": 1.9051322890568386e-05, "loss": 1.6656, "step": 1241 }, { "epoch": 0.16573258606885508, "grad_norm": 1.137537755181391, "learning_rate": 1.904948467375018e-05, "loss": 1.6577, "step": 1242 }, { "epoch": 0.16586602615425675, "grad_norm": 1.0868355979985056, "learning_rate": 1.9047644766591273e-05, "loss": 1.7073, "step": 1243 }, { "epoch": 0.1659994662396584, "grad_norm": 1.114462768904141, "learning_rate": 1.9045803169435326e-05, "loss": 1.6305, "step": 1244 }, { "epoch": 0.16613290632506006, "grad_norm": 1.3561430912216557, "learning_rate": 1.9043959882626343e-05, "loss": 1.6719, "step": 1245 }, { "epoch": 0.1662663464104617, "grad_norm": 1.3197220598884711, "learning_rate": 1.9042114906508623e-05, "loss": 1.6904, "step": 1246 }, { "epoch": 0.16639978649586334, "grad_norm": 1.1663473878740998, "learning_rate": 1.9040268241426786e-05, "loss": 1.6815, "step": 1247 }, { "epoch": 0.16653322658126501, "grad_norm": 1.1208171885233782, "learning_rate": 1.9038419887725768e-05, "loss": 1.6888, "step": 1248 }, { "epoch": 0.16666666666666666, "grad_norm": 1.2317096593237165, "learning_rate": 1.9036569845750822e-05, "loss": 1.6998, "step": 1249 }, { "epoch": 0.16680010675206833, "grad_norm": 1.2118086361733955, "learning_rate": 1.9034718115847516e-05, "loss": 1.7239, "step": 1250 }, { "epoch": 0.16693354683746997, "grad_norm": 1.1328099587839497, "learning_rate": 1.9032864698361728e-05, "loss": 1.6664, "step": 1251 }, { "epoch": 0.16706698692287164, "grad_norm": 1.2351889795437343, "learning_rate": 1.9031009593639656e-05, "loss": 1.7225, "step": 1252 }, { "epoch": 0.16720042700827328, "grad_norm": 1.1680475805874204, "learning_rate": 1.9029152802027816e-05, "loss": 1.7055, "step": 1253 }, { "epoch": 0.16733386709367493, "grad_norm": 1.1396480995870708, "learning_rate": 1.9027294323873032e-05, "loss": 1.6754, "step": 1254 }, { "epoch": 0.1674673071790766, "grad_norm": 1.099951848229152, "learning_rate": 1.902543415952245e-05, "loss": 1.6574, "step": 1255 }, { "epoch": 0.16760074726447824, "grad_norm": 1.091862726736696, "learning_rate": 1.9023572309323522e-05, "loss": 1.6254, "step": 1256 }, { "epoch": 0.1677341873498799, "grad_norm": 1.2660462090997906, "learning_rate": 1.9021708773624027e-05, "loss": 1.6797, "step": 1257 }, { "epoch": 0.16786762743528155, "grad_norm": 1.1200461208933035, "learning_rate": 1.901984355277205e-05, "loss": 1.6885, "step": 1258 }, { "epoch": 0.16800106752068322, "grad_norm": 1.2050844123558844, "learning_rate": 1.9017976647115988e-05, "loss": 1.6851, "step": 1259 }, { "epoch": 0.16813450760608487, "grad_norm": 1.142685776004093, "learning_rate": 1.9016108057004566e-05, "loss": 1.625, "step": 1260 }, { "epoch": 0.16826794769148654, "grad_norm": 1.1684071493919217, "learning_rate": 1.9014237782786806e-05, "loss": 1.6768, "step": 1261 }, { "epoch": 0.16840138777688818, "grad_norm": 1.1496482446886893, "learning_rate": 1.9012365824812063e-05, "loss": 1.6409, "step": 1262 }, { "epoch": 0.16853482786228982, "grad_norm": 1.0944121754604215, "learning_rate": 1.9010492183429994e-05, "loss": 1.63, "step": 1263 }, { "epoch": 0.1686682679476915, "grad_norm": 1.0842462969633766, "learning_rate": 1.9008616858990572e-05, "loss": 1.646, "step": 1264 }, { "epoch": 0.16880170803309313, "grad_norm": 1.4767767757650743, "learning_rate": 1.9006739851844086e-05, "loss": 1.6454, "step": 1265 }, { "epoch": 0.1689351481184948, "grad_norm": 1.18032002297524, "learning_rate": 1.9004861162341144e-05, "loss": 1.6861, "step": 1266 }, { "epoch": 0.16906858820389645, "grad_norm": 1.3372419431896025, "learning_rate": 1.9002980790832663e-05, "loss": 1.6586, "step": 1267 }, { "epoch": 0.16920202828929812, "grad_norm": 1.172100506483245, "learning_rate": 1.900109873766987e-05, "loss": 1.6657, "step": 1268 }, { "epoch": 0.16933546837469976, "grad_norm": 1.1145469314883394, "learning_rate": 1.8999215003204316e-05, "loss": 1.7082, "step": 1269 }, { "epoch": 0.1694689084601014, "grad_norm": 1.1206009643179509, "learning_rate": 1.8997329587787856e-05, "loss": 1.6578, "step": 1270 }, { "epoch": 0.16960234854550307, "grad_norm": 1.399049311820928, "learning_rate": 1.8995442491772668e-05, "loss": 1.6955, "step": 1271 }, { "epoch": 0.16973578863090472, "grad_norm": 1.1958580398149483, "learning_rate": 1.899355371551124e-05, "loss": 1.7101, "step": 1272 }, { "epoch": 0.1698692287163064, "grad_norm": 1.3556638628411706, "learning_rate": 1.8991663259356374e-05, "loss": 1.7108, "step": 1273 }, { "epoch": 0.17000266880170803, "grad_norm": 1.3451150415787545, "learning_rate": 1.8989771123661186e-05, "loss": 1.7315, "step": 1274 }, { "epoch": 0.1701361088871097, "grad_norm": 1.3135350609625422, "learning_rate": 1.8987877308779104e-05, "loss": 1.6609, "step": 1275 }, { "epoch": 0.17026954897251134, "grad_norm": 1.1333530911950962, "learning_rate": 1.8985981815063873e-05, "loss": 1.6684, "step": 1276 }, { "epoch": 0.17040298905791298, "grad_norm": 1.2791530568330647, "learning_rate": 1.8984084642869546e-05, "loss": 1.6651, "step": 1277 }, { "epoch": 0.17053642914331466, "grad_norm": 1.3692883154733826, "learning_rate": 1.8982185792550495e-05, "loss": 1.7462, "step": 1278 }, { "epoch": 0.1706698692287163, "grad_norm": 1.335470306118413, "learning_rate": 1.8980285264461404e-05, "loss": 1.6328, "step": 1279 }, { "epoch": 0.17080330931411797, "grad_norm": 1.609394077858067, "learning_rate": 1.8978383058957272e-05, "loss": 1.7349, "step": 1280 }, { "epoch": 0.1709367493995196, "grad_norm": 1.0926180749124967, "learning_rate": 1.8976479176393405e-05, "loss": 1.6251, "step": 1281 }, { "epoch": 0.17107018948492128, "grad_norm": 1.5796826003726887, "learning_rate": 1.8974573617125433e-05, "loss": 1.7046, "step": 1282 }, { "epoch": 0.17120362957032292, "grad_norm": 1.2777834800327206, "learning_rate": 1.8972666381509283e-05, "loss": 1.6877, "step": 1283 }, { "epoch": 0.17133706965572457, "grad_norm": 1.1654445244110958, "learning_rate": 1.8970757469901215e-05, "loss": 1.6931, "step": 1284 }, { "epoch": 0.17147050974112624, "grad_norm": 1.2695593578179767, "learning_rate": 1.8968846882657784e-05, "loss": 1.6171, "step": 1285 }, { "epoch": 0.17160394982652788, "grad_norm": 1.1260255267285963, "learning_rate": 1.896693462013587e-05, "loss": 1.6733, "step": 1286 }, { "epoch": 0.17173738991192955, "grad_norm": 1.1761366558123016, "learning_rate": 1.8965020682692662e-05, "loss": 1.6513, "step": 1287 }, { "epoch": 0.1718708299973312, "grad_norm": 1.3298600101322868, "learning_rate": 1.8963105070685667e-05, "loss": 1.6684, "step": 1288 }, { "epoch": 0.17200427008273286, "grad_norm": 1.1292417955776037, "learning_rate": 1.8961187784472685e-05, "loss": 1.6489, "step": 1289 }, { "epoch": 0.1721377101681345, "grad_norm": 1.2976031015347522, "learning_rate": 1.8959268824411857e-05, "loss": 1.7234, "step": 1290 }, { "epoch": 0.17227115025353618, "grad_norm": 5.468850373981861, "learning_rate": 1.8957348190861612e-05, "loss": 1.6717, "step": 1291 }, { "epoch": 0.17240459033893782, "grad_norm": 1.2061510206612853, "learning_rate": 1.8955425884180715e-05, "loss": 1.6682, "step": 1292 }, { "epoch": 0.17253803042433946, "grad_norm": 1.2340106256554149, "learning_rate": 1.895350190472822e-05, "loss": 1.6452, "step": 1293 }, { "epoch": 0.17267147050974113, "grad_norm": 1.1295754093637331, "learning_rate": 1.895157625286351e-05, "loss": 1.6993, "step": 1294 }, { "epoch": 0.17280491059514277, "grad_norm": 1.1791235092887307, "learning_rate": 1.8949648928946275e-05, "loss": 1.6347, "step": 1295 }, { "epoch": 0.17293835068054444, "grad_norm": 1.0917941158292654, "learning_rate": 1.894771993333651e-05, "loss": 1.6361, "step": 1296 }, { "epoch": 0.1730717907659461, "grad_norm": 1.243225904269034, "learning_rate": 1.8945789266394543e-05, "loss": 1.6859, "step": 1297 }, { "epoch": 0.17320523085134776, "grad_norm": 1.4426394116019783, "learning_rate": 1.894385692848099e-05, "loss": 1.6686, "step": 1298 }, { "epoch": 0.1733386709367494, "grad_norm": 1.1402525280881741, "learning_rate": 1.8941922919956788e-05, "loss": 1.6656, "step": 1299 }, { "epoch": 0.17347211102215104, "grad_norm": 1.1712942843243668, "learning_rate": 1.8939987241183195e-05, "loss": 1.6622, "step": 1300 }, { "epoch": 0.1736055511075527, "grad_norm": 1.1420488961794493, "learning_rate": 1.8938049892521772e-05, "loss": 1.6048, "step": 1301 }, { "epoch": 0.17373899119295436, "grad_norm": 1.4695327916997878, "learning_rate": 1.8936110874334392e-05, "loss": 1.6625, "step": 1302 }, { "epoch": 0.17387243127835603, "grad_norm": 1.0900641908664435, "learning_rate": 1.8934170186983242e-05, "loss": 1.6396, "step": 1303 }, { "epoch": 0.17400587136375767, "grad_norm": 1.1307482898544658, "learning_rate": 1.8932227830830822e-05, "loss": 1.6878, "step": 1304 }, { "epoch": 0.17413931144915934, "grad_norm": 1.0752115709650287, "learning_rate": 1.8930283806239936e-05, "loss": 1.6868, "step": 1305 }, { "epoch": 0.17427275153456098, "grad_norm": 1.3371575151194106, "learning_rate": 1.8928338113573715e-05, "loss": 1.6374, "step": 1306 }, { "epoch": 0.17440619161996262, "grad_norm": 1.2309293597249826, "learning_rate": 1.8926390753195583e-05, "loss": 1.6629, "step": 1307 }, { "epoch": 0.1745396317053643, "grad_norm": 1.1999413493774056, "learning_rate": 1.8924441725469293e-05, "loss": 1.6837, "step": 1308 }, { "epoch": 0.17467307179076594, "grad_norm": 4.1911788978313735, "learning_rate": 1.8922491030758893e-05, "loss": 1.7471, "step": 1309 }, { "epoch": 0.1748065118761676, "grad_norm": 1.1779561572349584, "learning_rate": 1.892053866942876e-05, "loss": 1.7246, "step": 1310 }, { "epoch": 0.17493995196156925, "grad_norm": 1.1617629781133412, "learning_rate": 1.8918584641843564e-05, "loss": 1.6462, "step": 1311 }, { "epoch": 0.17507339204697092, "grad_norm": 1.2004531219911387, "learning_rate": 1.89166289483683e-05, "loss": 1.6432, "step": 1312 }, { "epoch": 0.17520683213237256, "grad_norm": 1.1057648722865505, "learning_rate": 1.891467158936827e-05, "loss": 1.6363, "step": 1313 }, { "epoch": 0.1753402722177742, "grad_norm": 1.0830292426032284, "learning_rate": 1.8912712565209082e-05, "loss": 1.6543, "step": 1314 }, { "epoch": 0.17547371230317588, "grad_norm": 1.390700802694287, "learning_rate": 1.8910751876256663e-05, "loss": 1.5831, "step": 1315 }, { "epoch": 0.17560715238857752, "grad_norm": 1.1442545355007538, "learning_rate": 1.890878952287724e-05, "loss": 1.6725, "step": 1316 }, { "epoch": 0.1757405924739792, "grad_norm": 4.524949596846156, "learning_rate": 1.8906825505437375e-05, "loss": 1.7064, "step": 1317 }, { "epoch": 0.17587403255938083, "grad_norm": 1.5116207526291794, "learning_rate": 1.8904859824303906e-05, "loss": 1.6625, "step": 1318 }, { "epoch": 0.1760074726447825, "grad_norm": 1.3022482630711962, "learning_rate": 1.8902892479844012e-05, "loss": 1.6899, "step": 1319 }, { "epoch": 0.17614091273018415, "grad_norm": 1.3036609447195004, "learning_rate": 1.8900923472425162e-05, "loss": 1.7094, "step": 1320 }, { "epoch": 0.1762743528155858, "grad_norm": 1.201826945742321, "learning_rate": 1.889895280241515e-05, "loss": 1.6874, "step": 1321 }, { "epoch": 0.17640779290098746, "grad_norm": 1.331895493241366, "learning_rate": 1.8896980470182074e-05, "loss": 1.6567, "step": 1322 }, { "epoch": 0.1765412329863891, "grad_norm": 1.2443633859323437, "learning_rate": 1.889500647609434e-05, "loss": 1.667, "step": 1323 }, { "epoch": 0.17667467307179077, "grad_norm": 1.2258655395860159, "learning_rate": 1.8893030820520676e-05, "loss": 1.6747, "step": 1324 }, { "epoch": 0.17680811315719241, "grad_norm": 1.1121857279555136, "learning_rate": 1.8891053503830096e-05, "loss": 1.6623, "step": 1325 }, { "epoch": 0.17694155324259409, "grad_norm": 1.1496733774130343, "learning_rate": 1.8889074526391956e-05, "loss": 1.6505, "step": 1326 }, { "epoch": 0.17707499332799573, "grad_norm": 1.190622161497496, "learning_rate": 1.8887093888575897e-05, "loss": 1.6946, "step": 1327 }, { "epoch": 0.1772084334133974, "grad_norm": 1.234769822108329, "learning_rate": 1.8885111590751887e-05, "loss": 1.6822, "step": 1328 }, { "epoch": 0.17734187349879904, "grad_norm": 1.1961580846084812, "learning_rate": 1.8883127633290193e-05, "loss": 1.6098, "step": 1329 }, { "epoch": 0.17747531358420068, "grad_norm": 1.1500701811676424, "learning_rate": 1.888114201656139e-05, "loss": 1.6979, "step": 1330 }, { "epoch": 0.17760875366960235, "grad_norm": 1.068835069330444, "learning_rate": 1.887915474093638e-05, "loss": 1.6443, "step": 1331 }, { "epoch": 0.177742193755004, "grad_norm": 1.0715312154596754, "learning_rate": 1.8877165806786353e-05, "loss": 1.6178, "step": 1332 }, { "epoch": 0.17787563384040567, "grad_norm": 1.3293068311768508, "learning_rate": 1.8875175214482823e-05, "loss": 1.6494, "step": 1333 }, { "epoch": 0.1780090739258073, "grad_norm": 1.2954287617747784, "learning_rate": 1.8873182964397614e-05, "loss": 1.6546, "step": 1334 }, { "epoch": 0.17814251401120898, "grad_norm": 1.125061002872152, "learning_rate": 1.887118905690285e-05, "loss": 1.6897, "step": 1335 }, { "epoch": 0.17827595409661062, "grad_norm": 1.0654922527296347, "learning_rate": 1.8869193492370975e-05, "loss": 1.6899, "step": 1336 }, { "epoch": 0.17840939418201227, "grad_norm": 1.2859684785579713, "learning_rate": 1.8867196271174732e-05, "loss": 1.6489, "step": 1337 }, { "epoch": 0.17854283426741394, "grad_norm": 1.2913796594054934, "learning_rate": 1.8865197393687184e-05, "loss": 1.6531, "step": 1338 }, { "epoch": 0.17867627435281558, "grad_norm": 1.1287917089144135, "learning_rate": 1.8863196860281696e-05, "loss": 1.7184, "step": 1339 }, { "epoch": 0.17880971443821725, "grad_norm": 1.066588369186229, "learning_rate": 1.8861194671331946e-05, "loss": 1.7052, "step": 1340 }, { "epoch": 0.1789431545236189, "grad_norm": 1.3155491787656075, "learning_rate": 1.8859190827211922e-05, "loss": 1.739, "step": 1341 }, { "epoch": 0.17907659460902056, "grad_norm": 1.351336369334145, "learning_rate": 1.8857185328295915e-05, "loss": 1.6433, "step": 1342 }, { "epoch": 0.1792100346944222, "grad_norm": 1.144045347845426, "learning_rate": 1.885517817495853e-05, "loss": 1.6588, "step": 1343 }, { "epoch": 0.17934347477982385, "grad_norm": 1.0634778809440733, "learning_rate": 1.8853169367574684e-05, "loss": 1.6542, "step": 1344 }, { "epoch": 0.17947691486522552, "grad_norm": 1.177888372263127, "learning_rate": 1.88511589065196e-05, "loss": 1.6408, "step": 1345 }, { "epoch": 0.17961035495062716, "grad_norm": 1.2708302318930562, "learning_rate": 1.88491467921688e-05, "loss": 1.6817, "step": 1346 }, { "epoch": 0.17974379503602883, "grad_norm": 1.170081033949634, "learning_rate": 1.8847133024898135e-05, "loss": 1.6806, "step": 1347 }, { "epoch": 0.17987723512143047, "grad_norm": 1.0804261336260308, "learning_rate": 1.884511760508375e-05, "loss": 1.6134, "step": 1348 }, { "epoch": 0.18001067520683214, "grad_norm": 1.087811387530533, "learning_rate": 1.88431005331021e-05, "loss": 1.6393, "step": 1349 }, { "epoch": 0.1801441152922338, "grad_norm": 1.3853627907695811, "learning_rate": 1.8841081809329952e-05, "loss": 1.7002, "step": 1350 }, { "epoch": 0.18027755537763543, "grad_norm": 1.1966968339164359, "learning_rate": 1.8839061434144383e-05, "loss": 1.6703, "step": 1351 }, { "epoch": 0.1804109954630371, "grad_norm": 1.1737840062211229, "learning_rate": 1.8837039407922776e-05, "loss": 1.7185, "step": 1352 }, { "epoch": 0.18054443554843874, "grad_norm": 1.0896871608203742, "learning_rate": 1.883501573104282e-05, "loss": 1.6583, "step": 1353 }, { "epoch": 0.1806778756338404, "grad_norm": 1.3179615694791937, "learning_rate": 1.883299040388251e-05, "loss": 1.6311, "step": 1354 }, { "epoch": 0.18081131571924206, "grad_norm": 1.1110191440501396, "learning_rate": 1.883096342682017e-05, "loss": 1.6461, "step": 1355 }, { "epoch": 0.18094475580464373, "grad_norm": 1.327469108458132, "learning_rate": 1.88289348002344e-05, "loss": 1.624, "step": 1356 }, { "epoch": 0.18107819589004537, "grad_norm": 1.1046636131234442, "learning_rate": 1.882690452450413e-05, "loss": 1.6798, "step": 1357 }, { "epoch": 0.181211635975447, "grad_norm": 1.0969053723393494, "learning_rate": 1.8824872600008595e-05, "loss": 1.6196, "step": 1358 }, { "epoch": 0.18134507606084868, "grad_norm": 4.9414750848280455, "learning_rate": 1.8822839027127333e-05, "loss": 1.6521, "step": 1359 }, { "epoch": 0.18147851614625032, "grad_norm": 1.1053740384798774, "learning_rate": 1.8820803806240192e-05, "loss": 1.6409, "step": 1360 }, { "epoch": 0.181611956231652, "grad_norm": 1.1800356925014113, "learning_rate": 1.881876693772733e-05, "loss": 1.6952, "step": 1361 }, { "epoch": 0.18174539631705364, "grad_norm": 1.339932629654733, "learning_rate": 1.8816728421969208e-05, "loss": 1.6583, "step": 1362 }, { "epoch": 0.1818788364024553, "grad_norm": 1.1809439235732044, "learning_rate": 1.88146882593466e-05, "loss": 1.657, "step": 1363 }, { "epoch": 0.18201227648785695, "grad_norm": 1.1124814368658855, "learning_rate": 1.8812646450240586e-05, "loss": 1.6248, "step": 1364 }, { "epoch": 0.18214571657325862, "grad_norm": 1.3035826545815201, "learning_rate": 1.881060299503255e-05, "loss": 1.6932, "step": 1365 }, { "epoch": 0.18227915665866026, "grad_norm": 1.2040318351320083, "learning_rate": 1.8808557894104185e-05, "loss": 1.6214, "step": 1366 }, { "epoch": 0.1824125967440619, "grad_norm": 1.2476963311275309, "learning_rate": 1.8806511147837492e-05, "loss": 1.6661, "step": 1367 }, { "epoch": 0.18254603682946358, "grad_norm": 1.1447416720262047, "learning_rate": 1.8804462756614788e-05, "loss": 1.6795, "step": 1368 }, { "epoch": 0.18267947691486522, "grad_norm": 1.2146315931776808, "learning_rate": 1.8802412720818683e-05, "loss": 1.6683, "step": 1369 }, { "epoch": 0.1828129170002669, "grad_norm": 1.0961663105222377, "learning_rate": 1.8800361040832098e-05, "loss": 1.6522, "step": 1370 }, { "epoch": 0.18294635708566853, "grad_norm": 1.4720443132763514, "learning_rate": 1.8798307717038267e-05, "loss": 1.6972, "step": 1371 }, { "epoch": 0.1830797971710702, "grad_norm": 1.077369472777587, "learning_rate": 1.8796252749820728e-05, "loss": 1.6867, "step": 1372 }, { "epoch": 0.18321323725647184, "grad_norm": 1.082352978247377, "learning_rate": 1.8794196139563324e-05, "loss": 1.6186, "step": 1373 }, { "epoch": 0.1833466773418735, "grad_norm": 1.3376025342187408, "learning_rate": 1.8792137886650207e-05, "loss": 1.6242, "step": 1374 }, { "epoch": 0.18348011742727516, "grad_norm": 1.060000822920305, "learning_rate": 1.8790077991465833e-05, "loss": 1.6694, "step": 1375 }, { "epoch": 0.1836135575126768, "grad_norm": 1.1946549594821052, "learning_rate": 1.8788016454394972e-05, "loss": 1.7262, "step": 1376 }, { "epoch": 0.18374699759807847, "grad_norm": 1.1956085836847756, "learning_rate": 1.8785953275822694e-05, "loss": 1.6692, "step": 1377 }, { "epoch": 0.1838804376834801, "grad_norm": 1.398886222221775, "learning_rate": 1.8783888456134373e-05, "loss": 1.6827, "step": 1378 }, { "epoch": 0.18401387776888178, "grad_norm": 1.0879716586164903, "learning_rate": 1.8781821995715694e-05, "loss": 1.6799, "step": 1379 }, { "epoch": 0.18414731785428343, "grad_norm": 1.0905252446482103, "learning_rate": 1.8779753894952658e-05, "loss": 1.6059, "step": 1380 }, { "epoch": 0.18428075793968507, "grad_norm": 11.800697000575934, "learning_rate": 1.877768415423155e-05, "loss": 1.7131, "step": 1381 }, { "epoch": 0.18441419802508674, "grad_norm": 1.4694858089280596, "learning_rate": 1.8775612773938984e-05, "loss": 1.6846, "step": 1382 }, { "epoch": 0.18454763811048838, "grad_norm": 1.4328576424428283, "learning_rate": 1.877353975446186e-05, "loss": 1.6188, "step": 1383 }, { "epoch": 0.18468107819589005, "grad_norm": 1.3413019372811694, "learning_rate": 1.8771465096187404e-05, "loss": 1.6835, "step": 1384 }, { "epoch": 0.1848145182812917, "grad_norm": 1.1291865647681716, "learning_rate": 1.8769388799503138e-05, "loss": 1.6709, "step": 1385 }, { "epoch": 0.18494795836669337, "grad_norm": 1.2530526469172218, "learning_rate": 1.876731086479688e-05, "loss": 1.6951, "step": 1386 }, { "epoch": 0.185081398452095, "grad_norm": 1.1547439851135595, "learning_rate": 1.876523129245678e-05, "loss": 1.6539, "step": 1387 }, { "epoch": 0.18521483853749665, "grad_norm": 1.083610486699219, "learning_rate": 1.8763150082871264e-05, "loss": 1.6559, "step": 1388 }, { "epoch": 0.18534827862289832, "grad_norm": 1.3139745218385874, "learning_rate": 1.876106723642909e-05, "loss": 1.6658, "step": 1389 }, { "epoch": 0.18548171870829996, "grad_norm": 1.2845524278681613, "learning_rate": 1.8758982753519302e-05, "loss": 1.6613, "step": 1390 }, { "epoch": 0.18561515879370163, "grad_norm": 1.3886592309613566, "learning_rate": 1.875689663453126e-05, "loss": 1.6973, "step": 1391 }, { "epoch": 0.18574859887910328, "grad_norm": 1.2127883048481998, "learning_rate": 1.8754808879854628e-05, "loss": 1.6643, "step": 1392 }, { "epoch": 0.18588203896450495, "grad_norm": 1.1707525451232437, "learning_rate": 1.8752719489879373e-05, "loss": 1.6844, "step": 1393 }, { "epoch": 0.1860154790499066, "grad_norm": 1.2601795453565978, "learning_rate": 1.8750628464995775e-05, "loss": 1.7148, "step": 1394 }, { "epoch": 0.18614891913530823, "grad_norm": 1.1313620285698516, "learning_rate": 1.8748535805594406e-05, "loss": 1.6856, "step": 1395 }, { "epoch": 0.1862823592207099, "grad_norm": 1.2540203774883083, "learning_rate": 1.8746441512066155e-05, "loss": 1.6585, "step": 1396 }, { "epoch": 0.18641579930611155, "grad_norm": 1.218035613217223, "learning_rate": 1.8744345584802212e-05, "loss": 1.6955, "step": 1397 }, { "epoch": 0.18654923939151322, "grad_norm": 1.106574827128144, "learning_rate": 1.8742248024194073e-05, "loss": 1.6404, "step": 1398 }, { "epoch": 0.18668267947691486, "grad_norm": 1.1254058920479488, "learning_rate": 1.8740148830633534e-05, "loss": 1.6456, "step": 1399 }, { "epoch": 0.18681611956231653, "grad_norm": 1.1988842778577748, "learning_rate": 1.8738048004512707e-05, "loss": 1.6735, "step": 1400 }, { "epoch": 0.18694955964771817, "grad_norm": 1.1597783939590798, "learning_rate": 1.8735945546224e-05, "loss": 1.6463, "step": 1401 }, { "epoch": 0.18708299973311984, "grad_norm": 1.1108971920248814, "learning_rate": 1.8733841456160123e-05, "loss": 1.7286, "step": 1402 }, { "epoch": 0.18721643981852149, "grad_norm": 1.195745553995662, "learning_rate": 1.8731735734714106e-05, "loss": 1.655, "step": 1403 }, { "epoch": 0.18734987990392313, "grad_norm": 1.1437291616721488, "learning_rate": 1.8729628382279265e-05, "loss": 1.7116, "step": 1404 }, { "epoch": 0.1874833199893248, "grad_norm": 1.2582582933611897, "learning_rate": 1.872751939924923e-05, "loss": 1.6831, "step": 1405 }, { "epoch": 0.18761676007472644, "grad_norm": 1.126051643445675, "learning_rate": 1.8725408786017942e-05, "loss": 1.6487, "step": 1406 }, { "epoch": 0.1877502001601281, "grad_norm": 1.2320240819480008, "learning_rate": 1.8723296542979635e-05, "loss": 1.7051, "step": 1407 }, { "epoch": 0.18788364024552975, "grad_norm": 1.1614213897490868, "learning_rate": 1.872118267052885e-05, "loss": 1.6758, "step": 1408 }, { "epoch": 0.18801708033093142, "grad_norm": 1.2384327956894376, "learning_rate": 1.8719067169060436e-05, "loss": 1.6544, "step": 1409 }, { "epoch": 0.18815052041633307, "grad_norm": 1.4000168772794017, "learning_rate": 1.8716950038969547e-05, "loss": 1.6822, "step": 1410 }, { "epoch": 0.1882839605017347, "grad_norm": 1.1742590284506205, "learning_rate": 1.871483128065164e-05, "loss": 1.6987, "step": 1411 }, { "epoch": 0.18841740058713638, "grad_norm": 1.206314558147808, "learning_rate": 1.8712710894502465e-05, "loss": 1.6341, "step": 1412 }, { "epoch": 0.18855084067253802, "grad_norm": 1.199751013189083, "learning_rate": 1.8710588880918098e-05, "loss": 1.6946, "step": 1413 }, { "epoch": 0.1886842807579397, "grad_norm": 1.2017855345339992, "learning_rate": 1.8708465240294902e-05, "loss": 1.6726, "step": 1414 }, { "epoch": 0.18881772084334134, "grad_norm": 1.2227953100114268, "learning_rate": 1.870633997302955e-05, "loss": 1.6407, "step": 1415 }, { "epoch": 0.188951160928743, "grad_norm": 1.2964391199169882, "learning_rate": 1.8704213079519008e-05, "loss": 1.6367, "step": 1416 }, { "epoch": 0.18908460101414465, "grad_norm": 1.2207494370258463, "learning_rate": 1.8702084560160572e-05, "loss": 1.7262, "step": 1417 }, { "epoch": 0.1892180410995463, "grad_norm": 5.905607034298311, "learning_rate": 1.8699954415351813e-05, "loss": 1.5948, "step": 1418 }, { "epoch": 0.18935148118494796, "grad_norm": 1.1469661893942433, "learning_rate": 1.8697822645490625e-05, "loss": 1.6524, "step": 1419 }, { "epoch": 0.1894849212703496, "grad_norm": 1.1259620274025783, "learning_rate": 1.8695689250975193e-05, "loss": 1.6331, "step": 1420 }, { "epoch": 0.18961836135575127, "grad_norm": 1.0763567309071966, "learning_rate": 1.8693554232204014e-05, "loss": 1.7, "step": 1421 }, { "epoch": 0.18975180144115292, "grad_norm": 1.1307077011187483, "learning_rate": 1.8691417589575885e-05, "loss": 1.6599, "step": 1422 }, { "epoch": 0.1898852415265546, "grad_norm": 1.2777295336158558, "learning_rate": 1.8689279323489902e-05, "loss": 1.6505, "step": 1423 }, { "epoch": 0.19001868161195623, "grad_norm": 1.2379608805979987, "learning_rate": 1.8687139434345478e-05, "loss": 1.6474, "step": 1424 }, { "epoch": 0.19015212169735787, "grad_norm": 1.080970192917941, "learning_rate": 1.8684997922542313e-05, "loss": 1.6836, "step": 1425 }, { "epoch": 0.19028556178275954, "grad_norm": 1.0712881958144271, "learning_rate": 1.8682854788480417e-05, "loss": 1.6071, "step": 1426 }, { "epoch": 0.1904190018681612, "grad_norm": 1.26548294983667, "learning_rate": 1.8680710032560102e-05, "loss": 1.6904, "step": 1427 }, { "epoch": 0.19055244195356286, "grad_norm": 1.0469874519952864, "learning_rate": 1.867856365518199e-05, "loss": 1.6422, "step": 1428 }, { "epoch": 0.1906858820389645, "grad_norm": 1.0942346487916967, "learning_rate": 1.8676415656746996e-05, "loss": 1.6864, "step": 1429 }, { "epoch": 0.19081932212436617, "grad_norm": 1.103259467200675, "learning_rate": 1.8674266037656343e-05, "loss": 1.6855, "step": 1430 }, { "epoch": 0.1909527622097678, "grad_norm": 1.0683367237417463, "learning_rate": 1.867211479831155e-05, "loss": 1.6525, "step": 1431 }, { "epoch": 0.19108620229516948, "grad_norm": 1.1121615299096883, "learning_rate": 1.8669961939114457e-05, "loss": 1.6904, "step": 1432 }, { "epoch": 0.19121964238057113, "grad_norm": 1.1617841181260873, "learning_rate": 1.8667807460467178e-05, "loss": 1.6352, "step": 1433 }, { "epoch": 0.19135308246597277, "grad_norm": 1.084146580072923, "learning_rate": 1.8665651362772152e-05, "loss": 1.6905, "step": 1434 }, { "epoch": 0.19148652255137444, "grad_norm": 1.1047653403081839, "learning_rate": 1.8663493646432116e-05, "loss": 1.7152, "step": 1435 }, { "epoch": 0.19161996263677608, "grad_norm": 1.0986414741598525, "learning_rate": 1.8661334311850104e-05, "loss": 1.6874, "step": 1436 }, { "epoch": 0.19175340272217775, "grad_norm": 1.3689551704009346, "learning_rate": 1.8659173359429455e-05, "loss": 1.6464, "step": 1437 }, { "epoch": 0.1918868428075794, "grad_norm": 1.2060959441159418, "learning_rate": 1.8657010789573812e-05, "loss": 1.7476, "step": 1438 }, { "epoch": 0.19202028289298106, "grad_norm": 1.094136327662655, "learning_rate": 1.8654846602687112e-05, "loss": 1.6862, "step": 1439 }, { "epoch": 0.1921537229783827, "grad_norm": 1.1003792674170316, "learning_rate": 1.865268079917361e-05, "loss": 1.7001, "step": 1440 }, { "epoch": 0.19228716306378435, "grad_norm": 1.397260975809977, "learning_rate": 1.8650513379437854e-05, "loss": 1.6541, "step": 1441 }, { "epoch": 0.19242060314918602, "grad_norm": 1.1317586552025098, "learning_rate": 1.864834434388468e-05, "loss": 1.654, "step": 1442 }, { "epoch": 0.19255404323458766, "grad_norm": 1.2801188417922258, "learning_rate": 1.8646173692919254e-05, "loss": 1.72, "step": 1443 }, { "epoch": 0.19268748331998933, "grad_norm": 1.1044353428107876, "learning_rate": 1.8644001426947017e-05, "loss": 1.6633, "step": 1444 }, { "epoch": 0.19282092340539098, "grad_norm": 1.7385377002254025, "learning_rate": 1.8641827546373736e-05, "loss": 1.6934, "step": 1445 }, { "epoch": 0.19295436349079265, "grad_norm": 1.1251578855313504, "learning_rate": 1.8639652051605454e-05, "loss": 1.6855, "step": 1446 }, { "epoch": 0.1930878035761943, "grad_norm": 1.0823798317472224, "learning_rate": 1.8637474943048538e-05, "loss": 1.6851, "step": 1447 }, { "epoch": 0.19322124366159593, "grad_norm": 1.2414924209927352, "learning_rate": 1.8635296221109643e-05, "loss": 1.6342, "step": 1448 }, { "epoch": 0.1933546837469976, "grad_norm": 1.081073074757343, "learning_rate": 1.8633115886195733e-05, "loss": 1.6326, "step": 1449 }, { "epoch": 0.19348812383239924, "grad_norm": 1.3276067715392756, "learning_rate": 1.863093393871406e-05, "loss": 1.682, "step": 1450 }, { "epoch": 0.19362156391780092, "grad_norm": 1.0512206782809177, "learning_rate": 1.86287503790722e-05, "loss": 1.6339, "step": 1451 }, { "epoch": 0.19375500400320256, "grad_norm": 1.1118643544696887, "learning_rate": 1.8626565207678014e-05, "loss": 1.6412, "step": 1452 }, { "epoch": 0.19388844408860423, "grad_norm": 1.1881349320604198, "learning_rate": 1.862437842493966e-05, "loss": 1.6535, "step": 1453 }, { "epoch": 0.19402188417400587, "grad_norm": 1.0650617509844336, "learning_rate": 1.8622190031265608e-05, "loss": 1.6608, "step": 1454 }, { "epoch": 0.1941553242594075, "grad_norm": 1.2022764202107716, "learning_rate": 1.8620000027064625e-05, "loss": 1.6818, "step": 1455 }, { "epoch": 0.19428876434480918, "grad_norm": 1.0945586894168828, "learning_rate": 1.861780841274578e-05, "loss": 1.6518, "step": 1456 }, { "epoch": 0.19442220443021083, "grad_norm": 1.181691965256765, "learning_rate": 1.8615615188718442e-05, "loss": 1.6526, "step": 1457 }, { "epoch": 0.1945556445156125, "grad_norm": 1.21873002721777, "learning_rate": 1.861342035539228e-05, "loss": 1.6685, "step": 1458 }, { "epoch": 0.19468908460101414, "grad_norm": 1.1888383727594938, "learning_rate": 1.8611223913177264e-05, "loss": 1.6939, "step": 1459 }, { "epoch": 0.1948225246864158, "grad_norm": 1.1092188067858701, "learning_rate": 1.8609025862483658e-05, "loss": 1.7001, "step": 1460 }, { "epoch": 0.19495596477181745, "grad_norm": 1.4312758954493943, "learning_rate": 1.8606826203722043e-05, "loss": 1.6334, "step": 1461 }, { "epoch": 0.1950894048572191, "grad_norm": 1.2022595063751074, "learning_rate": 1.8604624937303285e-05, "loss": 1.6806, "step": 1462 }, { "epoch": 0.19522284494262077, "grad_norm": 1.0171052837252916, "learning_rate": 1.8602422063638554e-05, "loss": 1.6488, "step": 1463 }, { "epoch": 0.1953562850280224, "grad_norm": 1.3510050011424308, "learning_rate": 1.860021758313933e-05, "loss": 1.6833, "step": 1464 }, { "epoch": 0.19548972511342408, "grad_norm": 1.257684566145725, "learning_rate": 1.859801149621737e-05, "loss": 1.6647, "step": 1465 }, { "epoch": 0.19562316519882572, "grad_norm": 1.1217536119317055, "learning_rate": 1.8595803803284757e-05, "loss": 1.6662, "step": 1466 }, { "epoch": 0.1957566052842274, "grad_norm": 1.1668773044915945, "learning_rate": 1.8593594504753863e-05, "loss": 1.711, "step": 1467 }, { "epoch": 0.19589004536962903, "grad_norm": 1.1958090413202758, "learning_rate": 1.8591383601037357e-05, "loss": 1.709, "step": 1468 }, { "epoch": 0.1960234854550307, "grad_norm": 1.3120713864562594, "learning_rate": 1.8589171092548208e-05, "loss": 1.6493, "step": 1469 }, { "epoch": 0.19615692554043235, "grad_norm": 1.1266277557758897, "learning_rate": 1.8586956979699692e-05, "loss": 1.6786, "step": 1470 }, { "epoch": 0.196290365625834, "grad_norm": 2.464652796809538, "learning_rate": 1.858474126290538e-05, "loss": 1.6538, "step": 1471 }, { "epoch": 0.19642380571123566, "grad_norm": 1.358175538034263, "learning_rate": 1.8582523942579138e-05, "loss": 1.6298, "step": 1472 }, { "epoch": 0.1965572457966373, "grad_norm": 1.1218515326836789, "learning_rate": 1.8580305019135144e-05, "loss": 1.6807, "step": 1473 }, { "epoch": 0.19669068588203897, "grad_norm": 1.1314598132036782, "learning_rate": 1.8578084492987857e-05, "loss": 1.6085, "step": 1474 }, { "epoch": 0.19682412596744062, "grad_norm": 1.0806632798443008, "learning_rate": 1.8575862364552056e-05, "loss": 1.6943, "step": 1475 }, { "epoch": 0.1969575660528423, "grad_norm": 1.0456494280144883, "learning_rate": 1.8573638634242806e-05, "loss": 1.6425, "step": 1476 }, { "epoch": 0.19709100613824393, "grad_norm": 1.4643633002109615, "learning_rate": 1.857141330247547e-05, "loss": 1.717, "step": 1477 }, { "epoch": 0.19722444622364557, "grad_norm": 1.3907399472017639, "learning_rate": 1.856918636966572e-05, "loss": 1.6938, "step": 1478 }, { "epoch": 0.19735788630904724, "grad_norm": 1.074837673820265, "learning_rate": 1.856695783622952e-05, "loss": 1.6547, "step": 1479 }, { "epoch": 0.19749132639444889, "grad_norm": 1.1065228121324435, "learning_rate": 1.8564727702583132e-05, "loss": 1.6468, "step": 1480 }, { "epoch": 0.19762476647985056, "grad_norm": 1.1931788828028584, "learning_rate": 1.8562495969143125e-05, "loss": 1.658, "step": 1481 }, { "epoch": 0.1977582065652522, "grad_norm": 1.213866762163482, "learning_rate": 1.8560262636326358e-05, "loss": 1.631, "step": 1482 }, { "epoch": 0.19789164665065387, "grad_norm": 1.099947809835569, "learning_rate": 1.8558027704549993e-05, "loss": 1.6351, "step": 1483 }, { "epoch": 0.1980250867360555, "grad_norm": 1.1088893853131983, "learning_rate": 1.8555791174231494e-05, "loss": 1.6723, "step": 1484 }, { "epoch": 0.19815852682145715, "grad_norm": 1.1011144028074373, "learning_rate": 1.855355304578861e-05, "loss": 1.707, "step": 1485 }, { "epoch": 0.19829196690685882, "grad_norm": 1.3818385213484043, "learning_rate": 1.85513133196394e-05, "loss": 1.652, "step": 1486 }, { "epoch": 0.19842540699226047, "grad_norm": 1.1229293638925342, "learning_rate": 1.8549071996202225e-05, "loss": 1.656, "step": 1487 }, { "epoch": 0.19855884707766214, "grad_norm": 1.080075119649869, "learning_rate": 1.8546829075895737e-05, "loss": 1.7099, "step": 1488 }, { "epoch": 0.19869228716306378, "grad_norm": 1.2852126608442598, "learning_rate": 1.8544584559138888e-05, "loss": 1.7062, "step": 1489 }, { "epoch": 0.19882572724846545, "grad_norm": 1.14699198879905, "learning_rate": 1.8542338446350924e-05, "loss": 1.6963, "step": 1490 }, { "epoch": 0.1989591673338671, "grad_norm": 1.0866913345021045, "learning_rate": 1.85400907379514e-05, "loss": 1.692, "step": 1491 }, { "epoch": 0.19909260741926874, "grad_norm": 1.1345754144326983, "learning_rate": 1.853784143436016e-05, "loss": 1.6911, "step": 1492 }, { "epoch": 0.1992260475046704, "grad_norm": 1.0859201677475647, "learning_rate": 1.853559053599734e-05, "loss": 1.7136, "step": 1493 }, { "epoch": 0.19935948759007205, "grad_norm": 1.114793968801779, "learning_rate": 1.8533338043283392e-05, "loss": 1.6262, "step": 1494 }, { "epoch": 0.19949292767547372, "grad_norm": 1.0646044544826536, "learning_rate": 1.8531083956639054e-05, "loss": 1.6374, "step": 1495 }, { "epoch": 0.19962636776087536, "grad_norm": 1.0943515393210304, "learning_rate": 1.8528828276485365e-05, "loss": 1.6565, "step": 1496 }, { "epoch": 0.19975980784627703, "grad_norm": 1.153219518432924, "learning_rate": 1.852657100324366e-05, "loss": 1.6468, "step": 1497 }, { "epoch": 0.19989324793167867, "grad_norm": 1.3658364399948308, "learning_rate": 1.8524312137335565e-05, "loss": 1.6546, "step": 1498 }, { "epoch": 0.20002668801708032, "grad_norm": 1.0830597310395975, "learning_rate": 1.8522051679183017e-05, "loss": 1.6464, "step": 1499 }, { "epoch": 0.200160128102482, "grad_norm": 1.0897256468244192, "learning_rate": 1.8519789629208245e-05, "loss": 1.6335, "step": 1500 }, { "epoch": 0.20029356818788363, "grad_norm": 1.1152974632184582, "learning_rate": 1.851752598783377e-05, "loss": 1.6539, "step": 1501 }, { "epoch": 0.2004270082732853, "grad_norm": 1.070943336830069, "learning_rate": 1.8515260755482414e-05, "loss": 1.663, "step": 1502 }, { "epoch": 0.20056044835868694, "grad_norm": 1.1301005431558968, "learning_rate": 1.85129939325773e-05, "loss": 1.6386, "step": 1503 }, { "epoch": 0.20069388844408861, "grad_norm": 1.4376814743628707, "learning_rate": 1.8510725519541845e-05, "loss": 1.6777, "step": 1504 }, { "epoch": 0.20082732852949026, "grad_norm": 1.1571460766809745, "learning_rate": 1.8508455516799762e-05, "loss": 1.6458, "step": 1505 }, { "epoch": 0.20096076861489193, "grad_norm": 1.526537352021282, "learning_rate": 1.850618392477506e-05, "loss": 1.6673, "step": 1506 }, { "epoch": 0.20109420870029357, "grad_norm": 1.171428989010074, "learning_rate": 1.8503910743892044e-05, "loss": 1.6548, "step": 1507 }, { "epoch": 0.2012276487856952, "grad_norm": 1.0654992362564706, "learning_rate": 1.8501635974575328e-05, "loss": 1.6856, "step": 1508 }, { "epoch": 0.20136108887109688, "grad_norm": 1.2869430405209437, "learning_rate": 1.84993596172498e-05, "loss": 1.6663, "step": 1509 }, { "epoch": 0.20149452895649853, "grad_norm": 1.3280855964940679, "learning_rate": 1.849708167234067e-05, "loss": 1.6049, "step": 1510 }, { "epoch": 0.2016279690419002, "grad_norm": 1.2170509648131613, "learning_rate": 1.8494802140273423e-05, "loss": 1.6645, "step": 1511 }, { "epoch": 0.20176140912730184, "grad_norm": 1.1693660112296544, "learning_rate": 1.8492521021473854e-05, "loss": 1.6448, "step": 1512 }, { "epoch": 0.2018948492127035, "grad_norm": 1.1495960407236363, "learning_rate": 1.8490238316368048e-05, "loss": 1.6717, "step": 1513 }, { "epoch": 0.20202828929810515, "grad_norm": 1.0607525124108388, "learning_rate": 1.848795402538239e-05, "loss": 1.685, "step": 1514 }, { "epoch": 0.2021617293835068, "grad_norm": 1.1588629194975635, "learning_rate": 1.848566814894356e-05, "loss": 1.6733, "step": 1515 }, { "epoch": 0.20229516946890846, "grad_norm": 1.0477044055470504, "learning_rate": 1.8483380687478535e-05, "loss": 1.6364, "step": 1516 }, { "epoch": 0.2024286095543101, "grad_norm": 1.3539461437967024, "learning_rate": 1.848109164141458e-05, "loss": 1.6329, "step": 1517 }, { "epoch": 0.20256204963971178, "grad_norm": 1.316562445670924, "learning_rate": 1.847880101117927e-05, "loss": 1.7161, "step": 1518 }, { "epoch": 0.20269548972511342, "grad_norm": 1.1325223006869773, "learning_rate": 1.8476508797200464e-05, "loss": 1.6976, "step": 1519 }, { "epoch": 0.2028289298105151, "grad_norm": 1.1271240323882026, "learning_rate": 1.847421499990633e-05, "loss": 1.6724, "step": 1520 }, { "epoch": 0.20296236989591673, "grad_norm": 1.1553839820996277, "learning_rate": 1.8471919619725307e-05, "loss": 1.6322, "step": 1521 }, { "epoch": 0.20309580998131838, "grad_norm": 1.1027336658945248, "learning_rate": 1.8469622657086158e-05, "loss": 1.6447, "step": 1522 }, { "epoch": 0.20322925006672005, "grad_norm": 1.183883565807101, "learning_rate": 1.846732411241793e-05, "loss": 1.6999, "step": 1523 }, { "epoch": 0.2033626901521217, "grad_norm": 1.1642046420989085, "learning_rate": 1.846502398614996e-05, "loss": 1.7149, "step": 1524 }, { "epoch": 0.20349613023752336, "grad_norm": 1.0755063096784014, "learning_rate": 1.8462722278711887e-05, "loss": 1.5943, "step": 1525 }, { "epoch": 0.203629570322925, "grad_norm": 1.1419970032576714, "learning_rate": 1.8460418990533647e-05, "loss": 1.7073, "step": 1526 }, { "epoch": 0.20376301040832667, "grad_norm": 1.302454031541249, "learning_rate": 1.845811412204546e-05, "loss": 1.6758, "step": 1527 }, { "epoch": 0.20389645049372832, "grad_norm": 1.1668477615464077, "learning_rate": 1.845580767367786e-05, "loss": 1.694, "step": 1528 }, { "epoch": 0.20402989057912996, "grad_norm": 1.1087107394603948, "learning_rate": 1.8453499645861656e-05, "loss": 1.691, "step": 1529 }, { "epoch": 0.20416333066453163, "grad_norm": 1.0455825217353827, "learning_rate": 1.8451190039027964e-05, "loss": 1.6684, "step": 1530 }, { "epoch": 0.20429677074993327, "grad_norm": 1.219445656841785, "learning_rate": 1.8448878853608195e-05, "loss": 1.7155, "step": 1531 }, { "epoch": 0.20443021083533494, "grad_norm": 1.335960910259822, "learning_rate": 1.8446566090034053e-05, "loss": 1.7326, "step": 1532 }, { "epoch": 0.20456365092073658, "grad_norm": 1.271132783595969, "learning_rate": 1.8444251748737526e-05, "loss": 1.6686, "step": 1533 }, { "epoch": 0.20469709100613825, "grad_norm": 1.0514626639526814, "learning_rate": 1.844193583015092e-05, "loss": 1.6366, "step": 1534 }, { "epoch": 0.2048305310915399, "grad_norm": 1.0454900741987578, "learning_rate": 1.8439618334706812e-05, "loss": 1.6237, "step": 1535 }, { "epoch": 0.20496397117694154, "grad_norm": 1.073823751577798, "learning_rate": 1.8437299262838092e-05, "loss": 1.6749, "step": 1536 }, { "epoch": 0.2050974112623432, "grad_norm": 1.2798723643570793, "learning_rate": 1.843497861497793e-05, "loss": 1.6411, "step": 1537 }, { "epoch": 0.20523085134774485, "grad_norm": 1.1135912832633508, "learning_rate": 1.8432656391559794e-05, "loss": 1.63, "step": 1538 }, { "epoch": 0.20536429143314652, "grad_norm": 1.0600593382175174, "learning_rate": 1.8430332593017463e-05, "loss": 1.6932, "step": 1539 }, { "epoch": 0.20549773151854817, "grad_norm": 1.243565535810705, "learning_rate": 1.8428007219784978e-05, "loss": 1.6734, "step": 1540 }, { "epoch": 0.20563117160394984, "grad_norm": 1.1107029670183746, "learning_rate": 1.8425680272296706e-05, "loss": 1.6954, "step": 1541 }, { "epoch": 0.20576461168935148, "grad_norm": 1.1611386236626087, "learning_rate": 1.8423351750987285e-05, "loss": 1.6676, "step": 1542 }, { "epoch": 0.20589805177475315, "grad_norm": 1.0922966177873632, "learning_rate": 1.842102165629166e-05, "loss": 1.6751, "step": 1543 }, { "epoch": 0.2060314918601548, "grad_norm": 1.1728266705222763, "learning_rate": 1.8418689988645072e-05, "loss": 1.6662, "step": 1544 }, { "epoch": 0.20616493194555643, "grad_norm": 1.281999128501973, "learning_rate": 1.841635674848304e-05, "loss": 1.6855, "step": 1545 }, { "epoch": 0.2062983720309581, "grad_norm": 1.1950922902237124, "learning_rate": 1.8414021936241394e-05, "loss": 1.6543, "step": 1546 }, { "epoch": 0.20643181211635975, "grad_norm": 1.1228445170626977, "learning_rate": 1.8411685552356244e-05, "loss": 1.6518, "step": 1547 }, { "epoch": 0.20656525220176142, "grad_norm": 1.2061203106698866, "learning_rate": 1.840934759726401e-05, "loss": 1.6469, "step": 1548 }, { "epoch": 0.20669869228716306, "grad_norm": 1.1059566734641804, "learning_rate": 1.840700807140138e-05, "loss": 1.6259, "step": 1549 }, { "epoch": 0.20683213237256473, "grad_norm": 1.1551811421928873, "learning_rate": 1.8404666975205367e-05, "loss": 1.6333, "step": 1550 }, { "epoch": 0.20696557245796637, "grad_norm": 1.1528270603987256, "learning_rate": 1.840232430911325e-05, "loss": 1.7053, "step": 1551 }, { "epoch": 0.20709901254336802, "grad_norm": 1.0615331899156355, "learning_rate": 1.8399980073562615e-05, "loss": 1.6093, "step": 1552 }, { "epoch": 0.2072324526287697, "grad_norm": 1.2018501248996023, "learning_rate": 1.8397634268991343e-05, "loss": 1.6831, "step": 1553 }, { "epoch": 0.20736589271417133, "grad_norm": 1.129260522162621, "learning_rate": 1.8395286895837598e-05, "loss": 1.6725, "step": 1554 }, { "epoch": 0.207499332799573, "grad_norm": 1.2931052464354142, "learning_rate": 1.8392937954539845e-05, "loss": 1.6261, "step": 1555 }, { "epoch": 0.20763277288497464, "grad_norm": 1.158760216852382, "learning_rate": 1.8390587445536844e-05, "loss": 1.6595, "step": 1556 }, { "epoch": 0.2077662129703763, "grad_norm": 1.16264835183651, "learning_rate": 1.8388235369267632e-05, "loss": 1.6538, "step": 1557 }, { "epoch": 0.20789965305577796, "grad_norm": 1.1390190977362333, "learning_rate": 1.838588172617156e-05, "loss": 1.6174, "step": 1558 }, { "epoch": 0.2080330931411796, "grad_norm": 1.0281490566989566, "learning_rate": 1.8383526516688257e-05, "loss": 1.6256, "step": 1559 }, { "epoch": 0.20816653322658127, "grad_norm": 1.3060131907983772, "learning_rate": 1.8381169741257653e-05, "loss": 1.6876, "step": 1560 }, { "epoch": 0.2082999733119829, "grad_norm": 1.0851729874752505, "learning_rate": 1.8378811400319962e-05, "loss": 1.6448, "step": 1561 }, { "epoch": 0.20843341339738458, "grad_norm": 1.1173227297527768, "learning_rate": 1.8376451494315697e-05, "loss": 1.6397, "step": 1562 }, { "epoch": 0.20856685348278622, "grad_norm": 1.1250418404408145, "learning_rate": 1.8374090023685664e-05, "loss": 1.7092, "step": 1563 }, { "epoch": 0.2087002935681879, "grad_norm": 1.1313533969061775, "learning_rate": 1.8371726988870956e-05, "loss": 1.6625, "step": 1564 }, { "epoch": 0.20883373365358954, "grad_norm": 1.1578578516746643, "learning_rate": 1.8369362390312965e-05, "loss": 1.6829, "step": 1565 }, { "epoch": 0.20896717373899118, "grad_norm": 1.0419177694211474, "learning_rate": 1.8366996228453366e-05, "loss": 1.6711, "step": 1566 }, { "epoch": 0.20910061382439285, "grad_norm": 1.110808392383394, "learning_rate": 1.8364628503734133e-05, "loss": 1.6313, "step": 1567 }, { "epoch": 0.2092340539097945, "grad_norm": 1.0553327445995773, "learning_rate": 1.8362259216597532e-05, "loss": 1.6769, "step": 1568 }, { "epoch": 0.20936749399519616, "grad_norm": 1.1605648342199986, "learning_rate": 1.8359888367486116e-05, "loss": 1.6423, "step": 1569 }, { "epoch": 0.2095009340805978, "grad_norm": 1.3528845887090233, "learning_rate": 1.8357515956842735e-05, "loss": 1.6641, "step": 1570 }, { "epoch": 0.20963437416599948, "grad_norm": 1.0381834374569774, "learning_rate": 1.8355141985110524e-05, "loss": 1.6645, "step": 1571 }, { "epoch": 0.20976781425140112, "grad_norm": 1.1268678857914454, "learning_rate": 1.8352766452732922e-05, "loss": 1.6252, "step": 1572 }, { "epoch": 0.20990125433680276, "grad_norm": 1.07645387262839, "learning_rate": 1.8350389360153645e-05, "loss": 1.6234, "step": 1573 }, { "epoch": 0.21003469442220443, "grad_norm": 1.1601184907461661, "learning_rate": 1.8348010707816704e-05, "loss": 1.6625, "step": 1574 }, { "epoch": 0.21016813450760607, "grad_norm": 1.1598070223911565, "learning_rate": 1.8345630496166413e-05, "loss": 1.6072, "step": 1575 }, { "epoch": 0.21030157459300775, "grad_norm": 1.0478963594602182, "learning_rate": 1.834324872564736e-05, "loss": 1.6713, "step": 1576 }, { "epoch": 0.2104350146784094, "grad_norm": 1.1864899396092392, "learning_rate": 1.8340865396704442e-05, "loss": 1.6994, "step": 1577 }, { "epoch": 0.21056845476381106, "grad_norm": 1.2082345518638513, "learning_rate": 1.833848050978283e-05, "loss": 1.6325, "step": 1578 }, { "epoch": 0.2107018948492127, "grad_norm": 1.0705831509203583, "learning_rate": 1.8336094065327994e-05, "loss": 1.6455, "step": 1579 }, { "epoch": 0.21083533493461437, "grad_norm": 1.0714080597971536, "learning_rate": 1.8333706063785698e-05, "loss": 1.6415, "step": 1580 }, { "epoch": 0.21096877502001601, "grad_norm": 1.4855074921163713, "learning_rate": 1.8331316505601995e-05, "loss": 1.6629, "step": 1581 }, { "epoch": 0.21110221510541766, "grad_norm": 1.0376544838658421, "learning_rate": 1.832892539122322e-05, "loss": 1.6218, "step": 1582 }, { "epoch": 0.21123565519081933, "grad_norm": 1.0937886957684666, "learning_rate": 1.832653272109601e-05, "loss": 1.6446, "step": 1583 }, { "epoch": 0.21136909527622097, "grad_norm": 0.9935625042710026, "learning_rate": 1.8324138495667293e-05, "loss": 1.5577, "step": 1584 }, { "epoch": 0.21150253536162264, "grad_norm": 1.0414931198212998, "learning_rate": 1.8321742715384277e-05, "loss": 1.7021, "step": 1585 }, { "epoch": 0.21163597544702428, "grad_norm": 1.0888067291778296, "learning_rate": 1.8319345380694464e-05, "loss": 1.6425, "step": 1586 }, { "epoch": 0.21176941553242595, "grad_norm": 1.1889065641556036, "learning_rate": 1.831694649204566e-05, "loss": 1.6804, "step": 1587 }, { "epoch": 0.2119028556178276, "grad_norm": 1.2027320435869355, "learning_rate": 1.831454604988594e-05, "loss": 1.6518, "step": 1588 }, { "epoch": 0.21203629570322924, "grad_norm": 1.1633662750060685, "learning_rate": 1.8312144054663682e-05, "loss": 1.6471, "step": 1589 }, { "epoch": 0.2121697357886309, "grad_norm": 1.62970080028646, "learning_rate": 1.8309740506827552e-05, "loss": 1.6254, "step": 1590 }, { "epoch": 0.21230317587403255, "grad_norm": 1.0879307197813337, "learning_rate": 1.8307335406826505e-05, "loss": 1.6575, "step": 1591 }, { "epoch": 0.21243661595943422, "grad_norm": 1.269123197918674, "learning_rate": 1.830492875510979e-05, "loss": 1.6364, "step": 1592 }, { "epoch": 0.21257005604483586, "grad_norm": 1.0373985047234797, "learning_rate": 1.8302520552126935e-05, "loss": 1.6571, "step": 1593 }, { "epoch": 0.21270349613023753, "grad_norm": 1.0542865827845302, "learning_rate": 1.830011079832777e-05, "loss": 1.6695, "step": 1594 }, { "epoch": 0.21283693621563918, "grad_norm": 1.0402881306171436, "learning_rate": 1.8297699494162406e-05, "loss": 1.6562, "step": 1595 }, { "epoch": 0.21297037630104082, "grad_norm": 1.1294113481127595, "learning_rate": 1.8295286640081252e-05, "loss": 1.6583, "step": 1596 }, { "epoch": 0.2131038163864425, "grad_norm": 1.1895908197616782, "learning_rate": 1.8292872236534996e-05, "loss": 1.6487, "step": 1597 }, { "epoch": 0.21323725647184413, "grad_norm": 1.0379115957592848, "learning_rate": 1.829045628397463e-05, "loss": 1.6322, "step": 1598 }, { "epoch": 0.2133706965572458, "grad_norm": 1.222610128560367, "learning_rate": 1.8288038782851417e-05, "loss": 1.6657, "step": 1599 }, { "epoch": 0.21350413664264745, "grad_norm": 1.130257915264502, "learning_rate": 1.828561973361692e-05, "loss": 1.6602, "step": 1600 }, { "epoch": 0.21363757672804912, "grad_norm": 1.126575657802467, "learning_rate": 1.8283199136722995e-05, "loss": 1.6918, "step": 1601 }, { "epoch": 0.21377101681345076, "grad_norm": 1.3153982419349068, "learning_rate": 1.828077699262178e-05, "loss": 1.6693, "step": 1602 }, { "epoch": 0.2139044568988524, "grad_norm": 1.262078471065165, "learning_rate": 1.8278353301765702e-05, "loss": 1.6933, "step": 1603 }, { "epoch": 0.21403789698425407, "grad_norm": 1.1173878288546095, "learning_rate": 1.8275928064607478e-05, "loss": 1.5781, "step": 1604 }, { "epoch": 0.21417133706965572, "grad_norm": 2.31232037905848, "learning_rate": 1.8273501281600118e-05, "loss": 1.66, "step": 1605 }, { "epoch": 0.21430477715505739, "grad_norm": 1.1674269299374407, "learning_rate": 1.8271072953196915e-05, "loss": 1.7023, "step": 1606 }, { "epoch": 0.21443821724045903, "grad_norm": 1.1591034820385062, "learning_rate": 1.8268643079851457e-05, "loss": 1.706, "step": 1607 }, { "epoch": 0.2145716573258607, "grad_norm": 1.2450171637095553, "learning_rate": 1.826621166201761e-05, "loss": 1.6084, "step": 1608 }, { "epoch": 0.21470509741126234, "grad_norm": 1.1159214690409671, "learning_rate": 1.826377870014954e-05, "loss": 1.6834, "step": 1609 }, { "epoch": 0.214838537496664, "grad_norm": 1.099652892199378, "learning_rate": 1.8261344194701695e-05, "loss": 1.6745, "step": 1610 }, { "epoch": 0.21497197758206565, "grad_norm": 1.1872360367368382, "learning_rate": 1.8258908146128814e-05, "loss": 1.6332, "step": 1611 }, { "epoch": 0.2151054176674673, "grad_norm": 1.0948685127381836, "learning_rate": 1.8256470554885922e-05, "loss": 1.631, "step": 1612 }, { "epoch": 0.21523885775286897, "grad_norm": 1.1075853907414162, "learning_rate": 1.8254031421428334e-05, "loss": 1.7092, "step": 1613 }, { "epoch": 0.2153722978382706, "grad_norm": 1.1518564227271244, "learning_rate": 1.8251590746211655e-05, "loss": 1.7042, "step": 1614 }, { "epoch": 0.21550573792367228, "grad_norm": 1.1038926708394592, "learning_rate": 1.8249148529691768e-05, "loss": 1.6277, "step": 1615 }, { "epoch": 0.21563917800907392, "grad_norm": 1.1063850648205573, "learning_rate": 1.824670477232486e-05, "loss": 1.6506, "step": 1616 }, { "epoch": 0.2157726180944756, "grad_norm": 1.0603627332408834, "learning_rate": 1.824425947456739e-05, "loss": 1.6636, "step": 1617 }, { "epoch": 0.21590605817987724, "grad_norm": 1.2546873356938104, "learning_rate": 1.8241812636876113e-05, "loss": 1.6793, "step": 1618 }, { "epoch": 0.21603949826527888, "grad_norm": 1.181239728860882, "learning_rate": 1.8239364259708076e-05, "loss": 1.6664, "step": 1619 }, { "epoch": 0.21617293835068055, "grad_norm": 1.180838315282554, "learning_rate": 1.8236914343520598e-05, "loss": 1.6998, "step": 1620 }, { "epoch": 0.2163063784360822, "grad_norm": 1.1308148567631164, "learning_rate": 1.8234462888771308e-05, "loss": 1.7154, "step": 1621 }, { "epoch": 0.21643981852148386, "grad_norm": 1.1326281559924998, "learning_rate": 1.82320098959181e-05, "loss": 1.6256, "step": 1622 }, { "epoch": 0.2165732586068855, "grad_norm": 1.0865149229961686, "learning_rate": 1.822955536541917e-05, "loss": 1.6228, "step": 1623 }, { "epoch": 0.21670669869228718, "grad_norm": 1.2423233898451607, "learning_rate": 1.8227099297732997e-05, "loss": 1.648, "step": 1624 }, { "epoch": 0.21684013877768882, "grad_norm": 1.1502479506643792, "learning_rate": 1.8224641693318338e-05, "loss": 1.6578, "step": 1625 }, { "epoch": 0.21697357886309046, "grad_norm": 1.440887259770447, "learning_rate": 1.8222182552634257e-05, "loss": 1.6826, "step": 1626 }, { "epoch": 0.21710701894849213, "grad_norm": 1.121187218751373, "learning_rate": 1.8219721876140084e-05, "loss": 1.6264, "step": 1627 }, { "epoch": 0.21724045903389377, "grad_norm": 4.7724358351335905, "learning_rate": 1.8217259664295452e-05, "loss": 1.6344, "step": 1628 }, { "epoch": 0.21737389911929544, "grad_norm": 1.2326061740840186, "learning_rate": 1.821479591756027e-05, "loss": 1.6977, "step": 1629 }, { "epoch": 0.2175073392046971, "grad_norm": 1.1773999953923238, "learning_rate": 1.8212330636394743e-05, "loss": 1.6613, "step": 1630 }, { "epoch": 0.21764077929009876, "grad_norm": 1.0745824244836635, "learning_rate": 1.820986382125935e-05, "loss": 1.6746, "step": 1631 }, { "epoch": 0.2177742193755004, "grad_norm": 1.236415394795963, "learning_rate": 1.820739547261487e-05, "loss": 1.6253, "step": 1632 }, { "epoch": 0.21790765946090204, "grad_norm": 1.1065194060629115, "learning_rate": 1.8204925590922362e-05, "loss": 1.604, "step": 1633 }, { "epoch": 0.2180410995463037, "grad_norm": 1.0940043580451333, "learning_rate": 1.820245417664317e-05, "loss": 1.6449, "step": 1634 }, { "epoch": 0.21817453963170536, "grad_norm": 1.1869832191003882, "learning_rate": 1.8199981230238924e-05, "loss": 1.6705, "step": 1635 }, { "epoch": 0.21830797971710703, "grad_norm": 1.1189817227152579, "learning_rate": 1.8197506752171545e-05, "loss": 1.6218, "step": 1636 }, { "epoch": 0.21844141980250867, "grad_norm": 1.1473359068259987, "learning_rate": 1.8195030742903236e-05, "loss": 1.6832, "step": 1637 }, { "epoch": 0.21857485988791034, "grad_norm": 1.2362438437725367, "learning_rate": 1.819255320289649e-05, "loss": 1.6471, "step": 1638 }, { "epoch": 0.21870829997331198, "grad_norm": 1.1116111042127557, "learning_rate": 1.8190074132614083e-05, "loss": 1.6397, "step": 1639 }, { "epoch": 0.21884174005871362, "grad_norm": 1.0931240118741188, "learning_rate": 1.818759353251907e-05, "loss": 1.7028, "step": 1640 }, { "epoch": 0.2189751801441153, "grad_norm": 1.1134694456669874, "learning_rate": 1.818511140307481e-05, "loss": 1.7135, "step": 1641 }, { "epoch": 0.21910862022951694, "grad_norm": 1.1225439283708083, "learning_rate": 1.8182627744744928e-05, "loss": 1.6863, "step": 1642 }, { "epoch": 0.2192420603149186, "grad_norm": 1.1226470273814733, "learning_rate": 1.8180142557993346e-05, "loss": 1.6654, "step": 1643 }, { "epoch": 0.21937550040032025, "grad_norm": 12.702656527886313, "learning_rate": 1.8177655843284268e-05, "loss": 1.6785, "step": 1644 }, { "epoch": 0.21950894048572192, "grad_norm": 1.1646728669056514, "learning_rate": 1.8175167601082187e-05, "loss": 1.6678, "step": 1645 }, { "epoch": 0.21964238057112356, "grad_norm": 1.1370827214131438, "learning_rate": 1.8172677831851874e-05, "loss": 1.6678, "step": 1646 }, { "epoch": 0.21977582065652523, "grad_norm": 1.0805021664243795, "learning_rate": 1.8170186536058393e-05, "loss": 1.6445, "step": 1647 }, { "epoch": 0.21990926074192688, "grad_norm": 1.068690271627419, "learning_rate": 1.8167693714167088e-05, "loss": 1.6944, "step": 1648 }, { "epoch": 0.22004270082732852, "grad_norm": 1.2765183302069458, "learning_rate": 1.816519936664359e-05, "loss": 1.6567, "step": 1649 }, { "epoch": 0.2201761409127302, "grad_norm": 1.1215664466938502, "learning_rate": 1.8162703493953812e-05, "loss": 1.6553, "step": 1650 }, { "epoch": 0.22030958099813183, "grad_norm": 1.057949579182957, "learning_rate": 1.8160206096563957e-05, "loss": 1.6541, "step": 1651 }, { "epoch": 0.2204430210835335, "grad_norm": 1.3213535546275152, "learning_rate": 1.8157707174940516e-05, "loss": 1.6824, "step": 1652 }, { "epoch": 0.22057646116893515, "grad_norm": 1.1669724021565158, "learning_rate": 1.8155206729550248e-05, "loss": 1.6525, "step": 1653 }, { "epoch": 0.22070990125433682, "grad_norm": 1.3110532840171654, "learning_rate": 1.8152704760860217e-05, "loss": 1.678, "step": 1654 }, { "epoch": 0.22084334133973846, "grad_norm": 1.1672424586728913, "learning_rate": 1.815020126933776e-05, "loss": 1.6696, "step": 1655 }, { "epoch": 0.2209767814251401, "grad_norm": 1.3061093334442921, "learning_rate": 1.8147696255450495e-05, "loss": 1.6654, "step": 1656 }, { "epoch": 0.22111022151054177, "grad_norm": 1.0702194871522392, "learning_rate": 1.8145189719666335e-05, "loss": 1.7042, "step": 1657 }, { "epoch": 0.2212436615959434, "grad_norm": 1.2742123644262122, "learning_rate": 1.8142681662453473e-05, "loss": 1.6924, "step": 1658 }, { "epoch": 0.22137710168134508, "grad_norm": 1.090573827396432, "learning_rate": 1.8140172084280384e-05, "loss": 1.7083, "step": 1659 }, { "epoch": 0.22151054176674673, "grad_norm": 1.0477634184103604, "learning_rate": 1.813766098561583e-05, "loss": 1.6293, "step": 1660 }, { "epoch": 0.2216439818521484, "grad_norm": 1.1079746682957217, "learning_rate": 1.8135148366928855e-05, "loss": 1.6785, "step": 1661 }, { "epoch": 0.22177742193755004, "grad_norm": 1.1056303875592357, "learning_rate": 1.8132634228688785e-05, "loss": 1.5936, "step": 1662 }, { "epoch": 0.22191086202295168, "grad_norm": 1.0872810931934485, "learning_rate": 1.8130118571365235e-05, "loss": 1.6854, "step": 1663 }, { "epoch": 0.22204430210835335, "grad_norm": 1.180998755115771, "learning_rate": 1.8127601395428104e-05, "loss": 1.5947, "step": 1664 }, { "epoch": 0.222177742193755, "grad_norm": 1.076365662325189, "learning_rate": 1.8125082701347564e-05, "loss": 1.6827, "step": 1665 }, { "epoch": 0.22231118227915667, "grad_norm": 1.286948665055857, "learning_rate": 1.8122562489594084e-05, "loss": 1.684, "step": 1666 }, { "epoch": 0.2224446223645583, "grad_norm": 1.1501953176983266, "learning_rate": 1.812004076063841e-05, "loss": 1.6775, "step": 1667 }, { "epoch": 0.22257806244995998, "grad_norm": 1.1268435794579326, "learning_rate": 1.8117517514951573e-05, "loss": 1.7057, "step": 1668 }, { "epoch": 0.22271150253536162, "grad_norm": 1.1620900257687603, "learning_rate": 1.8114992753004887e-05, "loss": 1.7043, "step": 1669 }, { "epoch": 0.22284494262076326, "grad_norm": 1.1338951625822513, "learning_rate": 1.8112466475269946e-05, "loss": 1.6707, "step": 1670 }, { "epoch": 0.22297838270616493, "grad_norm": 1.242098630419242, "learning_rate": 1.8109938682218633e-05, "loss": 1.6795, "step": 1671 }, { "epoch": 0.22311182279156658, "grad_norm": 1.1679404879363304, "learning_rate": 1.8107409374323107e-05, "loss": 1.6397, "step": 1672 }, { "epoch": 0.22324526287696825, "grad_norm": 1.176042698983641, "learning_rate": 1.8104878552055817e-05, "loss": 1.6591, "step": 1673 }, { "epoch": 0.2233787029623699, "grad_norm": 1.312264219479212, "learning_rate": 1.810234621588949e-05, "loss": 1.6572, "step": 1674 }, { "epoch": 0.22351214304777156, "grad_norm": 1.1369656057368374, "learning_rate": 1.809981236629714e-05, "loss": 1.7209, "step": 1675 }, { "epoch": 0.2236455831331732, "grad_norm": 1.2223686585190343, "learning_rate": 1.809727700375206e-05, "loss": 1.6587, "step": 1676 }, { "epoch": 0.22377902321857485, "grad_norm": 1.0737175824141045, "learning_rate": 1.8094740128727823e-05, "loss": 1.6123, "step": 1677 }, { "epoch": 0.22391246330397652, "grad_norm": 1.0487717384337922, "learning_rate": 1.8092201741698297e-05, "loss": 1.603, "step": 1678 }, { "epoch": 0.22404590338937816, "grad_norm": 1.0154503615440715, "learning_rate": 1.8089661843137616e-05, "loss": 1.6558, "step": 1679 }, { "epoch": 0.22417934347477983, "grad_norm": 1.0612689044451018, "learning_rate": 1.8087120433520207e-05, "loss": 1.6634, "step": 1680 }, { "epoch": 0.22431278356018147, "grad_norm": 1.0783694345863775, "learning_rate": 1.8084577513320777e-05, "loss": 1.6973, "step": 1681 }, { "epoch": 0.22444622364558314, "grad_norm": 1.4059689465691732, "learning_rate": 1.8082033083014315e-05, "loss": 1.6692, "step": 1682 }, { "epoch": 0.22457966373098479, "grad_norm": 1.0174187245468838, "learning_rate": 1.807948714307609e-05, "loss": 1.6965, "step": 1683 }, { "epoch": 0.22471310381638646, "grad_norm": 1.2508321455102909, "learning_rate": 1.8076939693981652e-05, "loss": 1.6924, "step": 1684 }, { "epoch": 0.2248465439017881, "grad_norm": 1.240543795410627, "learning_rate": 1.807439073620684e-05, "loss": 1.6354, "step": 1685 }, { "epoch": 0.22497998398718974, "grad_norm": 1.5809329681051474, "learning_rate": 1.807184027022777e-05, "loss": 1.6596, "step": 1686 }, { "epoch": 0.2251134240725914, "grad_norm": 1.1469707941171723, "learning_rate": 1.806928829652084e-05, "loss": 1.7269, "step": 1687 }, { "epoch": 0.22524686415799305, "grad_norm": 1.0758984989477276, "learning_rate": 1.8066734815562726e-05, "loss": 1.7109, "step": 1688 }, { "epoch": 0.22538030424339472, "grad_norm": 1.299428663534772, "learning_rate": 1.8064179827830393e-05, "loss": 1.6851, "step": 1689 }, { "epoch": 0.22551374432879637, "grad_norm": 1.043487684350109, "learning_rate": 1.8061623333801085e-05, "loss": 1.6506, "step": 1690 }, { "epoch": 0.22564718441419804, "grad_norm": 1.1237380692317867, "learning_rate": 1.805906533395232e-05, "loss": 1.7187, "step": 1691 }, { "epoch": 0.22578062449959968, "grad_norm": 1.148118031537372, "learning_rate": 1.8056505828761904e-05, "loss": 1.6431, "step": 1692 }, { "epoch": 0.22591406458500132, "grad_norm": 1.2374398808076512, "learning_rate": 1.8053944818707932e-05, "loss": 1.6198, "step": 1693 }, { "epoch": 0.226047504670403, "grad_norm": 1.0903178987983715, "learning_rate": 1.8051382304268762e-05, "loss": 1.6473, "step": 1694 }, { "epoch": 0.22618094475580464, "grad_norm": 1.1205806647882046, "learning_rate": 1.8048818285923047e-05, "loss": 1.6681, "step": 1695 }, { "epoch": 0.2263143848412063, "grad_norm": 1.150148641527194, "learning_rate": 1.8046252764149715e-05, "loss": 1.6546, "step": 1696 }, { "epoch": 0.22644782492660795, "grad_norm": 1.1208283417789737, "learning_rate": 1.8043685739427976e-05, "loss": 1.6365, "step": 1697 }, { "epoch": 0.22658126501200962, "grad_norm": 1.9233641539386115, "learning_rate": 1.804111721223732e-05, "loss": 1.6183, "step": 1698 }, { "epoch": 0.22671470509741126, "grad_norm": 1.1665972627988115, "learning_rate": 1.8038547183057524e-05, "loss": 1.6547, "step": 1699 }, { "epoch": 0.2268481451828129, "grad_norm": 1.0590609907387925, "learning_rate": 1.8035975652368635e-05, "loss": 1.6456, "step": 1700 }, { "epoch": 0.22698158526821458, "grad_norm": 1.0642643971203603, "learning_rate": 1.8033402620650986e-05, "loss": 1.7051, "step": 1701 }, { "epoch": 0.22711502535361622, "grad_norm": 1.364226790660327, "learning_rate": 1.803082808838519e-05, "loss": 1.6045, "step": 1702 }, { "epoch": 0.2272484654390179, "grad_norm": 1.1029813182150021, "learning_rate": 1.8028252056052148e-05, "loss": 1.682, "step": 1703 }, { "epoch": 0.22738190552441953, "grad_norm": 1.1377601146917016, "learning_rate": 1.8025674524133022e-05, "loss": 1.7134, "step": 1704 }, { "epoch": 0.2275153456098212, "grad_norm": 1.0987200088336204, "learning_rate": 1.8023095493109273e-05, "loss": 1.6286, "step": 1705 }, { "epoch": 0.22764878569522284, "grad_norm": 1.0648434423713318, "learning_rate": 1.802051496346263e-05, "loss": 1.6687, "step": 1706 }, { "epoch": 0.2277822257806245, "grad_norm": 1.1058007563564674, "learning_rate": 1.8017932935675108e-05, "loss": 1.6537, "step": 1707 }, { "epoch": 0.22791566586602616, "grad_norm": 1.1956539306049252, "learning_rate": 1.8015349410229004e-05, "loss": 1.6273, "step": 1708 }, { "epoch": 0.2280491059514278, "grad_norm": 1.097833315258532, "learning_rate": 1.8012764387606887e-05, "loss": 1.6283, "step": 1709 }, { "epoch": 0.22818254603682947, "grad_norm": 1.032052358075353, "learning_rate": 1.801017786829161e-05, "loss": 1.6069, "step": 1710 }, { "epoch": 0.2283159861222311, "grad_norm": 1.0217813572157182, "learning_rate": 1.800758985276631e-05, "loss": 1.6272, "step": 1711 }, { "epoch": 0.22844942620763278, "grad_norm": 1.1320615088828117, "learning_rate": 1.8005000341514392e-05, "loss": 1.6433, "step": 1712 }, { "epoch": 0.22858286629303443, "grad_norm": 1.197563433343801, "learning_rate": 1.8002409335019552e-05, "loss": 1.6391, "step": 1713 }, { "epoch": 0.22871630637843607, "grad_norm": 1.0876681965716914, "learning_rate": 1.799981683376576e-05, "loss": 1.6864, "step": 1714 }, { "epoch": 0.22884974646383774, "grad_norm": 1.0354932640447034, "learning_rate": 1.7997222838237264e-05, "loss": 1.6708, "step": 1715 }, { "epoch": 0.22898318654923938, "grad_norm": 2.734360348457865, "learning_rate": 1.7994627348918593e-05, "loss": 1.6584, "step": 1716 }, { "epoch": 0.22911662663464105, "grad_norm": 1.1394911063346953, "learning_rate": 1.7992030366294555e-05, "loss": 1.5976, "step": 1717 }, { "epoch": 0.2292500667200427, "grad_norm": 1.1531653881087183, "learning_rate": 1.798943189085024e-05, "loss": 1.705, "step": 1718 }, { "epoch": 0.22938350680544436, "grad_norm": 1.21758738022185, "learning_rate": 1.7986831923071007e-05, "loss": 1.6886, "step": 1719 }, { "epoch": 0.229516946890846, "grad_norm": 1.0529056168835895, "learning_rate": 1.7984230463442505e-05, "loss": 1.6642, "step": 1720 }, { "epoch": 0.22965038697624768, "grad_norm": 1.2125231023635614, "learning_rate": 1.798162751245066e-05, "loss": 1.6182, "step": 1721 }, { "epoch": 0.22978382706164932, "grad_norm": 1.1933594423844636, "learning_rate": 1.7979023070581664e-05, "loss": 1.609, "step": 1722 }, { "epoch": 0.22991726714705096, "grad_norm": 1.0700311747867306, "learning_rate": 1.7976417138322008e-05, "loss": 1.6493, "step": 1723 }, { "epoch": 0.23005070723245263, "grad_norm": 1.2988519356273156, "learning_rate": 1.7973809716158444e-05, "loss": 1.6935, "step": 1724 }, { "epoch": 0.23018414731785428, "grad_norm": 1.2630821844608227, "learning_rate": 1.7971200804578008e-05, "loss": 1.6877, "step": 1725 }, { "epoch": 0.23031758740325595, "grad_norm": 1.0760773885115775, "learning_rate": 1.796859040406802e-05, "loss": 1.6648, "step": 1726 }, { "epoch": 0.2304510274886576, "grad_norm": 1.0659107902842577, "learning_rate": 1.796597851511607e-05, "loss": 1.6765, "step": 1727 }, { "epoch": 0.23058446757405926, "grad_norm": 1.0640564215933035, "learning_rate": 1.796336513821003e-05, "loss": 1.6519, "step": 1728 }, { "epoch": 0.2307179076594609, "grad_norm": 1.044766417697617, "learning_rate": 1.7960750273838046e-05, "loss": 1.7201, "step": 1729 }, { "epoch": 0.23085134774486255, "grad_norm": 1.3620145766553122, "learning_rate": 1.795813392248855e-05, "loss": 1.6141, "step": 1730 }, { "epoch": 0.23098478783026422, "grad_norm": 1.0460951962418277, "learning_rate": 1.7955516084650245e-05, "loss": 1.6431, "step": 1731 }, { "epoch": 0.23111822791566586, "grad_norm": 1.0561334650845267, "learning_rate": 1.7952896760812106e-05, "loss": 1.6234, "step": 1732 }, { "epoch": 0.23125166800106753, "grad_norm": 1.1267791689507525, "learning_rate": 1.7950275951463404e-05, "loss": 1.6478, "step": 1733 }, { "epoch": 0.23138510808646917, "grad_norm": 1.1014450865423768, "learning_rate": 1.7947653657093672e-05, "loss": 1.6141, "step": 1734 }, { "epoch": 0.23151854817187084, "grad_norm": 1.089444038239352, "learning_rate": 1.7945029878192722e-05, "loss": 1.6511, "step": 1735 }, { "epoch": 0.23165198825727248, "grad_norm": 1.1341511933956758, "learning_rate": 1.7942404615250652e-05, "loss": 1.6546, "step": 1736 }, { "epoch": 0.23178542834267413, "grad_norm": 1.1162152522317423, "learning_rate": 1.7939777868757825e-05, "loss": 1.6413, "step": 1737 }, { "epoch": 0.2319188684280758, "grad_norm": 1.0922713243144162, "learning_rate": 1.7937149639204888e-05, "loss": 1.6675, "step": 1738 }, { "epoch": 0.23205230851347744, "grad_norm": 1.1196061707852998, "learning_rate": 1.7934519927082773e-05, "loss": 1.6891, "step": 1739 }, { "epoch": 0.2321857485988791, "grad_norm": 1.0373630063131618, "learning_rate": 1.7931888732882665e-05, "loss": 1.6752, "step": 1740 }, { "epoch": 0.23231918868428075, "grad_norm": 1.1715453355241499, "learning_rate": 1.7929256057096056e-05, "loss": 1.6895, "step": 1741 }, { "epoch": 0.23245262876968242, "grad_norm": 1.1754651897827133, "learning_rate": 1.792662190021469e-05, "loss": 1.6807, "step": 1742 }, { "epoch": 0.23258606885508407, "grad_norm": 1.089104477056595, "learning_rate": 1.79239862627306e-05, "loss": 1.6262, "step": 1743 }, { "epoch": 0.2327195089404857, "grad_norm": 1.047704791420425, "learning_rate": 1.79213491451361e-05, "loss": 1.674, "step": 1744 }, { "epoch": 0.23285294902588738, "grad_norm": 1.0478486718373898, "learning_rate": 1.7918710547923763e-05, "loss": 1.6386, "step": 1745 }, { "epoch": 0.23298638911128902, "grad_norm": 1.0374153527662222, "learning_rate": 1.7916070471586455e-05, "loss": 1.6535, "step": 1746 }, { "epoch": 0.2331198291966907, "grad_norm": 1.1734013425233318, "learning_rate": 1.7913428916617307e-05, "loss": 1.6911, "step": 1747 }, { "epoch": 0.23325326928209233, "grad_norm": 1.0828478675607198, "learning_rate": 1.791078588350974e-05, "loss": 1.6512, "step": 1748 }, { "epoch": 0.233386709367494, "grad_norm": 1.114480950469954, "learning_rate": 1.7908141372757436e-05, "loss": 1.6491, "step": 1749 }, { "epoch": 0.23352014945289565, "grad_norm": 1.1436925883505036, "learning_rate": 1.790549538485436e-05, "loss": 1.6854, "step": 1750 }, { "epoch": 0.23365358953829732, "grad_norm": 10.687146402666658, "learning_rate": 1.7902847920294754e-05, "loss": 1.621, "step": 1751 }, { "epoch": 0.23378702962369896, "grad_norm": 1.3707877310751082, "learning_rate": 1.7900198979573135e-05, "loss": 1.6246, "step": 1752 }, { "epoch": 0.2339204697091006, "grad_norm": 1.1877922731508994, "learning_rate": 1.7897548563184296e-05, "loss": 1.6376, "step": 1753 }, { "epoch": 0.23405390979450227, "grad_norm": 1.36255029255512, "learning_rate": 1.7894896671623296e-05, "loss": 1.6354, "step": 1754 }, { "epoch": 0.23418734987990392, "grad_norm": 1.4838375564796251, "learning_rate": 1.7892243305385487e-05, "loss": 1.6155, "step": 1755 }, { "epoch": 0.2343207899653056, "grad_norm": 1.1622308653549671, "learning_rate": 1.7889588464966488e-05, "loss": 1.6665, "step": 1756 }, { "epoch": 0.23445423005070723, "grad_norm": 1.336261221880211, "learning_rate": 1.788693215086219e-05, "loss": 1.6409, "step": 1757 }, { "epoch": 0.2345876701361089, "grad_norm": 1.2428755384173737, "learning_rate": 1.788427436356876e-05, "loss": 1.6523, "step": 1758 }, { "epoch": 0.23472111022151054, "grad_norm": 1.1734745642911637, "learning_rate": 1.7881615103582642e-05, "loss": 1.6823, "step": 1759 }, { "epoch": 0.23485455030691219, "grad_norm": 1.359796714523189, "learning_rate": 1.7878954371400563e-05, "loss": 1.6766, "step": 1760 }, { "epoch": 0.23498799039231386, "grad_norm": 1.1543938773322537, "learning_rate": 1.787629216751951e-05, "loss": 1.6808, "step": 1761 }, { "epoch": 0.2351214304777155, "grad_norm": 1.2231767398910764, "learning_rate": 1.7873628492436757e-05, "loss": 1.6149, "step": 1762 }, { "epoch": 0.23525487056311717, "grad_norm": 1.3991330507525954, "learning_rate": 1.7870963346649847e-05, "loss": 1.6663, "step": 1763 }, { "epoch": 0.2353883106485188, "grad_norm": 1.2111519435232232, "learning_rate": 1.7868296730656596e-05, "loss": 1.6832, "step": 1764 }, { "epoch": 0.23552175073392048, "grad_norm": 1.1858904981204466, "learning_rate": 1.7865628644955098e-05, "loss": 1.6314, "step": 1765 }, { "epoch": 0.23565519081932212, "grad_norm": 1.2029794734253174, "learning_rate": 1.786295909004373e-05, "loss": 1.5582, "step": 1766 }, { "epoch": 0.23578863090472377, "grad_norm": 1.319501116381306, "learning_rate": 1.7860288066421118e-05, "loss": 1.6763, "step": 1767 }, { "epoch": 0.23592207099012544, "grad_norm": 1.4049089786241191, "learning_rate": 1.7857615574586193e-05, "loss": 1.6361, "step": 1768 }, { "epoch": 0.23605551107552708, "grad_norm": 1.1918917655522046, "learning_rate": 1.785494161503814e-05, "loss": 1.6137, "step": 1769 }, { "epoch": 0.23618895116092875, "grad_norm": 1.144754220344139, "learning_rate": 1.7852266188276422e-05, "loss": 1.6175, "step": 1770 }, { "epoch": 0.2363223912463304, "grad_norm": 1.3381800606475023, "learning_rate": 1.7849589294800787e-05, "loss": 1.6195, "step": 1771 }, { "epoch": 0.23645583133173206, "grad_norm": 1.0458078567231872, "learning_rate": 1.7846910935111242e-05, "loss": 1.6848, "step": 1772 }, { "epoch": 0.2365892714171337, "grad_norm": 1.4105954493444146, "learning_rate": 1.7844231109708072e-05, "loss": 1.654, "step": 1773 }, { "epoch": 0.23672271150253535, "grad_norm": 1.289151142605529, "learning_rate": 1.784154981909184e-05, "loss": 1.6533, "step": 1774 }, { "epoch": 0.23685615158793702, "grad_norm": 1.2707742721373032, "learning_rate": 1.7838867063763383e-05, "loss": 1.581, "step": 1775 }, { "epoch": 0.23698959167333866, "grad_norm": 1.2053249887223572, "learning_rate": 1.7836182844223808e-05, "loss": 1.6464, "step": 1776 }, { "epoch": 0.23712303175874033, "grad_norm": 1.06210419581067, "learning_rate": 1.7833497160974495e-05, "loss": 1.5838, "step": 1777 }, { "epoch": 0.23725647184414198, "grad_norm": 1.3047023677050178, "learning_rate": 1.78308100145171e-05, "loss": 1.5983, "step": 1778 }, { "epoch": 0.23738991192954365, "grad_norm": 1.29218147582307, "learning_rate": 1.7828121405353553e-05, "loss": 1.6862, "step": 1779 }, { "epoch": 0.2375233520149453, "grad_norm": 1.0466696307900745, "learning_rate": 1.7825431333986052e-05, "loss": 1.6623, "step": 1780 }, { "epoch": 0.23765679210034693, "grad_norm": 1.2349477939262232, "learning_rate": 1.7822739800917073e-05, "loss": 1.5709, "step": 1781 }, { "epoch": 0.2377902321857486, "grad_norm": 1.2147084949319535, "learning_rate": 1.782004680664937e-05, "loss": 1.6726, "step": 1782 }, { "epoch": 0.23792367227115024, "grad_norm": 1.0232408573291623, "learning_rate": 1.7817352351685954e-05, "loss": 1.6278, "step": 1783 }, { "epoch": 0.23805711235655191, "grad_norm": 1.13584849589059, "learning_rate": 1.781465643653012e-05, "loss": 1.7017, "step": 1784 }, { "epoch": 0.23819055244195356, "grad_norm": 1.0754152565333037, "learning_rate": 1.7811959061685438e-05, "loss": 1.6089, "step": 1785 }, { "epoch": 0.23832399252735523, "grad_norm": 1.1362601818532363, "learning_rate": 1.7809260227655747e-05, "loss": 1.6161, "step": 1786 }, { "epoch": 0.23845743261275687, "grad_norm": 1.0336376244273875, "learning_rate": 1.7806559934945156e-05, "loss": 1.6126, "step": 1787 }, { "epoch": 0.23859087269815854, "grad_norm": 1.2284171891499982, "learning_rate": 1.7803858184058053e-05, "loss": 1.609, "step": 1788 }, { "epoch": 0.23872431278356018, "grad_norm": 1.0993616922411698, "learning_rate": 1.780115497549909e-05, "loss": 1.6893, "step": 1789 }, { "epoch": 0.23885775286896183, "grad_norm": 1.0345469608413957, "learning_rate": 1.7798450309773195e-05, "loss": 1.6798, "step": 1790 }, { "epoch": 0.2389911929543635, "grad_norm": 1.2117978955967876, "learning_rate": 1.7795744187385575e-05, "loss": 1.6246, "step": 1791 }, { "epoch": 0.23912463303976514, "grad_norm": 1.0463913434816907, "learning_rate": 1.7793036608841694e-05, "loss": 1.6595, "step": 1792 }, { "epoch": 0.2392580731251668, "grad_norm": 1.1218163300477884, "learning_rate": 1.7790327574647306e-05, "loss": 1.6288, "step": 1793 }, { "epoch": 0.23939151321056845, "grad_norm": 1.3920149154127004, "learning_rate": 1.778761708530842e-05, "loss": 1.638, "step": 1794 }, { "epoch": 0.23952495329597012, "grad_norm": 1.0453245062677134, "learning_rate": 1.7784905141331334e-05, "loss": 1.7159, "step": 1795 }, { "epoch": 0.23965839338137176, "grad_norm": 1.0905984393131543, "learning_rate": 1.7782191743222594e-05, "loss": 1.6959, "step": 1796 }, { "epoch": 0.2397918334667734, "grad_norm": 1.0576313468931025, "learning_rate": 1.7779476891489044e-05, "loss": 1.6626, "step": 1797 }, { "epoch": 0.23992527355217508, "grad_norm": 1.1004207621683646, "learning_rate": 1.777676058663778e-05, "loss": 1.6204, "step": 1798 }, { "epoch": 0.24005871363757672, "grad_norm": 1.1873198699247158, "learning_rate": 1.7774042829176186e-05, "loss": 1.7395, "step": 1799 }, { "epoch": 0.2401921537229784, "grad_norm": 1.0687609878005364, "learning_rate": 1.77713236196119e-05, "loss": 1.6988, "step": 1800 }, { "epoch": 0.24032559380838003, "grad_norm": 1.0679027457183934, "learning_rate": 1.7768602958452837e-05, "loss": 1.6671, "step": 1801 }, { "epoch": 0.2404590338937817, "grad_norm": 1.0417473997306737, "learning_rate": 1.7765880846207193e-05, "loss": 1.631, "step": 1802 }, { "epoch": 0.24059247397918335, "grad_norm": 1.119803585337448, "learning_rate": 1.7763157283383426e-05, "loss": 1.6908, "step": 1803 }, { "epoch": 0.240725914064585, "grad_norm": 1.0143233262923002, "learning_rate": 1.7760432270490266e-05, "loss": 1.6342, "step": 1804 }, { "epoch": 0.24085935414998666, "grad_norm": 1.2812766744363617, "learning_rate": 1.7757705808036714e-05, "loss": 1.6356, "step": 1805 }, { "epoch": 0.2409927942353883, "grad_norm": 1.0926304516768464, "learning_rate": 1.775497789653204e-05, "loss": 1.6898, "step": 1806 }, { "epoch": 0.24112623432078997, "grad_norm": 1.013517851069127, "learning_rate": 1.7752248536485787e-05, "loss": 1.6464, "step": 1807 }, { "epoch": 0.24125967440619162, "grad_norm": 1.0833079031525588, "learning_rate": 1.7749517728407775e-05, "loss": 1.6374, "step": 1808 }, { "epoch": 0.24139311449159329, "grad_norm": 1.031007153005283, "learning_rate": 1.774678547280808e-05, "loss": 1.6838, "step": 1809 }, { "epoch": 0.24152655457699493, "grad_norm": 1.1069793411968527, "learning_rate": 1.7744051770197058e-05, "loss": 1.6641, "step": 1810 }, { "epoch": 0.24165999466239657, "grad_norm": 1.143899658709366, "learning_rate": 1.7741316621085336e-05, "loss": 1.7275, "step": 1811 }, { "epoch": 0.24179343474779824, "grad_norm": 1.0641576337141183, "learning_rate": 1.7738580025983808e-05, "loss": 1.6649, "step": 1812 }, { "epoch": 0.24192687483319988, "grad_norm": 1.2238048171560199, "learning_rate": 1.773584198540364e-05, "loss": 1.6442, "step": 1813 }, { "epoch": 0.24206031491860155, "grad_norm": 1.2197399885035782, "learning_rate": 1.773310249985626e-05, "loss": 1.648, "step": 1814 }, { "epoch": 0.2421937550040032, "grad_norm": 1.0167788681079188, "learning_rate": 1.773036156985338e-05, "loss": 1.6676, "step": 1815 }, { "epoch": 0.24232719508940487, "grad_norm": 1.2114816489016487, "learning_rate": 1.7727619195906972e-05, "loss": 1.6639, "step": 1816 }, { "epoch": 0.2424606351748065, "grad_norm": 8.428467351350283, "learning_rate": 1.7724875378529282e-05, "loss": 1.6576, "step": 1817 }, { "epoch": 0.24259407526020815, "grad_norm": 1.0465542814144335, "learning_rate": 1.7722130118232823e-05, "loss": 1.6399, "step": 1818 }, { "epoch": 0.24272751534560982, "grad_norm": 1.1988097703131617, "learning_rate": 1.7719383415530375e-05, "loss": 1.626, "step": 1819 }, { "epoch": 0.24286095543101147, "grad_norm": 1.1423995626336525, "learning_rate": 1.7716635270934996e-05, "loss": 1.6633, "step": 1820 }, { "epoch": 0.24299439551641314, "grad_norm": 1.0587665476874228, "learning_rate": 1.7713885684960002e-05, "loss": 1.6313, "step": 1821 }, { "epoch": 0.24312783560181478, "grad_norm": 1.2825083624500806, "learning_rate": 1.771113465811899e-05, "loss": 1.6683, "step": 1822 }, { "epoch": 0.24326127568721645, "grad_norm": 1.167576975923983, "learning_rate": 1.770838219092582e-05, "loss": 1.6607, "step": 1823 }, { "epoch": 0.2433947157726181, "grad_norm": 1.0672931246277235, "learning_rate": 1.7705628283894617e-05, "loss": 1.6341, "step": 1824 }, { "epoch": 0.24352815585801976, "grad_norm": 1.134208618821163, "learning_rate": 1.7702872937539783e-05, "loss": 1.7071, "step": 1825 }, { "epoch": 0.2436615959434214, "grad_norm": 1.2440133024491684, "learning_rate": 1.7700116152375986e-05, "loss": 1.7007, "step": 1826 }, { "epoch": 0.24379503602882305, "grad_norm": 1.0256084895859983, "learning_rate": 1.769735792891816e-05, "loss": 1.6557, "step": 1827 }, { "epoch": 0.24392847611422472, "grad_norm": 1.063245387715446, "learning_rate": 1.769459826768151e-05, "loss": 1.6766, "step": 1828 }, { "epoch": 0.24406191619962636, "grad_norm": 1.079027783398279, "learning_rate": 1.7691837169181513e-05, "loss": 1.6567, "step": 1829 }, { "epoch": 0.24419535628502803, "grad_norm": 1.332484756061736, "learning_rate": 1.7689074633933903e-05, "loss": 1.6403, "step": 1830 }, { "epoch": 0.24432879637042967, "grad_norm": 1.114632167604677, "learning_rate": 1.7686310662454698e-05, "loss": 1.654, "step": 1831 }, { "epoch": 0.24446223645583134, "grad_norm": 1.0194057684489455, "learning_rate": 1.7683545255260173e-05, "loss": 1.6437, "step": 1832 }, { "epoch": 0.244595676541233, "grad_norm": 1.1549546103317554, "learning_rate": 1.7680778412866876e-05, "loss": 1.581, "step": 1833 }, { "epoch": 0.24472911662663463, "grad_norm": 1.2807877506947511, "learning_rate": 1.7678010135791616e-05, "loss": 1.6518, "step": 1834 }, { "epoch": 0.2448625567120363, "grad_norm": 1.246730596301354, "learning_rate": 1.7675240424551483e-05, "loss": 1.6254, "step": 1835 }, { "epoch": 0.24499599679743794, "grad_norm": 1.0865214357163941, "learning_rate": 1.7672469279663827e-05, "loss": 1.6957, "step": 1836 }, { "epoch": 0.2451294368828396, "grad_norm": 1.247069373038027, "learning_rate": 1.766969670164626e-05, "loss": 1.5999, "step": 1837 }, { "epoch": 0.24526287696824126, "grad_norm": 1.0837926981903307, "learning_rate": 1.7666922691016673e-05, "loss": 1.6915, "step": 1838 }, { "epoch": 0.24539631705364293, "grad_norm": 1.2095352189589086, "learning_rate": 1.766414724829322e-05, "loss": 1.6801, "step": 1839 }, { "epoch": 0.24552975713904457, "grad_norm": 1.0527519692262675, "learning_rate": 1.7661370373994318e-05, "loss": 1.7169, "step": 1840 }, { "epoch": 0.2456631972244462, "grad_norm": 1.06283226700497, "learning_rate": 1.765859206863866e-05, "loss": 1.653, "step": 1841 }, { "epoch": 0.24579663730984788, "grad_norm": 1.1067902633010984, "learning_rate": 1.7655812332745198e-05, "loss": 1.6609, "step": 1842 }, { "epoch": 0.24593007739524952, "grad_norm": 1.11251472312465, "learning_rate": 1.7653031166833158e-05, "loss": 1.6144, "step": 1843 }, { "epoch": 0.2460635174806512, "grad_norm": 1.074234471124788, "learning_rate": 1.765024857142203e-05, "loss": 1.6302, "step": 1844 }, { "epoch": 0.24619695756605284, "grad_norm": 1.2669376467474798, "learning_rate": 1.764746454703157e-05, "loss": 1.7182, "step": 1845 }, { "epoch": 0.2463303976514545, "grad_norm": 1.0774090725880676, "learning_rate": 1.7644679094181803e-05, "loss": 1.6356, "step": 1846 }, { "epoch": 0.24646383773685615, "grad_norm": 1.2670775121665758, "learning_rate": 1.7641892213393017e-05, "loss": 1.6567, "step": 1847 }, { "epoch": 0.2465972778222578, "grad_norm": 1.2199961678910605, "learning_rate": 1.7639103905185774e-05, "loss": 1.6689, "step": 1848 }, { "epoch": 0.24673071790765946, "grad_norm": 1.093950746093173, "learning_rate": 1.7636314170080893e-05, "loss": 1.6764, "step": 1849 }, { "epoch": 0.2468641579930611, "grad_norm": 1.3555020553608637, "learning_rate": 1.7633523008599468e-05, "loss": 1.6779, "step": 1850 }, { "epoch": 0.24699759807846278, "grad_norm": 1.2770301389385996, "learning_rate": 1.7630730421262857e-05, "loss": 1.6845, "step": 1851 }, { "epoch": 0.24713103816386442, "grad_norm": 1.04055907190113, "learning_rate": 1.7627936408592684e-05, "loss": 1.71, "step": 1852 }, { "epoch": 0.2472644782492661, "grad_norm": 1.2660383381787914, "learning_rate": 1.7625140971110834e-05, "loss": 1.6673, "step": 1853 }, { "epoch": 0.24739791833466773, "grad_norm": 1.1241950492161565, "learning_rate": 1.7622344109339468e-05, "loss": 1.6426, "step": 1854 }, { "epoch": 0.24753135842006938, "grad_norm": 1.1634703156362944, "learning_rate": 1.7619545823801008e-05, "loss": 1.6395, "step": 1855 }, { "epoch": 0.24766479850547105, "grad_norm": 1.1887592831201785, "learning_rate": 1.7616746115018136e-05, "loss": 1.6759, "step": 1856 }, { "epoch": 0.2477982385908727, "grad_norm": 1.04558208144064, "learning_rate": 1.7613944983513812e-05, "loss": 1.6663, "step": 1857 }, { "epoch": 0.24793167867627436, "grad_norm": 1.2246725069546616, "learning_rate": 1.7611142429811253e-05, "loss": 1.6571, "step": 1858 }, { "epoch": 0.248065118761676, "grad_norm": 1.064172850739474, "learning_rate": 1.7608338454433945e-05, "loss": 1.6479, "step": 1859 }, { "epoch": 0.24819855884707767, "grad_norm": 1.1389852926930035, "learning_rate": 1.7605533057905635e-05, "loss": 1.6456, "step": 1860 }, { "epoch": 0.24833199893247931, "grad_norm": 1.1395310795650293, "learning_rate": 1.7602726240750346e-05, "loss": 1.6769, "step": 1861 }, { "epoch": 0.24846543901788098, "grad_norm": 1.1049036582128695, "learning_rate": 1.7599918003492354e-05, "loss": 1.6121, "step": 1862 }, { "epoch": 0.24859887910328263, "grad_norm": 1.2866424724351742, "learning_rate": 1.7597108346656206e-05, "loss": 1.6422, "step": 1863 }, { "epoch": 0.24873231918868427, "grad_norm": 1.0354908069373776, "learning_rate": 1.7594297270766713e-05, "loss": 1.6581, "step": 1864 }, { "epoch": 0.24886575927408594, "grad_norm": 1.0312439084550453, "learning_rate": 1.7591484776348958e-05, "loss": 1.6143, "step": 1865 }, { "epoch": 0.24899919935948758, "grad_norm": 1.0200041648342764, "learning_rate": 1.7588670863928278e-05, "loss": 1.6014, "step": 1866 }, { "epoch": 0.24913263944488925, "grad_norm": 1.0618120625694842, "learning_rate": 1.758585553403028e-05, "loss": 1.6641, "step": 1867 }, { "epoch": 0.2492660795302909, "grad_norm": 1.0713518559188207, "learning_rate": 1.758303878718084e-05, "loss": 1.6472, "step": 1868 }, { "epoch": 0.24939951961569257, "grad_norm": 1.0046786864644603, "learning_rate": 1.7580220623906088e-05, "loss": 1.6321, "step": 1869 }, { "epoch": 0.2495329597010942, "grad_norm": 1.1134637775554515, "learning_rate": 1.7577401044732428e-05, "loss": 1.6474, "step": 1870 }, { "epoch": 0.24966639978649585, "grad_norm": 1.27550190258781, "learning_rate": 1.7574580050186524e-05, "loss": 1.6594, "step": 1871 }, { "epoch": 0.24979983987189752, "grad_norm": 1.0247658171892224, "learning_rate": 1.7571757640795308e-05, "loss": 1.6159, "step": 1872 }, { "epoch": 0.24993327995729916, "grad_norm": 1.0462382916284119, "learning_rate": 1.7568933817085966e-05, "loss": 1.5982, "step": 1873 }, { "epoch": 0.2500667200427008, "grad_norm": 1.0804215813368583, "learning_rate": 1.756610857958597e-05, "loss": 1.7014, "step": 1874 }, { "epoch": 0.2502001601281025, "grad_norm": 1.0738193355707433, "learning_rate": 1.756328192882303e-05, "loss": 1.6642, "step": 1875 }, { "epoch": 0.25033360021350415, "grad_norm": 1.3455199376826739, "learning_rate": 1.7560453865325143e-05, "loss": 1.6508, "step": 1876 }, { "epoch": 0.2504670402989058, "grad_norm": 1.084814244415092, "learning_rate": 1.7557624389620548e-05, "loss": 1.6794, "step": 1877 }, { "epoch": 0.25060048038430743, "grad_norm": 1.061930007641208, "learning_rate": 1.7554793502237765e-05, "loss": 1.6879, "step": 1878 }, { "epoch": 0.2507339204697091, "grad_norm": 1.0450629751248275, "learning_rate": 1.7551961203705573e-05, "loss": 1.6783, "step": 1879 }, { "epoch": 0.2508673605551108, "grad_norm": 1.0478570277539474, "learning_rate": 1.7549127494553005e-05, "loss": 1.6616, "step": 1880 }, { "epoch": 0.2510008006405124, "grad_norm": 1.055824286400764, "learning_rate": 1.7546292375309375e-05, "loss": 1.5953, "step": 1881 }, { "epoch": 0.25113424072591406, "grad_norm": 1.2242477360612551, "learning_rate": 1.7543455846504245e-05, "loss": 1.6843, "step": 1882 }, { "epoch": 0.25126768081131573, "grad_norm": 1.0389910946552412, "learning_rate": 1.754061790866745e-05, "loss": 1.6307, "step": 1883 }, { "epoch": 0.2514011208967174, "grad_norm": 1.054285947670663, "learning_rate": 1.7537778562329083e-05, "loss": 1.6369, "step": 1884 }, { "epoch": 0.251534560982119, "grad_norm": 1.2248259207278507, "learning_rate": 1.7534937808019502e-05, "loss": 1.6693, "step": 1885 }, { "epoch": 0.2516680010675207, "grad_norm": 19.66684114165284, "learning_rate": 1.7532095646269324e-05, "loss": 1.7343, "step": 1886 }, { "epoch": 0.25180144115292236, "grad_norm": 1.1148849102770775, "learning_rate": 1.7529252077609435e-05, "loss": 1.6762, "step": 1887 }, { "epoch": 0.25193488123832397, "grad_norm": 1.3680236555548535, "learning_rate": 1.7526407102570985e-05, "loss": 1.6241, "step": 1888 }, { "epoch": 0.25206832132372564, "grad_norm": 1.0884368102610955, "learning_rate": 1.7523560721685376e-05, "loss": 1.6284, "step": 1889 }, { "epoch": 0.2522017614091273, "grad_norm": 1.0802430716006264, "learning_rate": 1.7520712935484288e-05, "loss": 1.6485, "step": 1890 }, { "epoch": 0.252335201494529, "grad_norm": 1.0586064611090562, "learning_rate": 1.7517863744499645e-05, "loss": 1.5886, "step": 1891 }, { "epoch": 0.2524686415799306, "grad_norm": 1.0549630121480713, "learning_rate": 1.7515013149263654e-05, "loss": 1.6656, "step": 1892 }, { "epoch": 0.25260208166533227, "grad_norm": 1.021796454571563, "learning_rate": 1.7512161150308763e-05, "loss": 1.6442, "step": 1893 }, { "epoch": 0.25273552175073394, "grad_norm": 1.0820683485752427, "learning_rate": 1.7509307748167703e-05, "loss": 1.7135, "step": 1894 }, { "epoch": 0.25286896183613555, "grad_norm": 1.1110777101924227, "learning_rate": 1.750645294337345e-05, "loss": 1.6766, "step": 1895 }, { "epoch": 0.2530024019215372, "grad_norm": 1.380759537151671, "learning_rate": 1.750359673645925e-05, "loss": 1.6784, "step": 1896 }, { "epoch": 0.2531358420069389, "grad_norm": 1.2299813651145617, "learning_rate": 1.750073912795861e-05, "loss": 1.6679, "step": 1897 }, { "epoch": 0.25326928209234056, "grad_norm": 13.108704611195062, "learning_rate": 1.7497880118405305e-05, "loss": 1.6712, "step": 1898 }, { "epoch": 0.2534027221777422, "grad_norm": 8.088838037228543, "learning_rate": 1.7495019708333362e-05, "loss": 1.8226, "step": 1899 }, { "epoch": 0.25353616226314385, "grad_norm": 1.284583284047351, "learning_rate": 1.7492157898277066e-05, "loss": 1.6856, "step": 1900 }, { "epoch": 0.2536696023485455, "grad_norm": 1.1185025918470872, "learning_rate": 1.7489294688770976e-05, "loss": 1.6563, "step": 1901 }, { "epoch": 0.25380304243394713, "grad_norm": 1.310393482796925, "learning_rate": 1.748643008034991e-05, "loss": 1.6655, "step": 1902 }, { "epoch": 0.2539364825193488, "grad_norm": 1.2823242056261748, "learning_rate": 1.748356407354894e-05, "loss": 1.6294, "step": 1903 }, { "epoch": 0.2540699226047505, "grad_norm": 1.115456828324836, "learning_rate": 1.7480696668903405e-05, "loss": 1.6311, "step": 1904 }, { "epoch": 0.25420336269015215, "grad_norm": 1.098358515379306, "learning_rate": 1.7477827866948902e-05, "loss": 1.6474, "step": 1905 }, { "epoch": 0.25433680277555376, "grad_norm": 1.2499952987679348, "learning_rate": 1.7474957668221294e-05, "loss": 1.6127, "step": 1906 }, { "epoch": 0.25447024286095543, "grad_norm": 1.0653256471838468, "learning_rate": 1.7472086073256695e-05, "loss": 1.6237, "step": 1907 }, { "epoch": 0.2546036829463571, "grad_norm": 1.100375109746571, "learning_rate": 1.7469213082591493e-05, "loss": 1.6623, "step": 1908 }, { "epoch": 0.2547371230317587, "grad_norm": 1.0728865640963992, "learning_rate": 1.746633869676233e-05, "loss": 1.6604, "step": 1909 }, { "epoch": 0.2548705631171604, "grad_norm": 1.4592548751795749, "learning_rate": 1.7463462916306103e-05, "loss": 1.7118, "step": 1910 }, { "epoch": 0.25500400320256206, "grad_norm": 1.1932048673196494, "learning_rate": 1.7460585741759978e-05, "loss": 1.6134, "step": 1911 }, { "epoch": 0.2551374432879637, "grad_norm": 1.2140073379811964, "learning_rate": 1.7457707173661378e-05, "loss": 1.7213, "step": 1912 }, { "epoch": 0.25527088337336534, "grad_norm": 1.1053613025091942, "learning_rate": 1.7454827212547988e-05, "loss": 1.6205, "step": 1913 }, { "epoch": 0.255404323458767, "grad_norm": 1.0688175948277936, "learning_rate": 1.745194585895775e-05, "loss": 1.6845, "step": 1914 }, { "epoch": 0.2555377635441687, "grad_norm": 1.0696512783400127, "learning_rate": 1.744906311342887e-05, "loss": 1.6337, "step": 1915 }, { "epoch": 0.2556712036295703, "grad_norm": 1.0723419328954982, "learning_rate": 1.744617897649981e-05, "loss": 1.631, "step": 1916 }, { "epoch": 0.25580464371497197, "grad_norm": 1.0336966371989529, "learning_rate": 1.7443293448709297e-05, "loss": 1.6656, "step": 1917 }, { "epoch": 0.25593808380037364, "grad_norm": 1.0461578537171803, "learning_rate": 1.7440406530596312e-05, "loss": 1.6317, "step": 1918 }, { "epoch": 0.2560715238857753, "grad_norm": 1.1097399246146262, "learning_rate": 1.74375182227001e-05, "loss": 1.6383, "step": 1919 }, { "epoch": 0.2562049639711769, "grad_norm": 1.0337650567788563, "learning_rate": 1.7434628525560163e-05, "loss": 1.6528, "step": 1920 }, { "epoch": 0.2563384040565786, "grad_norm": 1.018254122534702, "learning_rate": 1.7431737439716262e-05, "loss": 1.6495, "step": 1921 }, { "epoch": 0.25647184414198027, "grad_norm": 1.0402215429993655, "learning_rate": 1.7428844965708425e-05, "loss": 1.6646, "step": 1922 }, { "epoch": 0.2566052842273819, "grad_norm": 1.2456549178005256, "learning_rate": 1.7425951104076925e-05, "loss": 1.6625, "step": 1923 }, { "epoch": 0.25673872431278355, "grad_norm": 1.1766568400761854, "learning_rate": 1.7423055855362306e-05, "loss": 1.6389, "step": 1924 }, { "epoch": 0.2568721643981852, "grad_norm": 0.9891969725565736, "learning_rate": 1.742015922010537e-05, "loss": 1.6642, "step": 1925 }, { "epoch": 0.2570056044835869, "grad_norm": 1.0679382580870513, "learning_rate": 1.7417261198847175e-05, "loss": 1.6542, "step": 1926 }, { "epoch": 0.2571390445689885, "grad_norm": 1.0468412544977608, "learning_rate": 1.7414361792129034e-05, "loss": 1.6376, "step": 1927 }, { "epoch": 0.2572724846543902, "grad_norm": 1.0350649856988523, "learning_rate": 1.7411461000492527e-05, "loss": 1.6653, "step": 1928 }, { "epoch": 0.25740592473979185, "grad_norm": 1.036385533715168, "learning_rate": 1.7408558824479485e-05, "loss": 1.6968, "step": 1929 }, { "epoch": 0.25753936482519346, "grad_norm": 1.0569592050053869, "learning_rate": 1.7405655264632007e-05, "loss": 1.6024, "step": 1930 }, { "epoch": 0.25767280491059513, "grad_norm": 1.004763067338033, "learning_rate": 1.7402750321492445e-05, "loss": 1.6594, "step": 1931 }, { "epoch": 0.2578062449959968, "grad_norm": 1.1608084994679726, "learning_rate": 1.7399843995603404e-05, "loss": 1.657, "step": 1932 }, { "epoch": 0.2579396850813985, "grad_norm": 1.1836864291351534, "learning_rate": 1.739693628750775e-05, "loss": 1.6493, "step": 1933 }, { "epoch": 0.2580731251668001, "grad_norm": 1.0463963871461142, "learning_rate": 1.739402719774862e-05, "loss": 1.6738, "step": 1934 }, { "epoch": 0.25820656525220176, "grad_norm": 1.0417140646609107, "learning_rate": 1.7391116726869395e-05, "loss": 1.6386, "step": 1935 }, { "epoch": 0.25834000533760343, "grad_norm": 1.0230471101003555, "learning_rate": 1.7388204875413716e-05, "loss": 1.6851, "step": 1936 }, { "epoch": 0.25847344542300504, "grad_norm": 1.175831444603941, "learning_rate": 1.7385291643925478e-05, "loss": 1.631, "step": 1937 }, { "epoch": 0.2586068855084067, "grad_norm": 1.0005993864292368, "learning_rate": 1.738237703294885e-05, "loss": 1.6293, "step": 1938 }, { "epoch": 0.2587403255938084, "grad_norm": 1.071233770103943, "learning_rate": 1.7379461043028242e-05, "loss": 1.6477, "step": 1939 }, { "epoch": 0.25887376567921005, "grad_norm": 1.27924501051401, "learning_rate": 1.7376543674708332e-05, "loss": 1.6601, "step": 1940 }, { "epoch": 0.25900720576461167, "grad_norm": 1.0592769595491713, "learning_rate": 1.737362492853405e-05, "loss": 1.7318, "step": 1941 }, { "epoch": 0.25914064585001334, "grad_norm": 1.0043569196961857, "learning_rate": 1.737070480505058e-05, "loss": 1.6029, "step": 1942 }, { "epoch": 0.259274085935415, "grad_norm": 1.0651608229693987, "learning_rate": 1.7367783304803373e-05, "loss": 1.6302, "step": 1943 }, { "epoch": 0.2594075260208166, "grad_norm": 1.0274924793106943, "learning_rate": 1.736486042833813e-05, "loss": 1.6225, "step": 1944 }, { "epoch": 0.2595409661062183, "grad_norm": 1.0973325495792983, "learning_rate": 1.7361936176200806e-05, "loss": 1.637, "step": 1945 }, { "epoch": 0.25967440619161997, "grad_norm": 1.1480559231420868, "learning_rate": 1.735901054893763e-05, "loss": 1.6579, "step": 1946 }, { "epoch": 0.25980784627702164, "grad_norm": 9.742353418129637, "learning_rate": 1.7356083547095065e-05, "loss": 1.7036, "step": 1947 }, { "epoch": 0.25994128636242325, "grad_norm": 1.1930642002916203, "learning_rate": 1.7353155171219845e-05, "loss": 1.686, "step": 1948 }, { "epoch": 0.2600747264478249, "grad_norm": 1.1344495102081051, "learning_rate": 1.7350225421858963e-05, "loss": 1.6927, "step": 1949 }, { "epoch": 0.2602081665332266, "grad_norm": 1.160268471890251, "learning_rate": 1.7347294299559656e-05, "loss": 1.6395, "step": 1950 }, { "epoch": 0.26034160661862826, "grad_norm": 1.0571542473634332, "learning_rate": 1.7344361804869422e-05, "loss": 1.6482, "step": 1951 }, { "epoch": 0.2604750467040299, "grad_norm": 1.0741618519583103, "learning_rate": 1.734142793833602e-05, "loss": 1.6322, "step": 1952 }, { "epoch": 0.26060848678943155, "grad_norm": 1.0810620959227508, "learning_rate": 1.733849270050747e-05, "loss": 1.6614, "step": 1953 }, { "epoch": 0.2607419268748332, "grad_norm": 1.650858745406716, "learning_rate": 1.7335556091932033e-05, "loss": 1.6594, "step": 1954 }, { "epoch": 0.26087536696023483, "grad_norm": 1.1357173618685, "learning_rate": 1.7332618113158238e-05, "loss": 1.6246, "step": 1955 }, { "epoch": 0.2610088070456365, "grad_norm": 1.228866928090115, "learning_rate": 1.7329678764734865e-05, "loss": 1.6502, "step": 1956 }, { "epoch": 0.2611422471310382, "grad_norm": 1.063500974508951, "learning_rate": 1.732673804721095e-05, "loss": 1.6553, "step": 1957 }, { "epoch": 0.26127568721643984, "grad_norm": 1.1182247677924306, "learning_rate": 1.732379596113578e-05, "loss": 1.5929, "step": 1958 }, { "epoch": 0.26140912730184146, "grad_norm": 1.0343485691283105, "learning_rate": 1.7320852507058914e-05, "loss": 1.6195, "step": 1959 }, { "epoch": 0.26154256738724313, "grad_norm": 1.1899371182298315, "learning_rate": 1.7317907685530152e-05, "loss": 1.6208, "step": 1960 }, { "epoch": 0.2616760074726448, "grad_norm": 1.2655530384071296, "learning_rate": 1.7314961497099546e-05, "loss": 1.6743, "step": 1961 }, { "epoch": 0.2618094475580464, "grad_norm": 1.1142841192035378, "learning_rate": 1.7312013942317423e-05, "loss": 1.6915, "step": 1962 }, { "epoch": 0.2619428876434481, "grad_norm": 1.2748126364938344, "learning_rate": 1.7309065021734345e-05, "loss": 1.6708, "step": 1963 }, { "epoch": 0.26207632772884976, "grad_norm": 1.0763574665626, "learning_rate": 1.7306114735901135e-05, "loss": 1.6867, "step": 1964 }, { "epoch": 0.2622097678142514, "grad_norm": 1.1840696437423681, "learning_rate": 1.7303163085368876e-05, "loss": 1.6526, "step": 1965 }, { "epoch": 0.26234320789965304, "grad_norm": 1.0215098328750423, "learning_rate": 1.7300210070688902e-05, "loss": 1.6395, "step": 1966 }, { "epoch": 0.2624766479850547, "grad_norm": 1.0619335145218514, "learning_rate": 1.7297255692412807e-05, "loss": 1.6554, "step": 1967 }, { "epoch": 0.2626100880704564, "grad_norm": 1.0512215210347395, "learning_rate": 1.7294299951092427e-05, "loss": 1.6577, "step": 1968 }, { "epoch": 0.262743528155858, "grad_norm": 1.1231704876083466, "learning_rate": 1.7291342847279864e-05, "loss": 1.61, "step": 1969 }, { "epoch": 0.26287696824125967, "grad_norm": 1.051508193824374, "learning_rate": 1.7288384381527473e-05, "loss": 1.6351, "step": 1970 }, { "epoch": 0.26301040832666134, "grad_norm": 1.0623240318595546, "learning_rate": 1.7285424554387863e-05, "loss": 1.6435, "step": 1971 }, { "epoch": 0.263143848412063, "grad_norm": 1.0134535097893844, "learning_rate": 1.7282463366413895e-05, "loss": 1.6195, "step": 1972 }, { "epoch": 0.2632772884974646, "grad_norm": 1.0944547998591982, "learning_rate": 1.7279500818158678e-05, "loss": 1.6385, "step": 1973 }, { "epoch": 0.2634107285828663, "grad_norm": 1.014436566990623, "learning_rate": 1.7276536910175596e-05, "loss": 1.5849, "step": 1974 }, { "epoch": 0.26354416866826796, "grad_norm": 1.0957069455341975, "learning_rate": 1.727357164301826e-05, "loss": 1.6463, "step": 1975 }, { "epoch": 0.2636776087536696, "grad_norm": 1.2346921385867662, "learning_rate": 1.7270605017240557e-05, "loss": 1.653, "step": 1976 }, { "epoch": 0.26381104883907125, "grad_norm": 1.0897637998274192, "learning_rate": 1.7267637033396613e-05, "loss": 1.6921, "step": 1977 }, { "epoch": 0.2639444889244729, "grad_norm": 1.415961246967308, "learning_rate": 1.7264667692040816e-05, "loss": 1.6308, "step": 1978 }, { "epoch": 0.2640779290098746, "grad_norm": 14.277629558243925, "learning_rate": 1.726169699372781e-05, "loss": 1.6807, "step": 1979 }, { "epoch": 0.2642113690952762, "grad_norm": 1.3259197439263068, "learning_rate": 1.725872493901248e-05, "loss": 1.6796, "step": 1980 }, { "epoch": 0.2643448091806779, "grad_norm": 1.4714297960193359, "learning_rate": 1.7255751528449972e-05, "loss": 1.6084, "step": 1981 }, { "epoch": 0.26447824926607955, "grad_norm": 1.3783447543972154, "learning_rate": 1.7252776762595695e-05, "loss": 1.674, "step": 1982 }, { "epoch": 0.26461168935148116, "grad_norm": 1.345089903670124, "learning_rate": 1.724980064200529e-05, "loss": 1.6719, "step": 1983 }, { "epoch": 0.26474512943688283, "grad_norm": 1.6788138951425957, "learning_rate": 1.724682316723467e-05, "loss": 1.6242, "step": 1984 }, { "epoch": 0.2648785695222845, "grad_norm": 1.057556035989643, "learning_rate": 1.724384433883999e-05, "loss": 1.6688, "step": 1985 }, { "epoch": 0.26501200960768617, "grad_norm": 1.935146990397484, "learning_rate": 1.724086415737766e-05, "loss": 1.6685, "step": 1986 }, { "epoch": 0.2651454496930878, "grad_norm": 1.4029312343106273, "learning_rate": 1.7237882623404347e-05, "loss": 1.69, "step": 1987 }, { "epoch": 0.26527888977848946, "grad_norm": 1.0640529672227876, "learning_rate": 1.7234899737476968e-05, "loss": 1.5853, "step": 1988 }, { "epoch": 0.2654123298638911, "grad_norm": 1.0733639436628284, "learning_rate": 1.7231915500152685e-05, "loss": 1.677, "step": 1989 }, { "epoch": 0.26554576994929274, "grad_norm": 1.0241005822898823, "learning_rate": 1.722892991198893e-05, "loss": 1.6374, "step": 1990 }, { "epoch": 0.2656792100346944, "grad_norm": 1.0875108906486028, "learning_rate": 1.7225942973543368e-05, "loss": 1.6445, "step": 1991 }, { "epoch": 0.2658126501200961, "grad_norm": 1.2697214356659474, "learning_rate": 1.7222954685373933e-05, "loss": 1.5832, "step": 1992 }, { "epoch": 0.26594609020549775, "grad_norm": 1.0830834504640086, "learning_rate": 1.7219965048038795e-05, "loss": 1.6888, "step": 1993 }, { "epoch": 0.26607953029089937, "grad_norm": 1.3072951866209532, "learning_rate": 1.7216974062096392e-05, "loss": 1.6289, "step": 1994 }, { "epoch": 0.26621297037630104, "grad_norm": 1.0836421243679286, "learning_rate": 1.72139817281054e-05, "loss": 1.6658, "step": 1995 }, { "epoch": 0.2663464104617027, "grad_norm": 1.8046043701192065, "learning_rate": 1.7210988046624758e-05, "loss": 1.622, "step": 1996 }, { "epoch": 0.2664798505471043, "grad_norm": 1.0933300325897486, "learning_rate": 1.720799301821365e-05, "loss": 1.6297, "step": 1997 }, { "epoch": 0.266613290632506, "grad_norm": 1.0459969375513192, "learning_rate": 1.720499664343151e-05, "loss": 1.6484, "step": 1998 }, { "epoch": 0.26674673071790767, "grad_norm": 1.0751449290453639, "learning_rate": 1.720199892283803e-05, "loss": 1.625, "step": 1999 }, { "epoch": 0.26688017080330934, "grad_norm": 1.0928368447753933, "learning_rate": 1.719899985699315e-05, "loss": 1.6806, "step": 2000 }, { "epoch": 0.26701361088871095, "grad_norm": 1.0349222592130347, "learning_rate": 1.7195999446457053e-05, "loss": 1.595, "step": 2001 }, { "epoch": 0.2671470509741126, "grad_norm": 1.063465046316729, "learning_rate": 1.7192997691790197e-05, "loss": 1.6143, "step": 2002 }, { "epoch": 0.2672804910595143, "grad_norm": 1.0284024134886187, "learning_rate": 1.7189994593553266e-05, "loss": 1.6447, "step": 2003 }, { "epoch": 0.2674139311449159, "grad_norm": 1.307165702348938, "learning_rate": 1.7186990152307203e-05, "loss": 1.6691, "step": 2004 }, { "epoch": 0.2675473712303176, "grad_norm": 1.119045388640825, "learning_rate": 1.718398436861321e-05, "loss": 1.6466, "step": 2005 }, { "epoch": 0.26768081131571925, "grad_norm": 1.0226913583499544, "learning_rate": 1.718097724303273e-05, "loss": 1.6593, "step": 2006 }, { "epoch": 0.2678142514011209, "grad_norm": 1.1645428303381167, "learning_rate": 1.717796877612746e-05, "loss": 1.6316, "step": 2007 }, { "epoch": 0.26794769148652253, "grad_norm": 1.0290487690958936, "learning_rate": 1.7174958968459344e-05, "loss": 1.6416, "step": 2008 }, { "epoch": 0.2680811315719242, "grad_norm": 1.113710094399956, "learning_rate": 1.7171947820590584e-05, "loss": 1.6459, "step": 2009 }, { "epoch": 0.2682145716573259, "grad_norm": 1.0721033027517342, "learning_rate": 1.7168935333083624e-05, "loss": 1.6481, "step": 2010 }, { "epoch": 0.2683480117427275, "grad_norm": 4.148877971329257, "learning_rate": 1.7165921506501168e-05, "loss": 1.6853, "step": 2011 }, { "epoch": 0.26848145182812916, "grad_norm": 1.026325617891188, "learning_rate": 1.716290634140616e-05, "loss": 1.6395, "step": 2012 }, { "epoch": 0.26861489191353083, "grad_norm": 1.0458614779045183, "learning_rate": 1.71598898383618e-05, "loss": 1.6385, "step": 2013 }, { "epoch": 0.2687483319989325, "grad_norm": 1.3489378298749055, "learning_rate": 1.715687199793154e-05, "loss": 1.6306, "step": 2014 }, { "epoch": 0.2688817720843341, "grad_norm": 1.066106782654306, "learning_rate": 1.7153852820679073e-05, "loss": 1.6143, "step": 2015 }, { "epoch": 0.2690152121697358, "grad_norm": 1.0517288617879899, "learning_rate": 1.715083230716835e-05, "loss": 1.6918, "step": 2016 }, { "epoch": 0.26914865225513745, "grad_norm": 1.0631774833951584, "learning_rate": 1.7147810457963565e-05, "loss": 1.6196, "step": 2017 }, { "epoch": 0.2692820923405391, "grad_norm": 1.5500958673330705, "learning_rate": 1.714478727362917e-05, "loss": 1.605, "step": 2018 }, { "epoch": 0.26941553242594074, "grad_norm": 1.2061760172934797, "learning_rate": 1.7141762754729855e-05, "loss": 1.627, "step": 2019 }, { "epoch": 0.2695489725113424, "grad_norm": 1.063335069996398, "learning_rate": 1.713873690183057e-05, "loss": 1.6546, "step": 2020 }, { "epoch": 0.2696824125967441, "grad_norm": 1.0345427555272797, "learning_rate": 1.713570971549651e-05, "loss": 1.6332, "step": 2021 }, { "epoch": 0.2698158526821457, "grad_norm": 1.0387506796892585, "learning_rate": 1.713268119629312e-05, "loss": 1.6208, "step": 2022 }, { "epoch": 0.26994929276754737, "grad_norm": 1.0451937535961915, "learning_rate": 1.7129651344786088e-05, "loss": 1.6419, "step": 2023 }, { "epoch": 0.27008273285294904, "grad_norm": 1.0424161111119927, "learning_rate": 1.7126620161541364e-05, "loss": 1.6801, "step": 2024 }, { "epoch": 0.2702161729383507, "grad_norm": 1.0453586363001601, "learning_rate": 1.712358764712513e-05, "loss": 1.6202, "step": 2025 }, { "epoch": 0.2703496130237523, "grad_norm": 1.1453862840223692, "learning_rate": 1.7120553802103828e-05, "loss": 1.6623, "step": 2026 }, { "epoch": 0.270483053109154, "grad_norm": 1.325903344513902, "learning_rate": 1.7117518627044148e-05, "loss": 1.6376, "step": 2027 }, { "epoch": 0.27061649319455566, "grad_norm": 1.080112760240277, "learning_rate": 1.7114482122513024e-05, "loss": 1.6184, "step": 2028 }, { "epoch": 0.2707499332799573, "grad_norm": 1.0594663654156977, "learning_rate": 1.711144428907764e-05, "loss": 1.6887, "step": 2029 }, { "epoch": 0.27088337336535895, "grad_norm": 1.0462695966057693, "learning_rate": 1.710840512730543e-05, "loss": 1.5665, "step": 2030 }, { "epoch": 0.2710168134507606, "grad_norm": 1.1251591649106787, "learning_rate": 1.7105364637764075e-05, "loss": 1.6294, "step": 2031 }, { "epoch": 0.2711502535361623, "grad_norm": 1.1329461782399421, "learning_rate": 1.7102322821021505e-05, "loss": 1.6344, "step": 2032 }, { "epoch": 0.2712836936215639, "grad_norm": 1.0166935218662512, "learning_rate": 1.709927967764589e-05, "loss": 1.6171, "step": 2033 }, { "epoch": 0.2714171337069656, "grad_norm": 1.2601520388342762, "learning_rate": 1.7096235208205665e-05, "loss": 1.634, "step": 2034 }, { "epoch": 0.27155057379236724, "grad_norm": 1.076982069439433, "learning_rate": 1.7093189413269497e-05, "loss": 1.6637, "step": 2035 }, { "epoch": 0.27168401387776886, "grad_norm": 1.0321358819200304, "learning_rate": 1.70901422934063e-05, "loss": 1.6222, "step": 2036 }, { "epoch": 0.27181745396317053, "grad_norm": 1.0654015458142159, "learning_rate": 1.708709384918525e-05, "loss": 1.586, "step": 2037 }, { "epoch": 0.2719508940485722, "grad_norm": 0.9875095341342335, "learning_rate": 1.708404408117576e-05, "loss": 1.6419, "step": 2038 }, { "epoch": 0.27208433413397387, "grad_norm": 1.2163989586437813, "learning_rate": 1.708099298994749e-05, "loss": 1.6356, "step": 2039 }, { "epoch": 0.2722177742193755, "grad_norm": 1.0424446221236658, "learning_rate": 1.7077940576070347e-05, "loss": 1.6426, "step": 2040 }, { "epoch": 0.27235121430477716, "grad_norm": 1.1917437972972873, "learning_rate": 1.707488684011449e-05, "loss": 1.6128, "step": 2041 }, { "epoch": 0.2724846543901788, "grad_norm": 1.0561747251809694, "learning_rate": 1.7071831782650325e-05, "loss": 1.6523, "step": 2042 }, { "epoch": 0.27261809447558044, "grad_norm": 1.0385624086824277, "learning_rate": 1.7068775404248497e-05, "loss": 1.6634, "step": 2043 }, { "epoch": 0.2727515345609821, "grad_norm": 1.303828973820152, "learning_rate": 1.7065717705479906e-05, "loss": 1.6508, "step": 2044 }, { "epoch": 0.2728849746463838, "grad_norm": 1.1910010358615803, "learning_rate": 1.706265868691569e-05, "loss": 1.6699, "step": 2045 }, { "epoch": 0.27301841473178545, "grad_norm": 1.1784688305684132, "learning_rate": 1.7059598349127245e-05, "loss": 1.645, "step": 2046 }, { "epoch": 0.27315185481718707, "grad_norm": 0.985356851054121, "learning_rate": 1.7056536692686204e-05, "loss": 1.6146, "step": 2047 }, { "epoch": 0.27328529490258874, "grad_norm": 1.0255343499954996, "learning_rate": 1.7053473718164455e-05, "loss": 1.6418, "step": 2048 }, { "epoch": 0.2734187349879904, "grad_norm": 1.0638983947233618, "learning_rate": 1.7050409426134117e-05, "loss": 1.7435, "step": 2049 }, { "epoch": 0.273552175073392, "grad_norm": 1.0851770979360476, "learning_rate": 1.704734381716757e-05, "loss": 1.658, "step": 2050 }, { "epoch": 0.2736856151587937, "grad_norm": 1.1763821324448986, "learning_rate": 1.704427689183744e-05, "loss": 1.6535, "step": 2051 }, { "epoch": 0.27381905524419536, "grad_norm": 1.3104441981575137, "learning_rate": 1.7041208650716586e-05, "loss": 1.6589, "step": 2052 }, { "epoch": 0.27395249532959703, "grad_norm": 1.0136315973372723, "learning_rate": 1.703813909437812e-05, "loss": 1.6133, "step": 2053 }, { "epoch": 0.27408593541499865, "grad_norm": 0.9960414593425704, "learning_rate": 1.7035068223395407e-05, "loss": 1.621, "step": 2054 }, { "epoch": 0.2742193755004003, "grad_norm": 1.2127828477430966, "learning_rate": 1.7031996038342045e-05, "loss": 1.6397, "step": 2055 }, { "epoch": 0.274352815585802, "grad_norm": 0.9828639066951254, "learning_rate": 1.702892253979189e-05, "loss": 1.6125, "step": 2056 }, { "epoch": 0.2744862556712036, "grad_norm": 0.9758056512184737, "learning_rate": 1.7025847728319027e-05, "loss": 1.6425, "step": 2057 }, { "epoch": 0.2746196957566053, "grad_norm": 1.0754633573934569, "learning_rate": 1.7022771604497802e-05, "loss": 1.6622, "step": 2058 }, { "epoch": 0.27475313584200695, "grad_norm": 1.0344089922720026, "learning_rate": 1.70196941689028e-05, "loss": 1.6888, "step": 2059 }, { "epoch": 0.2748865759274086, "grad_norm": 1.0736302080536169, "learning_rate": 1.7016615422108847e-05, "loss": 1.658, "step": 2060 }, { "epoch": 0.27502001601281023, "grad_norm": 1.20409158832475, "learning_rate": 1.7013535364691023e-05, "loss": 1.6427, "step": 2061 }, { "epoch": 0.2751534560982119, "grad_norm": 1.069163552633532, "learning_rate": 1.7010453997224642e-05, "loss": 1.6237, "step": 2062 }, { "epoch": 0.27528689618361357, "grad_norm": 1.1894978043506685, "learning_rate": 1.7007371320285275e-05, "loss": 1.6635, "step": 2063 }, { "epoch": 0.2754203362690152, "grad_norm": 1.1903195019190183, "learning_rate": 1.7004287334448723e-05, "loss": 1.5996, "step": 2064 }, { "epoch": 0.27555377635441686, "grad_norm": 1.0460875658896527, "learning_rate": 1.7001202040291048e-05, "loss": 1.7078, "step": 2065 }, { "epoch": 0.2756872164398185, "grad_norm": 1.4369499128117869, "learning_rate": 1.699811543838854e-05, "loss": 1.6444, "step": 2066 }, { "epoch": 0.2758206565252202, "grad_norm": 1.209341903013088, "learning_rate": 1.6995027529317746e-05, "loss": 1.6181, "step": 2067 }, { "epoch": 0.2759540966106218, "grad_norm": 1.1036519149996684, "learning_rate": 1.6991938313655453e-05, "loss": 1.6224, "step": 2068 }, { "epoch": 0.2760875366960235, "grad_norm": 1.146216102199816, "learning_rate": 1.6988847791978687e-05, "loss": 1.6165, "step": 2069 }, { "epoch": 0.27622097678142515, "grad_norm": 1.1564719422933685, "learning_rate": 1.6985755964864723e-05, "loss": 1.7315, "step": 2070 }, { "epoch": 0.27635441686682677, "grad_norm": 1.1449774555454721, "learning_rate": 1.698266283289108e-05, "loss": 1.6398, "step": 2071 }, { "epoch": 0.27648785695222844, "grad_norm": 1.0564082538230344, "learning_rate": 1.6979568396635526e-05, "loss": 1.5539, "step": 2072 }, { "epoch": 0.2766212970376301, "grad_norm": 1.0646332200728448, "learning_rate": 1.6976472656676058e-05, "loss": 1.6822, "step": 2073 }, { "epoch": 0.2767547371230318, "grad_norm": 1.2326207337266486, "learning_rate": 1.697337561359093e-05, "loss": 1.5862, "step": 2074 }, { "epoch": 0.2768881772084334, "grad_norm": 1.2149498397188576, "learning_rate": 1.6970277267958625e-05, "loss": 1.6561, "step": 2075 }, { "epoch": 0.27702161729383507, "grad_norm": 1.15730574634711, "learning_rate": 1.6967177620357894e-05, "loss": 1.6772, "step": 2076 }, { "epoch": 0.27715505737923674, "grad_norm": 1.041366398730123, "learning_rate": 1.6964076671367703e-05, "loss": 1.6171, "step": 2077 }, { "epoch": 0.27728849746463835, "grad_norm": 1.0099579426889422, "learning_rate": 1.696097442156728e-05, "loss": 1.6003, "step": 2078 }, { "epoch": 0.27742193755004, "grad_norm": 1.1597495545248406, "learning_rate": 1.6957870871536086e-05, "loss": 1.629, "step": 2079 }, { "epoch": 0.2775553776354417, "grad_norm": 1.2971905519568292, "learning_rate": 1.6954766021853836e-05, "loss": 1.6335, "step": 2080 }, { "epoch": 0.27768881772084336, "grad_norm": 1.0081447437285673, "learning_rate": 1.6951659873100474e-05, "loss": 1.661, "step": 2081 }, { "epoch": 0.277822257806245, "grad_norm": 1.155709221150846, "learning_rate": 1.6948552425856197e-05, "loss": 1.6418, "step": 2082 }, { "epoch": 0.27795569789164665, "grad_norm": 1.3578902806881925, "learning_rate": 1.694544368070144e-05, "loss": 1.7022, "step": 2083 }, { "epoch": 0.2780891379770483, "grad_norm": 1.0473984071647915, "learning_rate": 1.694233363821688e-05, "loss": 1.6471, "step": 2084 }, { "epoch": 0.27822257806244993, "grad_norm": 1.0998361204044, "learning_rate": 1.6939222298983432e-05, "loss": 1.6853, "step": 2085 }, { "epoch": 0.2783560181478516, "grad_norm": 1.0597277511694383, "learning_rate": 1.693610966358227e-05, "loss": 1.6518, "step": 2086 }, { "epoch": 0.2784894582332533, "grad_norm": 1.0956187785249976, "learning_rate": 1.693299573259479e-05, "loss": 1.6657, "step": 2087 }, { "epoch": 0.27862289831865494, "grad_norm": 1.136040109917539, "learning_rate": 1.6929880506602644e-05, "loss": 1.6538, "step": 2088 }, { "epoch": 0.27875633840405656, "grad_norm": 1.13032282747865, "learning_rate": 1.692676398618772e-05, "loss": 1.6329, "step": 2089 }, { "epoch": 0.27888977848945823, "grad_norm": 1.297996071275606, "learning_rate": 1.6923646171932148e-05, "loss": 1.6816, "step": 2090 }, { "epoch": 0.2790232185748599, "grad_norm": 1.036055755996991, "learning_rate": 1.6920527064418298e-05, "loss": 1.6716, "step": 2091 }, { "epoch": 0.27915665866026157, "grad_norm": 1.1008523939624144, "learning_rate": 1.6917406664228785e-05, "loss": 1.6202, "step": 2092 }, { "epoch": 0.2792900987456632, "grad_norm": 1.1056127215942615, "learning_rate": 1.6914284971946466e-05, "loss": 1.6673, "step": 2093 }, { "epoch": 0.27942353883106485, "grad_norm": 1.3280112976952287, "learning_rate": 1.6911161988154435e-05, "loss": 1.6588, "step": 2094 }, { "epoch": 0.2795569789164665, "grad_norm": 1.2052642948323982, "learning_rate": 1.6908037713436037e-05, "loss": 1.6314, "step": 2095 }, { "epoch": 0.27969041900186814, "grad_norm": 0.9865645913607, "learning_rate": 1.6904912148374837e-05, "loss": 1.6214, "step": 2096 }, { "epoch": 0.2798238590872698, "grad_norm": 1.059342517625569, "learning_rate": 1.6901785293554667e-05, "loss": 1.6119, "step": 2097 }, { "epoch": 0.2799572991726715, "grad_norm": 1.0202929873543987, "learning_rate": 1.6898657149559585e-05, "loss": 1.681, "step": 2098 }, { "epoch": 0.28009073925807315, "grad_norm": 1.0885504607819505, "learning_rate": 1.689552771697389e-05, "loss": 1.641, "step": 2099 }, { "epoch": 0.28022417934347477, "grad_norm": 1.2387895966845563, "learning_rate": 1.6892396996382125e-05, "loss": 1.6229, "step": 2100 }, { "epoch": 0.28035761942887644, "grad_norm": 1.091745358204505, "learning_rate": 1.6889264988369074e-05, "loss": 1.5917, "step": 2101 }, { "epoch": 0.2804910595142781, "grad_norm": 1.003916358618551, "learning_rate": 1.688613169351976e-05, "loss": 1.6095, "step": 2102 }, { "epoch": 0.2806244995996797, "grad_norm": 0.9972302973585249, "learning_rate": 1.6882997112419452e-05, "loss": 1.6731, "step": 2103 }, { "epoch": 0.2807579396850814, "grad_norm": 1.0210763608341367, "learning_rate": 1.6879861245653647e-05, "loss": 1.7185, "step": 2104 }, { "epoch": 0.28089137977048306, "grad_norm": 1.0637023781709813, "learning_rate": 1.687672409380809e-05, "loss": 1.6653, "step": 2105 }, { "epoch": 0.28102481985588473, "grad_norm": 1.0336190974023656, "learning_rate": 1.687358565746877e-05, "loss": 1.6459, "step": 2106 }, { "epoch": 0.28115825994128635, "grad_norm": 1.0180661487350322, "learning_rate": 1.68704459372219e-05, "loss": 1.6258, "step": 2107 }, { "epoch": 0.281291700026688, "grad_norm": 1.1141096916948245, "learning_rate": 1.686730493365396e-05, "loss": 1.658, "step": 2108 }, { "epoch": 0.2814251401120897, "grad_norm": 3.197208541082059, "learning_rate": 1.686416264735164e-05, "loss": 1.818, "step": 2109 }, { "epoch": 0.2815585801974913, "grad_norm": 1.221770226887997, "learning_rate": 1.6861019078901894e-05, "loss": 1.558, "step": 2110 }, { "epoch": 0.281692020282893, "grad_norm": 1.0374977596663437, "learning_rate": 1.6857874228891896e-05, "loss": 1.6517, "step": 2111 }, { "epoch": 0.28182546036829464, "grad_norm": 1.0335222022664212, "learning_rate": 1.6854728097909073e-05, "loss": 1.6713, "step": 2112 }, { "epoch": 0.2819589004536963, "grad_norm": 1.3195530821608707, "learning_rate": 1.6851580686541087e-05, "loss": 1.7385, "step": 2113 }, { "epoch": 0.28209234053909793, "grad_norm": 1.195240974112386, "learning_rate": 1.6848431995375834e-05, "loss": 1.6521, "step": 2114 }, { "epoch": 0.2822257806244996, "grad_norm": 1.0436378450311068, "learning_rate": 1.684528202500146e-05, "loss": 1.6722, "step": 2115 }, { "epoch": 0.28235922070990127, "grad_norm": 1.02333315302456, "learning_rate": 1.6842130776006332e-05, "loss": 1.6158, "step": 2116 }, { "epoch": 0.2824926607953029, "grad_norm": 1.107427692009098, "learning_rate": 1.6838978248979083e-05, "loss": 1.5904, "step": 2117 }, { "epoch": 0.28262610088070456, "grad_norm": 1.1045697230959286, "learning_rate": 1.683582444450856e-05, "loss": 1.6672, "step": 2118 }, { "epoch": 0.2827595409661062, "grad_norm": 1.0660148829983587, "learning_rate": 1.6832669363183863e-05, "loss": 1.5848, "step": 2119 }, { "epoch": 0.2828929810515079, "grad_norm": 0.997746177166706, "learning_rate": 1.6829513005594318e-05, "loss": 1.6168, "step": 2120 }, { "epoch": 0.2830264211369095, "grad_norm": 1.0851170598026787, "learning_rate": 1.6826355372329502e-05, "loss": 1.6698, "step": 2121 }, { "epoch": 0.2831598612223112, "grad_norm": 1.157100041505963, "learning_rate": 1.682319646397922e-05, "loss": 1.6833, "step": 2122 }, { "epoch": 0.28329330130771285, "grad_norm": 1.0494927317075036, "learning_rate": 1.682003628113353e-05, "loss": 1.6263, "step": 2123 }, { "epoch": 0.28342674139311447, "grad_norm": 1.0386913852887472, "learning_rate": 1.6816874824382704e-05, "loss": 1.6658, "step": 2124 }, { "epoch": 0.28356018147851614, "grad_norm": 0.9903946170282486, "learning_rate": 1.6813712094317282e-05, "loss": 1.5875, "step": 2125 }, { "epoch": 0.2836936215639178, "grad_norm": 1.0742398505956183, "learning_rate": 1.681054809152801e-05, "loss": 1.713, "step": 2126 }, { "epoch": 0.2838270616493195, "grad_norm": 1.0730348396285945, "learning_rate": 1.6807382816605903e-05, "loss": 1.6241, "step": 2127 }, { "epoch": 0.2839605017347211, "grad_norm": 1.2030549884856265, "learning_rate": 1.6804216270142183e-05, "loss": 1.6708, "step": 2128 }, { "epoch": 0.28409394182012276, "grad_norm": 1.0495436375707339, "learning_rate": 1.6801048452728338e-05, "loss": 1.6271, "step": 2129 }, { "epoch": 0.28422738190552443, "grad_norm": 1.0625147413097187, "learning_rate": 1.6797879364956075e-05, "loss": 1.6423, "step": 2130 }, { "epoch": 0.28436082199092605, "grad_norm": 1.1920231207118999, "learning_rate": 1.679470900741734e-05, "loss": 1.6252, "step": 2131 }, { "epoch": 0.2844942620763277, "grad_norm": 1.0599528204014201, "learning_rate": 1.6791537380704326e-05, "loss": 1.6599, "step": 2132 }, { "epoch": 0.2846277021617294, "grad_norm": 1.0231405998686767, "learning_rate": 1.6788364485409454e-05, "loss": 1.6155, "step": 2133 }, { "epoch": 0.28476114224713106, "grad_norm": 0.9948628308935985, "learning_rate": 1.6785190322125382e-05, "loss": 1.6538, "step": 2134 }, { "epoch": 0.2848945823325327, "grad_norm": 3.8142735734554662, "learning_rate": 1.678201489144501e-05, "loss": 1.6744, "step": 2135 }, { "epoch": 0.28502802241793435, "grad_norm": 1.1704524649360155, "learning_rate": 1.6778838193961475e-05, "loss": 1.7045, "step": 2136 }, { "epoch": 0.285161462503336, "grad_norm": 1.1934124423424233, "learning_rate": 1.6775660230268146e-05, "loss": 1.6685, "step": 2137 }, { "epoch": 0.28529490258873763, "grad_norm": 1.0625696814614358, "learning_rate": 1.6772481000958628e-05, "loss": 1.6241, "step": 2138 }, { "epoch": 0.2854283426741393, "grad_norm": 1.0424150836147343, "learning_rate": 1.6769300506626766e-05, "loss": 1.5597, "step": 2139 }, { "epoch": 0.28556178275954097, "grad_norm": 1.3055292931541427, "learning_rate": 1.676611874786664e-05, "loss": 1.6378, "step": 2140 }, { "epoch": 0.28569522284494264, "grad_norm": 1.207338132471859, "learning_rate": 1.676293572527257e-05, "loss": 1.6932, "step": 2141 }, { "epoch": 0.28582866293034426, "grad_norm": 1.3576084701670912, "learning_rate": 1.6759751439439105e-05, "loss": 1.6737, "step": 2142 }, { "epoch": 0.2859621030157459, "grad_norm": 1.1131581474723593, "learning_rate": 1.675656589096103e-05, "loss": 1.6575, "step": 2143 }, { "epoch": 0.2860955431011476, "grad_norm": 1.0404856911010416, "learning_rate": 1.6753379080433375e-05, "loss": 1.6199, "step": 2144 }, { "epoch": 0.2862289831865492, "grad_norm": 1.0326352373682337, "learning_rate": 1.6750191008451403e-05, "loss": 1.6281, "step": 2145 }, { "epoch": 0.2863624232719509, "grad_norm": 1.0618014692593303, "learning_rate": 1.6747001675610596e-05, "loss": 1.7052, "step": 2146 }, { "epoch": 0.28649586335735255, "grad_norm": 1.156021303055593, "learning_rate": 1.67438110825067e-05, "loss": 1.6295, "step": 2147 }, { "epoch": 0.2866293034427542, "grad_norm": 1.0687356689670264, "learning_rate": 1.674061922973567e-05, "loss": 1.6377, "step": 2148 }, { "epoch": 0.28676274352815584, "grad_norm": 1.0422058937640384, "learning_rate": 1.6737426117893716e-05, "loss": 1.5742, "step": 2149 }, { "epoch": 0.2868961836135575, "grad_norm": 1.0058798981165387, "learning_rate": 1.673423174757727e-05, "loss": 1.6808, "step": 2150 }, { "epoch": 0.2870296236989592, "grad_norm": 1.3968858626075848, "learning_rate": 1.6731036119383007e-05, "loss": 1.6662, "step": 2151 }, { "epoch": 0.2871630637843608, "grad_norm": 1.0783132009504053, "learning_rate": 1.6727839233907833e-05, "loss": 1.62, "step": 2152 }, { "epoch": 0.28729650386976247, "grad_norm": 1.045602618496203, "learning_rate": 1.672464109174889e-05, "loss": 1.6085, "step": 2153 }, { "epoch": 0.28742994395516414, "grad_norm": 1.0204746262045936, "learning_rate": 1.672144169350355e-05, "loss": 1.682, "step": 2154 }, { "epoch": 0.2875633840405658, "grad_norm": 0.9712644138016593, "learning_rate": 1.671824103976943e-05, "loss": 1.591, "step": 2155 }, { "epoch": 0.2876968241259674, "grad_norm": 1.2273318949317467, "learning_rate": 1.6715039131144375e-05, "loss": 1.6639, "step": 2156 }, { "epoch": 0.2878302642113691, "grad_norm": 1.241652981677857, "learning_rate": 1.6711835968226463e-05, "loss": 1.656, "step": 2157 }, { "epoch": 0.28796370429677076, "grad_norm": 1.0646723228459751, "learning_rate": 1.670863155161401e-05, "loss": 1.6509, "step": 2158 }, { "epoch": 0.28809714438217243, "grad_norm": 0.9640798880564916, "learning_rate": 1.6705425881905564e-05, "loss": 1.6043, "step": 2159 }, { "epoch": 0.28823058446757405, "grad_norm": 1.0190920068560503, "learning_rate": 1.6702218959699906e-05, "loss": 1.6723, "step": 2160 }, { "epoch": 0.2883640245529757, "grad_norm": 1.029425832894891, "learning_rate": 1.6699010785596056e-05, "loss": 1.6663, "step": 2161 }, { "epoch": 0.2884974646383774, "grad_norm": 1.0069920792713327, "learning_rate": 1.669580136019326e-05, "loss": 1.6331, "step": 2162 }, { "epoch": 0.288630904723779, "grad_norm": 1.1668045734582408, "learning_rate": 1.6692590684091004e-05, "loss": 1.6401, "step": 2163 }, { "epoch": 0.2887643448091807, "grad_norm": 1.056267375662492, "learning_rate": 1.6689378757889007e-05, "loss": 1.6311, "step": 2164 }, { "epoch": 0.28889778489458234, "grad_norm": 1.0648778315304113, "learning_rate": 1.6686165582187223e-05, "loss": 1.6636, "step": 2165 }, { "epoch": 0.289031224979984, "grad_norm": 1.0124410015208933, "learning_rate": 1.668295115758583e-05, "loss": 1.6381, "step": 2166 }, { "epoch": 0.28916466506538563, "grad_norm": 1.0022830955131297, "learning_rate": 1.6679735484685247e-05, "loss": 1.5945, "step": 2167 }, { "epoch": 0.2892981051507873, "grad_norm": 12.676569029129164, "learning_rate": 1.6676518564086133e-05, "loss": 1.6591, "step": 2168 }, { "epoch": 0.28943154523618897, "grad_norm": 1.1813552827187637, "learning_rate": 1.667330039638936e-05, "loss": 1.6071, "step": 2169 }, { "epoch": 0.2895649853215906, "grad_norm": 1.059369062118976, "learning_rate": 1.6670080982196055e-05, "loss": 1.6634, "step": 2170 }, { "epoch": 0.28969842540699225, "grad_norm": 12.044345487483524, "learning_rate": 1.6666860322107563e-05, "loss": 1.7567, "step": 2171 }, { "epoch": 0.2898318654923939, "grad_norm": 1.1174156183857291, "learning_rate": 1.6663638416725466e-05, "loss": 1.6669, "step": 2172 }, { "epoch": 0.2899653055777956, "grad_norm": 1.422061825117082, "learning_rate": 1.6660415266651588e-05, "loss": 1.6174, "step": 2173 }, { "epoch": 0.2900987456631972, "grad_norm": 1.09465155740145, "learning_rate": 1.6657190872487964e-05, "loss": 1.6354, "step": 2174 }, { "epoch": 0.2902321857485989, "grad_norm": 1.6546096916969328, "learning_rate": 1.665396523483688e-05, "loss": 1.5803, "step": 2175 }, { "epoch": 0.29036562583400055, "grad_norm": 10.336323730521444, "learning_rate": 1.6650738354300848e-05, "loss": 1.6456, "step": 2176 }, { "epoch": 0.29049906591940217, "grad_norm": 1.1978798944636024, "learning_rate": 1.664751023148262e-05, "loss": 1.6693, "step": 2177 }, { "epoch": 0.29063250600480384, "grad_norm": 1.1345982910370151, "learning_rate": 1.6644280866985155e-05, "loss": 1.5768, "step": 2178 }, { "epoch": 0.2907659460902055, "grad_norm": 1.3001746534071794, "learning_rate": 1.6641050261411676e-05, "loss": 1.6761, "step": 2179 }, { "epoch": 0.2908993861756072, "grad_norm": 1.321671111629523, "learning_rate": 1.6637818415365622e-05, "loss": 1.6474, "step": 2180 }, { "epoch": 0.2910328262610088, "grad_norm": 1.0570804882194134, "learning_rate": 1.663458532945066e-05, "loss": 1.6042, "step": 2181 }, { "epoch": 0.29116626634641046, "grad_norm": 1.242601537641294, "learning_rate": 1.66313510042707e-05, "loss": 1.6398, "step": 2182 }, { "epoch": 0.29129970643181213, "grad_norm": 1.3245077209972027, "learning_rate": 1.6628115440429867e-05, "loss": 1.643, "step": 2183 }, { "epoch": 0.29143314651721375, "grad_norm": 1.0817940467899572, "learning_rate": 1.662487863853254e-05, "loss": 1.5973, "step": 2184 }, { "epoch": 0.2915665866026154, "grad_norm": 1.0749074924656212, "learning_rate": 1.6621640599183307e-05, "loss": 1.6395, "step": 2185 }, { "epoch": 0.2917000266880171, "grad_norm": 1.0699143617678875, "learning_rate": 1.6618401322986998e-05, "loss": 1.6294, "step": 2186 }, { "epoch": 0.29183346677341876, "grad_norm": 1.2973422398546064, "learning_rate": 1.6615160810548677e-05, "loss": 1.6334, "step": 2187 }, { "epoch": 0.2919669068588204, "grad_norm": 1.2752225836668032, "learning_rate": 1.6611919062473635e-05, "loss": 1.6568, "step": 2188 }, { "epoch": 0.29210034694422204, "grad_norm": 1.0820454505520494, "learning_rate": 1.660867607936739e-05, "loss": 1.6779, "step": 2189 }, { "epoch": 0.2922337870296237, "grad_norm": 1.1086323014280555, "learning_rate": 1.6605431861835695e-05, "loss": 1.6372, "step": 2190 }, { "epoch": 0.29236722711502533, "grad_norm": 1.1434647125560675, "learning_rate": 1.6602186410484536e-05, "loss": 1.5664, "step": 2191 }, { "epoch": 0.292500667200427, "grad_norm": 1.6902321777443527, "learning_rate": 1.6598939725920122e-05, "loss": 1.6695, "step": 2192 }, { "epoch": 0.29263410728582867, "grad_norm": 1.2716880193854676, "learning_rate": 1.65956918087489e-05, "loss": 1.7116, "step": 2193 }, { "epoch": 0.29276754737123034, "grad_norm": 1.1161202972917799, "learning_rate": 1.6592442659577545e-05, "loss": 1.6309, "step": 2194 }, { "epoch": 0.29290098745663196, "grad_norm": 1.0699166169760714, "learning_rate": 1.6589192279012956e-05, "loss": 1.6484, "step": 2195 }, { "epoch": 0.2930344275420336, "grad_norm": 1.0222347107395644, "learning_rate": 1.658594066766227e-05, "loss": 1.6596, "step": 2196 }, { "epoch": 0.2931678676274353, "grad_norm": 1.0807908428296922, "learning_rate": 1.6582687826132854e-05, "loss": 1.6678, "step": 2197 }, { "epoch": 0.2933013077128369, "grad_norm": 1.081068785656472, "learning_rate": 1.6579433755032297e-05, "loss": 1.6052, "step": 2198 }, { "epoch": 0.2934347477982386, "grad_norm": 1.0103124427107717, "learning_rate": 1.6576178454968422e-05, "loss": 1.6409, "step": 2199 }, { "epoch": 0.29356818788364025, "grad_norm": 0.9811350388342566, "learning_rate": 1.657292192654929e-05, "loss": 1.5593, "step": 2200 }, { "epoch": 0.2937016279690419, "grad_norm": 1.306039871025814, "learning_rate": 1.656966417038317e-05, "loss": 1.6302, "step": 2201 }, { "epoch": 0.29383506805444354, "grad_norm": 1.021756773048609, "learning_rate": 1.656640518707859e-05, "loss": 1.6938, "step": 2202 }, { "epoch": 0.2939685081398452, "grad_norm": 1.0718659276893743, "learning_rate": 1.6563144977244277e-05, "loss": 1.6301, "step": 2203 }, { "epoch": 0.2941019482252469, "grad_norm": 1.2325944349438038, "learning_rate": 1.6559883541489204e-05, "loss": 1.6064, "step": 2204 }, { "epoch": 0.2942353883106485, "grad_norm": 1.04367146349336, "learning_rate": 1.6556620880422577e-05, "loss": 1.6461, "step": 2205 }, { "epoch": 0.29436882839605016, "grad_norm": 1.1064673095517306, "learning_rate": 1.6553356994653818e-05, "loss": 1.6265, "step": 2206 }, { "epoch": 0.29450226848145183, "grad_norm": 1.0133174183375973, "learning_rate": 1.6550091884792586e-05, "loss": 1.647, "step": 2207 }, { "epoch": 0.2946357085668535, "grad_norm": 1.0556625341442718, "learning_rate": 1.6546825551448766e-05, "loss": 1.6508, "step": 2208 }, { "epoch": 0.2947691486522551, "grad_norm": 1.0201247011199586, "learning_rate": 1.6543557995232472e-05, "loss": 1.6311, "step": 2209 }, { "epoch": 0.2949025887376568, "grad_norm": 1.0341845337571378, "learning_rate": 1.6540289216754042e-05, "loss": 1.6522, "step": 2210 }, { "epoch": 0.29503602882305846, "grad_norm": 1.0728331970850973, "learning_rate": 1.6537019216624055e-05, "loss": 1.6073, "step": 2211 }, { "epoch": 0.2951694689084601, "grad_norm": 1.087051064593804, "learning_rate": 1.65337479954533e-05, "loss": 1.6518, "step": 2212 }, { "epoch": 0.29530290899386175, "grad_norm": 1.1504157623012197, "learning_rate": 1.653047555385281e-05, "loss": 1.6489, "step": 2213 }, { "epoch": 0.2954363490792634, "grad_norm": 1.6929487032356967, "learning_rate": 1.652720189243384e-05, "loss": 1.6564, "step": 2214 }, { "epoch": 0.2955697891646651, "grad_norm": 9.547350526694943, "learning_rate": 1.6523927011807873e-05, "loss": 1.6765, "step": 2215 }, { "epoch": 0.2957032292500667, "grad_norm": 1.166505303552692, "learning_rate": 1.652065091258662e-05, "loss": 1.6389, "step": 2216 }, { "epoch": 0.29583666933546837, "grad_norm": 1.1611750428510519, "learning_rate": 1.651737359538201e-05, "loss": 1.6382, "step": 2217 }, { "epoch": 0.29597010942087004, "grad_norm": 1.161944383190572, "learning_rate": 1.651409506080622e-05, "loss": 1.6458, "step": 2218 }, { "epoch": 0.29610354950627166, "grad_norm": 1.0328377880475517, "learning_rate": 1.6510815309471638e-05, "loss": 1.6698, "step": 2219 }, { "epoch": 0.2962369895916733, "grad_norm": 1.2670883726284181, "learning_rate": 1.6507534341990884e-05, "loss": 1.6669, "step": 2220 }, { "epoch": 0.296370429677075, "grad_norm": 1.2369384329163466, "learning_rate": 1.6504252158976804e-05, "loss": 1.6523, "step": 2221 }, { "epoch": 0.29650386976247667, "grad_norm": 1.0226119191785725, "learning_rate": 1.6500968761042477e-05, "loss": 1.6438, "step": 2222 }, { "epoch": 0.2966373098478783, "grad_norm": 0.9968700209514012, "learning_rate": 1.6497684148801204e-05, "loss": 1.7007, "step": 2223 }, { "epoch": 0.29677074993327995, "grad_norm": 1.036350320781629, "learning_rate": 1.6494398322866503e-05, "loss": 1.6678, "step": 2224 }, { "epoch": 0.2969041900186816, "grad_norm": 0.9847140373170601, "learning_rate": 1.6491111283852147e-05, "loss": 1.6433, "step": 2225 }, { "epoch": 0.29703763010408324, "grad_norm": 0.9415756130495302, "learning_rate": 1.64878230323721e-05, "loss": 1.6222, "step": 2226 }, { "epoch": 0.2971710701894849, "grad_norm": 1.4397026580916885, "learning_rate": 1.648453356904058e-05, "loss": 1.777, "step": 2227 }, { "epoch": 0.2973045102748866, "grad_norm": 1.1618462036671973, "learning_rate": 1.6481242894472016e-05, "loss": 1.6368, "step": 2228 }, { "epoch": 0.29743795036028825, "grad_norm": 1.0250435345601665, "learning_rate": 1.6477951009281072e-05, "loss": 1.6461, "step": 2229 }, { "epoch": 0.29757139044568987, "grad_norm": 0.9923678067336387, "learning_rate": 1.6474657914082638e-05, "loss": 1.6785, "step": 2230 }, { "epoch": 0.29770483053109154, "grad_norm": 1.0267804282384663, "learning_rate": 1.647136360949182e-05, "loss": 1.6343, "step": 2231 }, { "epoch": 0.2978382706164932, "grad_norm": 1.023674327352044, "learning_rate": 1.6468068096123957e-05, "loss": 1.5966, "step": 2232 }, { "epoch": 0.2979717107018949, "grad_norm": 1.1944718768952494, "learning_rate": 1.6464771374594615e-05, "loss": 1.6535, "step": 2233 }, { "epoch": 0.2981051507872965, "grad_norm": 1.152852164366958, "learning_rate": 1.6461473445519585e-05, "loss": 1.6781, "step": 2234 }, { "epoch": 0.29823859087269816, "grad_norm": 1.0678201700556582, "learning_rate": 1.6458174309514882e-05, "loss": 1.6313, "step": 2235 }, { "epoch": 0.29837203095809983, "grad_norm": 1.0355313650728948, "learning_rate": 1.6454873967196748e-05, "loss": 1.6683, "step": 2236 }, { "epoch": 0.29850547104350145, "grad_norm": 1.0884233154304546, "learning_rate": 1.645157241918165e-05, "loss": 1.5859, "step": 2237 }, { "epoch": 0.2986389111289031, "grad_norm": 1.0472190401927721, "learning_rate": 1.6448269666086278e-05, "loss": 1.6596, "step": 2238 }, { "epoch": 0.2987723512143048, "grad_norm": 1.0311680152934846, "learning_rate": 1.6444965708527546e-05, "loss": 1.6525, "step": 2239 }, { "epoch": 0.29890579129970646, "grad_norm": 2.10561403694868, "learning_rate": 1.64416605471226e-05, "loss": 1.6469, "step": 2240 }, { "epoch": 0.2990392313851081, "grad_norm": 1.0497996204276114, "learning_rate": 1.6438354182488802e-05, "loss": 1.607, "step": 2241 }, { "epoch": 0.29917267147050974, "grad_norm": 1.043507533207965, "learning_rate": 1.6435046615243747e-05, "loss": 1.6621, "step": 2242 }, { "epoch": 0.2993061115559114, "grad_norm": 1.042723485887548, "learning_rate": 1.6431737846005254e-05, "loss": 1.6912, "step": 2243 }, { "epoch": 0.29943955164131303, "grad_norm": 1.0051152191792925, "learning_rate": 1.6428427875391353e-05, "loss": 1.6349, "step": 2244 }, { "epoch": 0.2995729917267147, "grad_norm": 1.128346274800114, "learning_rate": 1.642511670402032e-05, "loss": 1.7053, "step": 2245 }, { "epoch": 0.29970643181211637, "grad_norm": 1.0722777257603044, "learning_rate": 1.642180433251064e-05, "loss": 1.6878, "step": 2246 }, { "epoch": 0.29983987189751804, "grad_norm": 1.017261398089863, "learning_rate": 1.6418490761481022e-05, "loss": 1.6521, "step": 2247 }, { "epoch": 0.29997331198291965, "grad_norm": 1.0292260761586245, "learning_rate": 1.6415175991550412e-05, "loss": 1.6755, "step": 2248 }, { "epoch": 0.3001067520683213, "grad_norm": 1.0931207786222135, "learning_rate": 1.6411860023337962e-05, "loss": 1.6749, "step": 2249 }, { "epoch": 0.300240192153723, "grad_norm": 1.0831524536934993, "learning_rate": 1.6408542857463062e-05, "loss": 1.587, "step": 2250 }, { "epoch": 0.3003736322391246, "grad_norm": 1.0454308838172524, "learning_rate": 1.6405224494545322e-05, "loss": 1.6651, "step": 2251 }, { "epoch": 0.3005070723245263, "grad_norm": 1.0806258557902406, "learning_rate": 1.640190493520457e-05, "loss": 1.6699, "step": 2252 }, { "epoch": 0.30064051240992795, "grad_norm": 12.400419377894314, "learning_rate": 1.6398584180060867e-05, "loss": 1.6377, "step": 2253 }, { "epoch": 0.3007739524953296, "grad_norm": 1.2948113785574655, "learning_rate": 1.6395262229734486e-05, "loss": 1.607, "step": 2254 }, { "epoch": 0.30090739258073124, "grad_norm": 1.198607665528997, "learning_rate": 1.639193908484594e-05, "loss": 1.7028, "step": 2255 }, { "epoch": 0.3010408326661329, "grad_norm": 1.1317395785557423, "learning_rate": 1.6388614746015946e-05, "loss": 1.6525, "step": 2256 }, { "epoch": 0.3011742727515346, "grad_norm": 1.1476872660243271, "learning_rate": 1.6385289213865452e-05, "loss": 1.64, "step": 2257 }, { "epoch": 0.3013077128369362, "grad_norm": 1.480875813813185, "learning_rate": 1.6381962489015633e-05, "loss": 1.6505, "step": 2258 }, { "epoch": 0.30144115292233786, "grad_norm": 1.0975918871872568, "learning_rate": 1.637863457208788e-05, "loss": 1.6105, "step": 2259 }, { "epoch": 0.30157459300773953, "grad_norm": 1.147793532979415, "learning_rate": 1.6375305463703816e-05, "loss": 1.6267, "step": 2260 }, { "epoch": 0.3017080330931412, "grad_norm": 1.2879635429689233, "learning_rate": 1.6371975164485277e-05, "loss": 1.7015, "step": 2261 }, { "epoch": 0.3018414731785428, "grad_norm": 1.3283545115204671, "learning_rate": 1.636864367505432e-05, "loss": 1.6196, "step": 2262 }, { "epoch": 0.3019749132639445, "grad_norm": 1.0479344997197932, "learning_rate": 1.636531099603324e-05, "loss": 1.6243, "step": 2263 }, { "epoch": 0.30210835334934616, "grad_norm": 1.0765762306122926, "learning_rate": 1.6361977128044535e-05, "loss": 1.6196, "step": 2264 }, { "epoch": 0.3022417934347478, "grad_norm": 1.0309118980744778, "learning_rate": 1.6358642071710935e-05, "loss": 1.6135, "step": 2265 }, { "epoch": 0.30237523352014944, "grad_norm": 0.9953925772810231, "learning_rate": 1.6355305827655398e-05, "loss": 1.6429, "step": 2266 }, { "epoch": 0.3025086736055511, "grad_norm": 1.2130624721172607, "learning_rate": 1.6351968396501084e-05, "loss": 1.6343, "step": 2267 }, { "epoch": 0.3026421136909528, "grad_norm": 1.0691255501843615, "learning_rate": 1.6348629778871393e-05, "loss": 1.6658, "step": 2268 }, { "epoch": 0.3027755537763544, "grad_norm": 1.0154122944841089, "learning_rate": 1.6345289975389946e-05, "loss": 1.6048, "step": 2269 }, { "epoch": 0.30290899386175607, "grad_norm": 1.0303963960703182, "learning_rate": 1.6341948986680574e-05, "loss": 1.5902, "step": 2270 }, { "epoch": 0.30304243394715774, "grad_norm": 1.103571288321129, "learning_rate": 1.6338606813367334e-05, "loss": 1.6284, "step": 2271 }, { "epoch": 0.30317587403255936, "grad_norm": 1.0340663960820138, "learning_rate": 1.633526345607451e-05, "loss": 1.6625, "step": 2272 }, { "epoch": 0.303309314117961, "grad_norm": 1.0868515192551496, "learning_rate": 1.6331918915426607e-05, "loss": 1.6508, "step": 2273 }, { "epoch": 0.3034427542033627, "grad_norm": 1.0806584970772868, "learning_rate": 1.6328573192048337e-05, "loss": 1.6793, "step": 2274 }, { "epoch": 0.30357619428876437, "grad_norm": 1.1131107600277885, "learning_rate": 1.632522628656465e-05, "loss": 1.6524, "step": 2275 }, { "epoch": 0.303709634374166, "grad_norm": 0.970695670852881, "learning_rate": 1.6321878199600705e-05, "loss": 1.6254, "step": 2276 }, { "epoch": 0.30384307445956765, "grad_norm": 1.0449698885856158, "learning_rate": 1.6318528931781893e-05, "loss": 1.6488, "step": 2277 }, { "epoch": 0.3039765145449693, "grad_norm": 2.3976140688143754, "learning_rate": 1.6315178483733817e-05, "loss": 1.6349, "step": 2278 }, { "epoch": 0.30410995463037094, "grad_norm": 1.0721815977781701, "learning_rate": 1.6311826856082298e-05, "loss": 1.6444, "step": 2279 }, { "epoch": 0.3042433947157726, "grad_norm": 1.0877886525914027, "learning_rate": 1.6308474049453385e-05, "loss": 1.6218, "step": 2280 }, { "epoch": 0.3043768348011743, "grad_norm": 1.2215891275833313, "learning_rate": 1.6305120064473348e-05, "loss": 1.6729, "step": 2281 }, { "epoch": 0.30451027488657595, "grad_norm": 1.0066658286438341, "learning_rate": 1.6301764901768665e-05, "loss": 1.6514, "step": 2282 }, { "epoch": 0.30464371497197756, "grad_norm": 5.18175722987443, "learning_rate": 1.629840856196605e-05, "loss": 1.7849, "step": 2283 }, { "epoch": 0.30477715505737923, "grad_norm": 1.0661836522987367, "learning_rate": 1.6295051045692422e-05, "loss": 1.6515, "step": 2284 }, { "epoch": 0.3049105951427809, "grad_norm": 0.9948954364094378, "learning_rate": 1.6291692353574934e-05, "loss": 1.6653, "step": 2285 }, { "epoch": 0.3050440352281825, "grad_norm": 1.1776280843724025, "learning_rate": 1.6288332486240946e-05, "loss": 1.6249, "step": 2286 }, { "epoch": 0.3051774753135842, "grad_norm": 0.9803581714490958, "learning_rate": 1.6284971444318046e-05, "loss": 1.6381, "step": 2287 }, { "epoch": 0.30531091539898586, "grad_norm": 1.0334824917780492, "learning_rate": 1.628160922843404e-05, "loss": 1.7033, "step": 2288 }, { "epoch": 0.30544435548438753, "grad_norm": 1.1825464876579848, "learning_rate": 1.6278245839216947e-05, "loss": 1.6364, "step": 2289 }, { "epoch": 0.30557779556978915, "grad_norm": 1.089932123868756, "learning_rate": 1.627488127729501e-05, "loss": 1.6358, "step": 2290 }, { "epoch": 0.3057112356551908, "grad_norm": 1.0063881587721086, "learning_rate": 1.6271515543296693e-05, "loss": 1.6245, "step": 2291 }, { "epoch": 0.3058446757405925, "grad_norm": 0.9557576711140595, "learning_rate": 1.6268148637850675e-05, "loss": 1.6148, "step": 2292 }, { "epoch": 0.3059781158259941, "grad_norm": 1.155386941840756, "learning_rate": 1.626478056158586e-05, "loss": 1.6679, "step": 2293 }, { "epoch": 0.30611155591139577, "grad_norm": 1.002724278928917, "learning_rate": 1.626141131513136e-05, "loss": 1.6717, "step": 2294 }, { "epoch": 0.30624499599679744, "grad_norm": 1.0181937079803456, "learning_rate": 1.6258040899116517e-05, "loss": 1.6276, "step": 2295 }, { "epoch": 0.3063784360821991, "grad_norm": 1.0190676733905637, "learning_rate": 1.6254669314170887e-05, "loss": 1.6428, "step": 2296 }, { "epoch": 0.3065118761676007, "grad_norm": 1.0126611742120983, "learning_rate": 1.625129656092424e-05, "loss": 1.6943, "step": 2297 }, { "epoch": 0.3066453162530024, "grad_norm": 0.9702747048248926, "learning_rate": 1.624792264000657e-05, "loss": 1.6091, "step": 2298 }, { "epoch": 0.30677875633840407, "grad_norm": 1.0176969387216936, "learning_rate": 1.624454755204808e-05, "loss": 1.6432, "step": 2299 }, { "epoch": 0.30691219642380574, "grad_norm": 0.9905054594963236, "learning_rate": 1.624117129767921e-05, "loss": 1.6908, "step": 2300 }, { "epoch": 0.30704563650920735, "grad_norm": 1.0990745253836063, "learning_rate": 1.62377938775306e-05, "loss": 1.6545, "step": 2301 }, { "epoch": 0.307179076594609, "grad_norm": 0.9872989722764705, "learning_rate": 1.623441529223311e-05, "loss": 1.6699, "step": 2302 }, { "epoch": 0.3073125166800107, "grad_norm": 1.2705211460028154, "learning_rate": 1.6231035542417826e-05, "loss": 1.6582, "step": 2303 }, { "epoch": 0.3074459567654123, "grad_norm": 1.0150840493548405, "learning_rate": 1.6227654628716044e-05, "loss": 1.665, "step": 2304 }, { "epoch": 0.307579396850814, "grad_norm": 1.1379930735351889, "learning_rate": 1.6224272551759288e-05, "loss": 1.676, "step": 2305 }, { "epoch": 0.30771283693621565, "grad_norm": 1.2444071746179561, "learning_rate": 1.622088931217928e-05, "loss": 1.6459, "step": 2306 }, { "epoch": 0.3078462770216173, "grad_norm": 0.9976784854166414, "learning_rate": 1.621750491060798e-05, "loss": 1.6636, "step": 2307 }, { "epoch": 0.30797971710701894, "grad_norm": 1.2682449189830896, "learning_rate": 1.6214119347677548e-05, "loss": 1.6616, "step": 2308 }, { "epoch": 0.3081131571924206, "grad_norm": 1.0829225349432583, "learning_rate": 1.621073262402037e-05, "loss": 1.644, "step": 2309 }, { "epoch": 0.3082465972778223, "grad_norm": 1.0838885803573566, "learning_rate": 1.6207344740269056e-05, "loss": 1.6494, "step": 2310 }, { "epoch": 0.3083800373632239, "grad_norm": 1.0708462365866025, "learning_rate": 1.6203955697056416e-05, "loss": 1.6671, "step": 2311 }, { "epoch": 0.30851347744862556, "grad_norm": 0.9823970370572715, "learning_rate": 1.6200565495015487e-05, "loss": 1.6396, "step": 2312 }, { "epoch": 0.30864691753402723, "grad_norm": 1.0031602532602553, "learning_rate": 1.6197174134779517e-05, "loss": 1.6102, "step": 2313 }, { "epoch": 0.3087803576194289, "grad_norm": 1.2206609039872867, "learning_rate": 1.619378161698198e-05, "loss": 1.648, "step": 2314 }, { "epoch": 0.3089137977048305, "grad_norm": 1.3166269143643101, "learning_rate": 1.6190387942256548e-05, "loss": 1.6009, "step": 2315 }, { "epoch": 0.3090472377902322, "grad_norm": 1.2675618240810085, "learning_rate": 1.6186993111237134e-05, "loss": 1.6224, "step": 2316 }, { "epoch": 0.30918067787563386, "grad_norm": 1.0640732745150545, "learning_rate": 1.618359712455785e-05, "loss": 1.6662, "step": 2317 }, { "epoch": 0.3093141179610355, "grad_norm": 1.019032967258582, "learning_rate": 1.6180199982853026e-05, "loss": 1.6237, "step": 2318 }, { "epoch": 0.30944755804643714, "grad_norm": 1.0944634109792915, "learning_rate": 1.6176801686757207e-05, "loss": 1.6415, "step": 2319 }, { "epoch": 0.3095809981318388, "grad_norm": 1.24072168667135, "learning_rate": 1.6173402236905156e-05, "loss": 1.5915, "step": 2320 }, { "epoch": 0.3097144382172405, "grad_norm": 0.9746140716316298, "learning_rate": 1.6170001633931857e-05, "loss": 1.6443, "step": 2321 }, { "epoch": 0.3098478783026421, "grad_norm": 1.1844321844657195, "learning_rate": 1.6166599878472502e-05, "loss": 1.6174, "step": 2322 }, { "epoch": 0.30998131838804377, "grad_norm": 1.001876907977257, "learning_rate": 1.61631969711625e-05, "loss": 1.6162, "step": 2323 }, { "epoch": 0.31011475847344544, "grad_norm": 0.9886662045991245, "learning_rate": 1.6159792912637467e-05, "loss": 1.6354, "step": 2324 }, { "epoch": 0.31024819855884705, "grad_norm": 0.9775352505641491, "learning_rate": 1.6156387703533258e-05, "loss": 1.6204, "step": 2325 }, { "epoch": 0.3103816386442487, "grad_norm": 0.9826413139650488, "learning_rate": 1.6152981344485915e-05, "loss": 1.6398, "step": 2326 }, { "epoch": 0.3105150787296504, "grad_norm": 1.0049022279607043, "learning_rate": 1.6149573836131713e-05, "loss": 1.6421, "step": 2327 }, { "epoch": 0.31064851881505207, "grad_norm": 1.151819306544648, "learning_rate": 1.6146165179107135e-05, "loss": 1.5993, "step": 2328 }, { "epoch": 0.3107819589004537, "grad_norm": 1.144986255564853, "learning_rate": 1.6142755374048876e-05, "loss": 1.6405, "step": 2329 }, { "epoch": 0.31091539898585535, "grad_norm": 1.0389802092801697, "learning_rate": 1.613934442159385e-05, "loss": 1.6334, "step": 2330 }, { "epoch": 0.311048839071257, "grad_norm": 1.016003258094143, "learning_rate": 1.6135932322379188e-05, "loss": 1.6236, "step": 2331 }, { "epoch": 0.31118227915665864, "grad_norm": 0.966534466876772, "learning_rate": 1.6132519077042224e-05, "loss": 1.6443, "step": 2332 }, { "epoch": 0.3113157192420603, "grad_norm": 1.0190490159617054, "learning_rate": 1.6129104686220522e-05, "loss": 1.6327, "step": 2333 }, { "epoch": 0.311449159327462, "grad_norm": 0.9508676466180186, "learning_rate": 1.6125689150551846e-05, "loss": 1.5877, "step": 2334 }, { "epoch": 0.31158259941286365, "grad_norm": 1.0322824165183992, "learning_rate": 1.612227247067418e-05, "loss": 1.6722, "step": 2335 }, { "epoch": 0.31171603949826526, "grad_norm": 1.1605340980731977, "learning_rate": 1.6118854647225722e-05, "loss": 1.6116, "step": 2336 }, { "epoch": 0.31184947958366693, "grad_norm": 1.0040963152787474, "learning_rate": 1.611543568084488e-05, "loss": 1.6418, "step": 2337 }, { "epoch": 0.3119829196690686, "grad_norm": 1.0426733287211412, "learning_rate": 1.6112015572170283e-05, "loss": 1.6439, "step": 2338 }, { "epoch": 0.3121163597544702, "grad_norm": 1.0528462917004136, "learning_rate": 1.6108594321840763e-05, "loss": 1.6341, "step": 2339 }, { "epoch": 0.3122497998398719, "grad_norm": 1.005308410868297, "learning_rate": 1.6105171930495373e-05, "loss": 1.6207, "step": 2340 }, { "epoch": 0.31238323992527356, "grad_norm": 1.0757636731132663, "learning_rate": 1.6101748398773374e-05, "loss": 1.651, "step": 2341 }, { "epoch": 0.31251668001067523, "grad_norm": 1.157330419763036, "learning_rate": 1.609832372731425e-05, "loss": 1.5569, "step": 2342 }, { "epoch": 0.31265012009607684, "grad_norm": 1.1655530310462219, "learning_rate": 1.609489791675768e-05, "loss": 1.622, "step": 2343 }, { "epoch": 0.3127835601814785, "grad_norm": 1.0163847934254429, "learning_rate": 1.6091470967743577e-05, "loss": 1.6283, "step": 2344 }, { "epoch": 0.3129170002668802, "grad_norm": 0.9913652557421357, "learning_rate": 1.608804288091205e-05, "loss": 1.6029, "step": 2345 }, { "epoch": 0.3130504403522818, "grad_norm": 1.0250978527017118, "learning_rate": 1.6084613656903426e-05, "loss": 1.6701, "step": 2346 }, { "epoch": 0.31318388043768347, "grad_norm": 1.0036453857759162, "learning_rate": 1.6081183296358246e-05, "loss": 1.6656, "step": 2347 }, { "epoch": 0.31331732052308514, "grad_norm": 1.0093725549829087, "learning_rate": 1.607775179991726e-05, "loss": 1.62, "step": 2348 }, { "epoch": 0.3134507606084868, "grad_norm": 1.0549793980242639, "learning_rate": 1.6074319168221446e-05, "loss": 1.6474, "step": 2349 }, { "epoch": 0.3135842006938884, "grad_norm": 1.0014461970290505, "learning_rate": 1.6070885401911964e-05, "loss": 1.6402, "step": 2350 }, { "epoch": 0.3137176407792901, "grad_norm": 1.2302428695760383, "learning_rate": 1.606745050163021e-05, "loss": 1.6615, "step": 2351 }, { "epoch": 0.31385108086469177, "grad_norm": 1.278062546442448, "learning_rate": 1.6064014468017778e-05, "loss": 1.6443, "step": 2352 }, { "epoch": 0.3139845209500934, "grad_norm": 1.014494022872304, "learning_rate": 1.606057730171649e-05, "loss": 1.6771, "step": 2353 }, { "epoch": 0.31411796103549505, "grad_norm": 0.9992975913870881, "learning_rate": 1.6057139003368365e-05, "loss": 1.6673, "step": 2354 }, { "epoch": 0.3142514011208967, "grad_norm": 1.0264389162628824, "learning_rate": 1.6053699573615634e-05, "loss": 1.645, "step": 2355 }, { "epoch": 0.3143848412062984, "grad_norm": 1.2998311967872789, "learning_rate": 1.605025901310075e-05, "loss": 1.653, "step": 2356 }, { "epoch": 0.3145182812917, "grad_norm": 1.1129013572248108, "learning_rate": 1.604681732246636e-05, "loss": 1.5694, "step": 2357 }, { "epoch": 0.3146517213771017, "grad_norm": 0.998120908236157, "learning_rate": 1.6043374502355348e-05, "loss": 1.6577, "step": 2358 }, { "epoch": 0.31478516146250335, "grad_norm": 1.1224918644835182, "learning_rate": 1.603993055341078e-05, "loss": 1.6469, "step": 2359 }, { "epoch": 0.31491860154790496, "grad_norm": 1.0269545547781742, "learning_rate": 1.6036485476275957e-05, "loss": 1.6068, "step": 2360 }, { "epoch": 0.31505204163330663, "grad_norm": 1.136514585808053, "learning_rate": 1.603303927159437e-05, "loss": 1.6382, "step": 2361 }, { "epoch": 0.3151854817187083, "grad_norm": 1.2494184496056624, "learning_rate": 1.602959194000974e-05, "loss": 1.7152, "step": 2362 }, { "epoch": 0.31531892180411, "grad_norm": 1.0140755418750587, "learning_rate": 1.602614348216598e-05, "loss": 1.591, "step": 2363 }, { "epoch": 0.3154523618895116, "grad_norm": 1.0007366397635697, "learning_rate": 1.602269389870723e-05, "loss": 1.6349, "step": 2364 }, { "epoch": 0.31558580197491326, "grad_norm": 1.0379624969316805, "learning_rate": 1.6019243190277832e-05, "loss": 1.583, "step": 2365 }, { "epoch": 0.31571924206031493, "grad_norm": 1.122118459336519, "learning_rate": 1.6015791357522336e-05, "loss": 1.5789, "step": 2366 }, { "epoch": 0.31585268214571655, "grad_norm": 1.218518313111331, "learning_rate": 1.601233840108551e-05, "loss": 1.6483, "step": 2367 }, { "epoch": 0.3159861222311182, "grad_norm": 1.0325320279857855, "learning_rate": 1.600888432161232e-05, "loss": 1.6786, "step": 2368 }, { "epoch": 0.3161195623165199, "grad_norm": 1.0105488940747966, "learning_rate": 1.600542911974796e-05, "loss": 1.6214, "step": 2369 }, { "epoch": 0.31625300240192156, "grad_norm": 1.0174955025149985, "learning_rate": 1.6001972796137804e-05, "loss": 1.6188, "step": 2370 }, { "epoch": 0.31638644248732317, "grad_norm": 0.9686366099778118, "learning_rate": 1.5998515351427472e-05, "loss": 1.6052, "step": 2371 }, { "epoch": 0.31651988257272484, "grad_norm": 1.0943501073343973, "learning_rate": 1.5995056786262763e-05, "loss": 1.6652, "step": 2372 }, { "epoch": 0.3166533226581265, "grad_norm": 1.1056669145759417, "learning_rate": 1.59915971012897e-05, "loss": 1.5992, "step": 2373 }, { "epoch": 0.3167867627435282, "grad_norm": 1.0153669225410895, "learning_rate": 1.5988136297154525e-05, "loss": 1.6659, "step": 2374 }, { "epoch": 0.3169202028289298, "grad_norm": 0.9485851246206498, "learning_rate": 1.5984674374503663e-05, "loss": 1.638, "step": 2375 }, { "epoch": 0.31705364291433147, "grad_norm": 1.2485290662581918, "learning_rate": 1.5981211333983765e-05, "loss": 1.6167, "step": 2376 }, { "epoch": 0.31718708299973314, "grad_norm": 1.0025032236221025, "learning_rate": 1.5977747176241688e-05, "loss": 1.6485, "step": 2377 }, { "epoch": 0.31732052308513475, "grad_norm": 1.027037150167266, "learning_rate": 1.59742819019245e-05, "loss": 1.635, "step": 2378 }, { "epoch": 0.3174539631705364, "grad_norm": 1.0390794115599424, "learning_rate": 1.5970815511679472e-05, "loss": 1.6242, "step": 2379 }, { "epoch": 0.3175874032559381, "grad_norm": 1.178314062565435, "learning_rate": 1.5967348006154084e-05, "loss": 1.6109, "step": 2380 }, { "epoch": 0.31772084334133976, "grad_norm": 1.1670387061276462, "learning_rate": 1.5963879385996032e-05, "loss": 1.6196, "step": 2381 }, { "epoch": 0.3178542834267414, "grad_norm": 1.0774835661150515, "learning_rate": 1.596040965185321e-05, "loss": 1.6497, "step": 2382 }, { "epoch": 0.31798772351214305, "grad_norm": 1.1109464505151274, "learning_rate": 1.5956938804373726e-05, "loss": 1.6395, "step": 2383 }, { "epoch": 0.3181211635975447, "grad_norm": 1.0759976671368048, "learning_rate": 1.59534668442059e-05, "loss": 1.6022, "step": 2384 }, { "epoch": 0.31825460368294634, "grad_norm": 1.0363642572438576, "learning_rate": 1.5949993771998248e-05, "loss": 1.6947, "step": 2385 }, { "epoch": 0.318388043768348, "grad_norm": 1.1364653119855215, "learning_rate": 1.59465195883995e-05, "loss": 1.6464, "step": 2386 }, { "epoch": 0.3185214838537497, "grad_norm": 1.1363035834435125, "learning_rate": 1.59430442940586e-05, "loss": 1.6516, "step": 2387 }, { "epoch": 0.31865492393915135, "grad_norm": 1.0660321057250608, "learning_rate": 1.5939567889624693e-05, "loss": 1.6402, "step": 2388 }, { "epoch": 0.31878836402455296, "grad_norm": 1.0643000544064976, "learning_rate": 1.5936090375747123e-05, "loss": 1.6054, "step": 2389 }, { "epoch": 0.31892180410995463, "grad_norm": 1.234549999729085, "learning_rate": 1.593261175307546e-05, "loss": 1.6499, "step": 2390 }, { "epoch": 0.3190552441953563, "grad_norm": 1.1660252709285277, "learning_rate": 1.592913202225946e-05, "loss": 1.6534, "step": 2391 }, { "epoch": 0.3191886842807579, "grad_norm": 1.0300136024667785, "learning_rate": 1.592565118394911e-05, "loss": 1.6732, "step": 2392 }, { "epoch": 0.3193221243661596, "grad_norm": 1.0430395771115888, "learning_rate": 1.592216923879458e-05, "loss": 1.6707, "step": 2393 }, { "epoch": 0.31945556445156126, "grad_norm": 1.056183541401937, "learning_rate": 1.591868618744627e-05, "loss": 1.6245, "step": 2394 }, { "epoch": 0.31958900453696293, "grad_norm": 1.0460496279175218, "learning_rate": 1.591520203055476e-05, "loss": 1.6525, "step": 2395 }, { "epoch": 0.31972244462236454, "grad_norm": 1.168050816657518, "learning_rate": 1.5911716768770863e-05, "loss": 1.692, "step": 2396 }, { "epoch": 0.3198558847077662, "grad_norm": 1.014257361343356, "learning_rate": 1.5908230402745576e-05, "loss": 1.639, "step": 2397 }, { "epoch": 0.3199893247931679, "grad_norm": 0.9882262303186528, "learning_rate": 1.590474293313012e-05, "loss": 1.6204, "step": 2398 }, { "epoch": 0.3201227648785695, "grad_norm": 1.0647363395044251, "learning_rate": 1.5901254360575912e-05, "loss": 1.6276, "step": 2399 }, { "epoch": 0.32025620496397117, "grad_norm": 1.205341934103895, "learning_rate": 1.589776468573458e-05, "loss": 1.6697, "step": 2400 }, { "epoch": 0.32038964504937284, "grad_norm": 1.1796009355838823, "learning_rate": 1.589427390925795e-05, "loss": 1.6451, "step": 2401 }, { "epoch": 0.3205230851347745, "grad_norm": 1.238674085054047, "learning_rate": 1.5890782031798065e-05, "loss": 1.6161, "step": 2402 }, { "epoch": 0.3206565252201761, "grad_norm": 1.0448493530439331, "learning_rate": 1.5887289054007166e-05, "loss": 1.6487, "step": 2403 }, { "epoch": 0.3207899653055778, "grad_norm": 1.196197573510979, "learning_rate": 1.5883794976537697e-05, "loss": 1.6033, "step": 2404 }, { "epoch": 0.32092340539097947, "grad_norm": 1.0194877794457862, "learning_rate": 1.588029980004232e-05, "loss": 1.6694, "step": 2405 }, { "epoch": 0.3210568454763811, "grad_norm": 1.0318768025671652, "learning_rate": 1.5876803525173883e-05, "loss": 1.6659, "step": 2406 }, { "epoch": 0.32119028556178275, "grad_norm": 0.9865820243660075, "learning_rate": 1.5873306152585466e-05, "loss": 1.6008, "step": 2407 }, { "epoch": 0.3213237256471844, "grad_norm": 1.016542806768825, "learning_rate": 1.5869807682930327e-05, "loss": 1.5922, "step": 2408 }, { "epoch": 0.3214571657325861, "grad_norm": 6.608911252971496, "learning_rate": 1.5866308116861944e-05, "loss": 1.6503, "step": 2409 }, { "epoch": 0.3215906058179877, "grad_norm": 1.2676970243988273, "learning_rate": 1.586280745503399e-05, "loss": 1.6671, "step": 2410 }, { "epoch": 0.3217240459033894, "grad_norm": 1.1773866986931305, "learning_rate": 1.585930569810036e-05, "loss": 1.6256, "step": 2411 }, { "epoch": 0.32185748598879105, "grad_norm": 1.0205646117216987, "learning_rate": 1.5855802846715134e-05, "loss": 1.6661, "step": 2412 }, { "epoch": 0.32199092607419266, "grad_norm": 1.1132522973470225, "learning_rate": 1.5852298901532604e-05, "loss": 1.615, "step": 2413 }, { "epoch": 0.32212436615959433, "grad_norm": 1.0003679945512642, "learning_rate": 1.5848793863207276e-05, "loss": 1.6667, "step": 2414 }, { "epoch": 0.322257806244996, "grad_norm": 1.0445124912690658, "learning_rate": 1.5845287732393845e-05, "loss": 1.6084, "step": 2415 }, { "epoch": 0.3223912463303977, "grad_norm": 1.341548510183378, "learning_rate": 1.5841780509747213e-05, "loss": 1.5919, "step": 2416 }, { "epoch": 0.3225246864157993, "grad_norm": 0.9783216242963252, "learning_rate": 1.5838272195922492e-05, "loss": 1.6474, "step": 2417 }, { "epoch": 0.32265812650120096, "grad_norm": 1.0079878098801114, "learning_rate": 1.5834762791575e-05, "loss": 1.6694, "step": 2418 }, { "epoch": 0.32279156658660263, "grad_norm": 0.9777003006073635, "learning_rate": 1.5831252297360248e-05, "loss": 1.6087, "step": 2419 }, { "epoch": 0.32292500667200424, "grad_norm": 1.0210674211146504, "learning_rate": 1.5827740713933958e-05, "loss": 1.6225, "step": 2420 }, { "epoch": 0.3230584467574059, "grad_norm": 0.9767096638361283, "learning_rate": 1.5824228041952054e-05, "loss": 1.6115, "step": 2421 }, { "epoch": 0.3231918868428076, "grad_norm": 1.049229142974769, "learning_rate": 1.5820714282070663e-05, "loss": 1.586, "step": 2422 }, { "epoch": 0.32332532692820926, "grad_norm": 0.9905778604640099, "learning_rate": 1.5817199434946115e-05, "loss": 1.5858, "step": 2423 }, { "epoch": 0.32345876701361087, "grad_norm": 1.0227388841710876, "learning_rate": 1.5813683501234946e-05, "loss": 1.6233, "step": 2424 }, { "epoch": 0.32359220709901254, "grad_norm": 1.0857008064049787, "learning_rate": 1.581016648159389e-05, "loss": 1.6423, "step": 2425 }, { "epoch": 0.3237256471844142, "grad_norm": 1.1213206892388514, "learning_rate": 1.5806648376679885e-05, "loss": 1.6308, "step": 2426 }, { "epoch": 0.3238590872698158, "grad_norm": 1.0371863646742951, "learning_rate": 1.5803129187150074e-05, "loss": 1.613, "step": 2427 }, { "epoch": 0.3239925273552175, "grad_norm": 1.0285238928995435, "learning_rate": 1.57996089136618e-05, "loss": 1.632, "step": 2428 }, { "epoch": 0.32412596744061917, "grad_norm": 1.0053527521421413, "learning_rate": 1.5796087556872616e-05, "loss": 1.6295, "step": 2429 }, { "epoch": 0.32425940752602084, "grad_norm": 1.0593515172965136, "learning_rate": 1.5792565117440267e-05, "loss": 1.6714, "step": 2430 }, { "epoch": 0.32439284761142245, "grad_norm": 0.9962509724536518, "learning_rate": 1.5789041596022702e-05, "loss": 1.6014, "step": 2431 }, { "epoch": 0.3245262876968241, "grad_norm": 1.150755264452728, "learning_rate": 1.5785516993278082e-05, "loss": 1.568, "step": 2432 }, { "epoch": 0.3246597277822258, "grad_norm": 1.1482871392650946, "learning_rate": 1.578199130986476e-05, "loss": 1.6891, "step": 2433 }, { "epoch": 0.3247931678676274, "grad_norm": 1.0524081924283053, "learning_rate": 1.5778464546441283e-05, "loss": 1.6186, "step": 2434 }, { "epoch": 0.3249266079530291, "grad_norm": 0.9963103718861039, "learning_rate": 1.5774936703666423e-05, "loss": 1.6579, "step": 2435 }, { "epoch": 0.32506004803843075, "grad_norm": 1.034416639979626, "learning_rate": 1.5771407782199137e-05, "loss": 1.6743, "step": 2436 }, { "epoch": 0.3251934881238324, "grad_norm": 1.0233115564518966, "learning_rate": 1.5767877782698588e-05, "loss": 1.6388, "step": 2437 }, { "epoch": 0.32532692820923403, "grad_norm": 1.0203611371448205, "learning_rate": 1.576434670582414e-05, "loss": 1.6796, "step": 2438 }, { "epoch": 0.3254603682946357, "grad_norm": 1.1316243969954793, "learning_rate": 1.5760814552235353e-05, "loss": 1.6287, "step": 2439 }, { "epoch": 0.3255938083800374, "grad_norm": 2.773420421285832, "learning_rate": 1.5757281322592e-05, "loss": 1.7335, "step": 2440 }, { "epoch": 0.32572724846543905, "grad_norm": 0.9752268956267421, "learning_rate": 1.5753747017554043e-05, "loss": 1.6287, "step": 2441 }, { "epoch": 0.32586068855084066, "grad_norm": 1.1865730627498443, "learning_rate": 1.5750211637781654e-05, "loss": 1.6662, "step": 2442 }, { "epoch": 0.32599412863624233, "grad_norm": 1.066883641739535, "learning_rate": 1.57466751839352e-05, "loss": 1.6201, "step": 2443 }, { "epoch": 0.326127568721644, "grad_norm": 1.012400294015082, "learning_rate": 1.5743137656675248e-05, "loss": 1.6834, "step": 2444 }, { "epoch": 0.3262610088070456, "grad_norm": 1.2919053431818395, "learning_rate": 1.5739599056662572e-05, "loss": 1.63, "step": 2445 }, { "epoch": 0.3263944488924473, "grad_norm": 1.1008048203769303, "learning_rate": 1.5736059384558136e-05, "loss": 1.6535, "step": 2446 }, { "epoch": 0.32652788897784896, "grad_norm": 1.0198928027043783, "learning_rate": 1.573251864102312e-05, "loss": 1.6043, "step": 2447 }, { "epoch": 0.3266613290632506, "grad_norm": 1.255841958086851, "learning_rate": 1.5728976826718887e-05, "loss": 1.613, "step": 2448 }, { "epoch": 0.32679476914865224, "grad_norm": 1.1079219569085927, "learning_rate": 1.572543394230701e-05, "loss": 1.5904, "step": 2449 }, { "epoch": 0.3269282092340539, "grad_norm": 1.0252017128719715, "learning_rate": 1.5721889988449258e-05, "loss": 1.6363, "step": 2450 }, { "epoch": 0.3270616493194556, "grad_norm": 1.051784185647374, "learning_rate": 1.5718344965807602e-05, "loss": 1.611, "step": 2451 }, { "epoch": 0.3271950894048572, "grad_norm": 0.9834253613840619, "learning_rate": 1.5714798875044214e-05, "loss": 1.6072, "step": 2452 }, { "epoch": 0.32732852949025887, "grad_norm": 1.0314033695212004, "learning_rate": 1.5711251716821458e-05, "loss": 1.6627, "step": 2453 }, { "epoch": 0.32746196957566054, "grad_norm": 0.9745160928801103, "learning_rate": 1.570770349180191e-05, "loss": 1.6387, "step": 2454 }, { "epoch": 0.3275954096610622, "grad_norm": 1.0004048445912241, "learning_rate": 1.5704154200648328e-05, "loss": 1.616, "step": 2455 }, { "epoch": 0.3277288497464638, "grad_norm": 1.0107807331175838, "learning_rate": 1.5700603844023695e-05, "loss": 1.6356, "step": 2456 }, { "epoch": 0.3278622898318655, "grad_norm": 0.9520529144617134, "learning_rate": 1.5697052422591163e-05, "loss": 1.5897, "step": 2457 }, { "epoch": 0.32799572991726716, "grad_norm": 1.0058106372712747, "learning_rate": 1.56934999370141e-05, "loss": 1.6069, "step": 2458 }, { "epoch": 0.3281291700026688, "grad_norm": 1.0663435525855554, "learning_rate": 1.5689946387956075e-05, "loss": 1.7066, "step": 2459 }, { "epoch": 0.32826261008807045, "grad_norm": 0.9831844195678802, "learning_rate": 1.5686391776080844e-05, "loss": 1.5786, "step": 2460 }, { "epoch": 0.3283960501734721, "grad_norm": 1.0642774090185225, "learning_rate": 1.5682836102052376e-05, "loss": 1.6452, "step": 2461 }, { "epoch": 0.3285294902588738, "grad_norm": 1.037534479854548, "learning_rate": 1.5679279366534822e-05, "loss": 1.6082, "step": 2462 }, { "epoch": 0.3286629303442754, "grad_norm": 1.173992093363848, "learning_rate": 1.5675721570192545e-05, "loss": 1.6464, "step": 2463 }, { "epoch": 0.3287963704296771, "grad_norm": 1.0051707385512147, "learning_rate": 1.5672162713690098e-05, "loss": 1.6256, "step": 2464 }, { "epoch": 0.32892981051507875, "grad_norm": 1.0045689191856597, "learning_rate": 1.5668602797692238e-05, "loss": 1.629, "step": 2465 }, { "epoch": 0.32906325060048036, "grad_norm": 1.0240816684727387, "learning_rate": 1.566504182286391e-05, "loss": 1.6125, "step": 2466 }, { "epoch": 0.32919669068588203, "grad_norm": 0.9893049585917324, "learning_rate": 1.5661479789870277e-05, "loss": 1.6334, "step": 2467 }, { "epoch": 0.3293301307712837, "grad_norm": 1.0764320535215586, "learning_rate": 1.5657916699376668e-05, "loss": 1.6781, "step": 2468 }, { "epoch": 0.3294635708566854, "grad_norm": 1.2439578576993158, "learning_rate": 1.565435255204864e-05, "loss": 1.6416, "step": 2469 }, { "epoch": 0.329597010942087, "grad_norm": 1.006873672025213, "learning_rate": 1.5650787348551934e-05, "loss": 1.6524, "step": 2470 }, { "epoch": 0.32973045102748866, "grad_norm": 1.0804959807305459, "learning_rate": 1.564722108955249e-05, "loss": 1.6437, "step": 2471 }, { "epoch": 0.32986389111289033, "grad_norm": 1.1619045794374623, "learning_rate": 1.564365377571644e-05, "loss": 1.5983, "step": 2472 }, { "epoch": 0.32999733119829194, "grad_norm": 1.1240948384895437, "learning_rate": 1.5640085407710122e-05, "loss": 1.6569, "step": 2473 }, { "epoch": 0.3301307712836936, "grad_norm": 1.0470044546188255, "learning_rate": 1.5636515986200064e-05, "loss": 1.6384, "step": 2474 }, { "epoch": 0.3302642113690953, "grad_norm": 0.9851348342236802, "learning_rate": 1.563294551185299e-05, "loss": 1.6211, "step": 2475 }, { "epoch": 0.33039765145449695, "grad_norm": 1.0566689739549326, "learning_rate": 1.5629373985335836e-05, "loss": 1.6107, "step": 2476 }, { "epoch": 0.33053109153989857, "grad_norm": 1.2307836242301233, "learning_rate": 1.5625801407315706e-05, "loss": 1.6671, "step": 2477 }, { "epoch": 0.33066453162530024, "grad_norm": 1.1555471488035003, "learning_rate": 1.562222777845993e-05, "loss": 1.6066, "step": 2478 }, { "epoch": 0.3307979717107019, "grad_norm": 1.132011537007146, "learning_rate": 1.5618653099436015e-05, "loss": 1.6749, "step": 2479 }, { "epoch": 0.3309314117961035, "grad_norm": 1.0591256350110159, "learning_rate": 1.561507737091167e-05, "loss": 1.6399, "step": 2480 }, { "epoch": 0.3310648518815052, "grad_norm": 1.006730799707782, "learning_rate": 1.561150059355481e-05, "loss": 1.641, "step": 2481 }, { "epoch": 0.33119829196690687, "grad_norm": 1.114208556739606, "learning_rate": 1.560792276803352e-05, "loss": 1.645, "step": 2482 }, { "epoch": 0.33133173205230854, "grad_norm": 1.0266741767972964, "learning_rate": 1.5604343895016107e-05, "loss": 1.6305, "step": 2483 }, { "epoch": 0.33146517213771015, "grad_norm": 1.0172781397984123, "learning_rate": 1.5600763975171065e-05, "loss": 1.6619, "step": 2484 }, { "epoch": 0.3315986122231118, "grad_norm": 1.0013978329841358, "learning_rate": 1.5597183009167076e-05, "loss": 1.6364, "step": 2485 }, { "epoch": 0.3317320523085135, "grad_norm": 1.023294574063603, "learning_rate": 1.559360099767303e-05, "loss": 1.6665, "step": 2486 }, { "epoch": 0.3318654923939151, "grad_norm": 1.0000206147873272, "learning_rate": 1.5590017941358002e-05, "loss": 1.5405, "step": 2487 }, { "epoch": 0.3319989324793168, "grad_norm": 1.0226703188487969, "learning_rate": 1.5586433840891268e-05, "loss": 1.6402, "step": 2488 }, { "epoch": 0.33213237256471845, "grad_norm": 1.1484976146272843, "learning_rate": 1.5582848696942298e-05, "loss": 1.6451, "step": 2489 }, { "epoch": 0.3322658126501201, "grad_norm": 1.0151865246487197, "learning_rate": 1.557926251018075e-05, "loss": 1.6442, "step": 2490 }, { "epoch": 0.33239925273552173, "grad_norm": 1.0315722760396437, "learning_rate": 1.557567528127649e-05, "loss": 1.6478, "step": 2491 }, { "epoch": 0.3325326928209234, "grad_norm": 0.9943912244470389, "learning_rate": 1.557208701089957e-05, "loss": 1.622, "step": 2492 }, { "epoch": 0.3326661329063251, "grad_norm": 1.27101225719953, "learning_rate": 1.5568497699720237e-05, "loss": 1.619, "step": 2493 }, { "epoch": 0.3327995729917267, "grad_norm": 1.0729429345846575, "learning_rate": 1.556490734840893e-05, "loss": 1.6505, "step": 2494 }, { "epoch": 0.33293301307712836, "grad_norm": 1.1996428230299965, "learning_rate": 1.5561315957636294e-05, "loss": 1.6437, "step": 2495 }, { "epoch": 0.33306645316253003, "grad_norm": 1.019452040571454, "learning_rate": 1.555772352807315e-05, "loss": 1.5922, "step": 2496 }, { "epoch": 0.3331998932479317, "grad_norm": 0.9966197268098483, "learning_rate": 1.5554130060390535e-05, "loss": 1.6469, "step": 2497 }, { "epoch": 0.3333333333333333, "grad_norm": 0.9924997296677041, "learning_rate": 1.5550535555259654e-05, "loss": 1.6905, "step": 2498 }, { "epoch": 0.333466773418735, "grad_norm": 1.0222682047094578, "learning_rate": 1.554694001335193e-05, "loss": 1.6463, "step": 2499 }, { "epoch": 0.33360021350413666, "grad_norm": 0.9803234389294245, "learning_rate": 1.5543343435338965e-05, "loss": 1.6299, "step": 2500 }, { "epoch": 0.33373365358953827, "grad_norm": 1.1178595006838485, "learning_rate": 1.5539745821892558e-05, "loss": 1.5962, "step": 2501 }, { "epoch": 0.33386709367493994, "grad_norm": 1.2903970617621783, "learning_rate": 1.5536147173684706e-05, "loss": 1.6377, "step": 2502 }, { "epoch": 0.3340005337603416, "grad_norm": 0.9991404310713827, "learning_rate": 1.553254749138759e-05, "loss": 1.676, "step": 2503 }, { "epoch": 0.3341339738457433, "grad_norm": 1.0017240659943722, "learning_rate": 1.55289467756736e-05, "loss": 1.6382, "step": 2504 }, { "epoch": 0.3342674139311449, "grad_norm": 1.0389840759263576, "learning_rate": 1.5525345027215296e-05, "loss": 1.6566, "step": 2505 }, { "epoch": 0.33440085401654657, "grad_norm": 1.1995901930820982, "learning_rate": 1.552174224668545e-05, "loss": 1.586, "step": 2506 }, { "epoch": 0.33453429410194824, "grad_norm": 1.0567962769597756, "learning_rate": 1.551813843475702e-05, "loss": 1.6554, "step": 2507 }, { "epoch": 0.33466773418734985, "grad_norm": 1.005841753947666, "learning_rate": 1.5514533592103154e-05, "loss": 1.625, "step": 2508 }, { "epoch": 0.3348011742727515, "grad_norm": 0.9857509429122189, "learning_rate": 1.5510927719397203e-05, "loss": 1.6568, "step": 2509 }, { "epoch": 0.3349346143581532, "grad_norm": 1.0001185366594785, "learning_rate": 1.55073208173127e-05, "loss": 1.6372, "step": 2510 }, { "epoch": 0.33506805444355486, "grad_norm": 1.015290456272018, "learning_rate": 1.5503712886523366e-05, "loss": 1.6635, "step": 2511 }, { "epoch": 0.3352014945289565, "grad_norm": 1.010515707552436, "learning_rate": 1.5500103927703128e-05, "loss": 1.5674, "step": 2512 }, { "epoch": 0.33533493461435815, "grad_norm": 1.2221140435977949, "learning_rate": 1.54964939415261e-05, "loss": 1.6435, "step": 2513 }, { "epoch": 0.3354683746997598, "grad_norm": 1.0752443571797692, "learning_rate": 1.549288292866658e-05, "loss": 1.6192, "step": 2514 }, { "epoch": 0.3356018147851615, "grad_norm": 0.9988521065908124, "learning_rate": 1.5489270889799074e-05, "loss": 1.6454, "step": 2515 }, { "epoch": 0.3357352548705631, "grad_norm": 1.386021157462588, "learning_rate": 1.548565782559826e-05, "loss": 1.6567, "step": 2516 }, { "epoch": 0.3358686949559648, "grad_norm": 1.2499411923947341, "learning_rate": 1.5482043736739018e-05, "loss": 1.65, "step": 2517 }, { "epoch": 0.33600213504136645, "grad_norm": 1.040398945045627, "learning_rate": 1.5478428623896423e-05, "loss": 1.6505, "step": 2518 }, { "epoch": 0.33613557512676806, "grad_norm": 1.0467166314907603, "learning_rate": 1.5474812487745738e-05, "loss": 1.6145, "step": 2519 }, { "epoch": 0.33626901521216973, "grad_norm": 0.9735753640039505, "learning_rate": 1.547119532896241e-05, "loss": 1.5752, "step": 2520 }, { "epoch": 0.3364024552975714, "grad_norm": 1.1331650065762788, "learning_rate": 1.5467577148222087e-05, "loss": 1.5787, "step": 2521 }, { "epoch": 0.33653589538297307, "grad_norm": 1.0303627504262778, "learning_rate": 1.5463957946200606e-05, "loss": 1.5802, "step": 2522 }, { "epoch": 0.3366693354683747, "grad_norm": 1.0155983478918824, "learning_rate": 1.5460337723573985e-05, "loss": 1.6243, "step": 2523 }, { "epoch": 0.33680277555377636, "grad_norm": 1.0289284948725979, "learning_rate": 1.545671648101845e-05, "loss": 1.658, "step": 2524 }, { "epoch": 0.336936215639178, "grad_norm": 1.2059550726829327, "learning_rate": 1.5453094219210402e-05, "loss": 1.6115, "step": 2525 }, { "epoch": 0.33706965572457964, "grad_norm": 1.0560606404340085, "learning_rate": 1.544947093882644e-05, "loss": 1.6066, "step": 2526 }, { "epoch": 0.3372030958099813, "grad_norm": 1.1249026083140772, "learning_rate": 1.5445846640543353e-05, "loss": 1.6824, "step": 2527 }, { "epoch": 0.337336535895383, "grad_norm": 1.0524987361668996, "learning_rate": 1.5442221325038113e-05, "loss": 1.6226, "step": 2528 }, { "epoch": 0.33746997598078465, "grad_norm": 1.1006613854032747, "learning_rate": 1.5438594992987893e-05, "loss": 1.6257, "step": 2529 }, { "epoch": 0.33760341606618627, "grad_norm": 1.1312957489575692, "learning_rate": 1.543496764507005e-05, "loss": 1.6335, "step": 2530 }, { "epoch": 0.33773685615158794, "grad_norm": 1.0553318381697188, "learning_rate": 1.543133928196213e-05, "loss": 1.6227, "step": 2531 }, { "epoch": 0.3378702962369896, "grad_norm": 1.1640026808016215, "learning_rate": 1.542770990434187e-05, "loss": 1.615, "step": 2532 }, { "epoch": 0.3380037363223912, "grad_norm": 1.1330496348678385, "learning_rate": 1.5424079512887197e-05, "loss": 1.6289, "step": 2533 }, { "epoch": 0.3381371764077929, "grad_norm": 1.235500382588085, "learning_rate": 1.5420448108276225e-05, "loss": 1.6293, "step": 2534 }, { "epoch": 0.33827061649319456, "grad_norm": 0.9892442737683309, "learning_rate": 1.5416815691187263e-05, "loss": 1.6451, "step": 2535 }, { "epoch": 0.33840405657859624, "grad_norm": 1.0714941246892575, "learning_rate": 1.54131822622988e-05, "loss": 1.6383, "step": 2536 }, { "epoch": 0.33853749666399785, "grad_norm": 0.9765304919004115, "learning_rate": 1.5409547822289523e-05, "loss": 1.6106, "step": 2537 }, { "epoch": 0.3386709367493995, "grad_norm": 1.1397242313512768, "learning_rate": 1.5405912371838303e-05, "loss": 1.6759, "step": 2538 }, { "epoch": 0.3388043768348012, "grad_norm": 1.04733183839702, "learning_rate": 1.5402275911624202e-05, "loss": 1.6065, "step": 2539 }, { "epoch": 0.3389378169202028, "grad_norm": 1.0923000163302528, "learning_rate": 1.5398638442326466e-05, "loss": 1.5728, "step": 2540 }, { "epoch": 0.3390712570056045, "grad_norm": 1.012586965117265, "learning_rate": 1.5394999964624534e-05, "loss": 1.6695, "step": 2541 }, { "epoch": 0.33920469709100615, "grad_norm": 1.2400537139148242, "learning_rate": 1.5391360479198035e-05, "loss": 1.6388, "step": 2542 }, { "epoch": 0.3393381371764078, "grad_norm": 1.2509378456829192, "learning_rate": 1.538771998672678e-05, "loss": 1.596, "step": 2543 }, { "epoch": 0.33947157726180943, "grad_norm": 1.075544011557188, "learning_rate": 1.538407848789077e-05, "loss": 1.5977, "step": 2544 }, { "epoch": 0.3396050173472111, "grad_norm": 1.2176294488296542, "learning_rate": 1.5380435983370207e-05, "loss": 1.6392, "step": 2545 }, { "epoch": 0.3397384574326128, "grad_norm": 1.3397975236283604, "learning_rate": 1.5376792473845453e-05, "loss": 1.6818, "step": 2546 }, { "epoch": 0.3398718975180144, "grad_norm": 1.046773813890146, "learning_rate": 1.5373147959997085e-05, "loss": 1.6099, "step": 2547 }, { "epoch": 0.34000533760341606, "grad_norm": 1.0648283430543095, "learning_rate": 1.5369502442505852e-05, "loss": 1.6449, "step": 2548 }, { "epoch": 0.34013877768881773, "grad_norm": 1.1528358481625516, "learning_rate": 1.53658559220527e-05, "loss": 1.6178, "step": 2549 }, { "epoch": 0.3402722177742194, "grad_norm": 1.230719491103403, "learning_rate": 1.5362208399318753e-05, "loss": 1.6431, "step": 2550 }, { "epoch": 0.340405657859621, "grad_norm": 0.9911045985776983, "learning_rate": 1.5358559874985326e-05, "loss": 1.6593, "step": 2551 }, { "epoch": 0.3405390979450227, "grad_norm": 1.0148399749816694, "learning_rate": 1.5354910349733926e-05, "loss": 1.6305, "step": 2552 }, { "epoch": 0.34067253803042435, "grad_norm": 0.9757587532893943, "learning_rate": 1.5351259824246244e-05, "loss": 1.6463, "step": 2553 }, { "epoch": 0.34080597811582597, "grad_norm": 1.0377383659545665, "learning_rate": 1.5347608299204147e-05, "loss": 1.5969, "step": 2554 }, { "epoch": 0.34093941820122764, "grad_norm": 1.1327301101792895, "learning_rate": 1.5343955775289708e-05, "loss": 1.5886, "step": 2555 }, { "epoch": 0.3410728582866293, "grad_norm": 1.1199829427800625, "learning_rate": 1.534030225318517e-05, "loss": 1.6568, "step": 2556 }, { "epoch": 0.341206298372031, "grad_norm": 1.1255501040237739, "learning_rate": 1.533664773357298e-05, "loss": 1.6195, "step": 2557 }, { "epoch": 0.3413397384574326, "grad_norm": 0.9751153526994987, "learning_rate": 1.5332992217135746e-05, "loss": 1.6075, "step": 2558 }, { "epoch": 0.34147317854283427, "grad_norm": 1.0250661162211099, "learning_rate": 1.5329335704556284e-05, "loss": 1.6398, "step": 2559 }, { "epoch": 0.34160661862823594, "grad_norm": 1.1963912340246656, "learning_rate": 1.532567819651759e-05, "loss": 1.6364, "step": 2560 }, { "epoch": 0.34174005871363755, "grad_norm": 1.0270905478988817, "learning_rate": 1.532201969370285e-05, "loss": 1.6027, "step": 2561 }, { "epoch": 0.3418734987990392, "grad_norm": 0.9716309113456719, "learning_rate": 1.531836019679542e-05, "loss": 1.6109, "step": 2562 }, { "epoch": 0.3420069388844409, "grad_norm": 1.0974203593959186, "learning_rate": 1.5314699706478856e-05, "loss": 1.6531, "step": 2563 }, { "epoch": 0.34214037896984256, "grad_norm": 1.1280911638633904, "learning_rate": 1.53110382234369e-05, "loss": 1.6526, "step": 2564 }, { "epoch": 0.3422738190552442, "grad_norm": 1.0352666364430376, "learning_rate": 1.530737574835347e-05, "loss": 1.638, "step": 2565 }, { "epoch": 0.34240725914064585, "grad_norm": 0.9852011126418923, "learning_rate": 1.530371228191268e-05, "loss": 1.629, "step": 2566 }, { "epoch": 0.3425406992260475, "grad_norm": 1.7874095738872313, "learning_rate": 1.5300047824798815e-05, "loss": 1.6322, "step": 2567 }, { "epoch": 0.34267413931144913, "grad_norm": 1.068401710484991, "learning_rate": 1.5296382377696364e-05, "loss": 1.6951, "step": 2568 }, { "epoch": 0.3428075793968508, "grad_norm": 0.9922432887897237, "learning_rate": 1.5292715941289985e-05, "loss": 1.6335, "step": 2569 }, { "epoch": 0.3429410194822525, "grad_norm": 0.9469476131303167, "learning_rate": 1.528904851626453e-05, "loss": 1.5556, "step": 2570 }, { "epoch": 0.34307445956765414, "grad_norm": 1.0252957676256522, "learning_rate": 1.528538010330503e-05, "loss": 1.6207, "step": 2571 }, { "epoch": 0.34320789965305576, "grad_norm": 1.256901118189651, "learning_rate": 1.52817107030967e-05, "loss": 1.5499, "step": 2572 }, { "epoch": 0.34334133973845743, "grad_norm": 1.1760660704565096, "learning_rate": 1.5278040316324947e-05, "loss": 1.5907, "step": 2573 }, { "epoch": 0.3434747798238591, "grad_norm": 1.056335970772178, "learning_rate": 1.5274368943675358e-05, "loss": 1.6585, "step": 2574 }, { "epoch": 0.3436082199092607, "grad_norm": 1.0440163675723362, "learning_rate": 1.5270696585833697e-05, "loss": 1.6956, "step": 2575 }, { "epoch": 0.3437416599946624, "grad_norm": 1.1496273753007886, "learning_rate": 1.5267023243485927e-05, "loss": 1.626, "step": 2576 }, { "epoch": 0.34387510008006406, "grad_norm": 0.989190837912653, "learning_rate": 1.526334891731818e-05, "loss": 1.5821, "step": 2577 }, { "epoch": 0.3440085401654657, "grad_norm": 0.975382429954711, "learning_rate": 1.5259673608016782e-05, "loss": 1.6546, "step": 2578 }, { "epoch": 0.34414198025086734, "grad_norm": 1.2380666712332904, "learning_rate": 1.5255997316268234e-05, "loss": 1.6596, "step": 2579 }, { "epoch": 0.344275420336269, "grad_norm": 1.0226269070252, "learning_rate": 1.5252320042759232e-05, "loss": 1.6045, "step": 2580 }, { "epoch": 0.3444088604216707, "grad_norm": 0.9919122022483479, "learning_rate": 1.5248641788176647e-05, "loss": 1.5316, "step": 2581 }, { "epoch": 0.34454230050707235, "grad_norm": 1.1347269775006643, "learning_rate": 1.5244962553207531e-05, "loss": 1.6384, "step": 2582 }, { "epoch": 0.34467574059247397, "grad_norm": 0.9828544036878673, "learning_rate": 1.524128233853913e-05, "loss": 1.6399, "step": 2583 }, { "epoch": 0.34480918067787564, "grad_norm": 1.3101691050309692, "learning_rate": 1.523760114485886e-05, "loss": 1.6285, "step": 2584 }, { "epoch": 0.3449426207632773, "grad_norm": 1.0927621397780132, "learning_rate": 1.5233918972854329e-05, "loss": 1.5724, "step": 2585 }, { "epoch": 0.3450760608486789, "grad_norm": 1.0744584089585294, "learning_rate": 1.5230235823213325e-05, "loss": 1.5861, "step": 2586 }, { "epoch": 0.3452095009340806, "grad_norm": 1.1708529579248028, "learning_rate": 1.5226551696623815e-05, "loss": 1.6419, "step": 2587 }, { "epoch": 0.34534294101948226, "grad_norm": 1.170484702302893, "learning_rate": 1.5222866593773955e-05, "loss": 1.6793, "step": 2588 }, { "epoch": 0.34547638110488393, "grad_norm": 0.9940131617224933, "learning_rate": 1.521918051535208e-05, "loss": 1.6196, "step": 2589 }, { "epoch": 0.34560982119028555, "grad_norm": 1.0291494485832964, "learning_rate": 1.5215493462046711e-05, "loss": 1.6436, "step": 2590 }, { "epoch": 0.3457432612756872, "grad_norm": 1.1772938675721651, "learning_rate": 1.5211805434546541e-05, "loss": 1.5946, "step": 2591 }, { "epoch": 0.3458767013610889, "grad_norm": 1.3230747393201525, "learning_rate": 1.5208116433540454e-05, "loss": 1.6298, "step": 2592 }, { "epoch": 0.3460101414464905, "grad_norm": 1.0683806522297445, "learning_rate": 1.5204426459717516e-05, "loss": 1.6174, "step": 2593 }, { "epoch": 0.3461435815318922, "grad_norm": 1.1046905551827517, "learning_rate": 1.520073551376697e-05, "loss": 1.6434, "step": 2594 }, { "epoch": 0.34627702161729385, "grad_norm": 1.070450277819012, "learning_rate": 1.519704359637824e-05, "loss": 1.6019, "step": 2595 }, { "epoch": 0.3464104617026955, "grad_norm": 1.3058537795961895, "learning_rate": 1.519335070824094e-05, "loss": 1.6049, "step": 2596 }, { "epoch": 0.34654390178809713, "grad_norm": 1.0080854625304823, "learning_rate": 1.5189656850044853e-05, "loss": 1.6254, "step": 2597 }, { "epoch": 0.3466773418734988, "grad_norm": 1.2017908924230127, "learning_rate": 1.5185962022479954e-05, "loss": 1.6323, "step": 2598 }, { "epoch": 0.34681078195890047, "grad_norm": 1.0124260951271844, "learning_rate": 1.5182266226236394e-05, "loss": 1.6621, "step": 2599 }, { "epoch": 0.3469442220443021, "grad_norm": 1.2316810096284962, "learning_rate": 1.5178569462004506e-05, "loss": 1.5827, "step": 2600 }, { "epoch": 0.34707766212970376, "grad_norm": 1.0242976500030199, "learning_rate": 1.51748717304748e-05, "loss": 1.6034, "step": 2601 }, { "epoch": 0.3472111022151054, "grad_norm": 1.0030046362345064, "learning_rate": 1.5171173032337976e-05, "loss": 1.654, "step": 2602 }, { "epoch": 0.3473445423005071, "grad_norm": 1.0713133664664767, "learning_rate": 1.5167473368284902e-05, "loss": 1.6352, "step": 2603 }, { "epoch": 0.3474779823859087, "grad_norm": 1.0969747552844828, "learning_rate": 1.516377273900664e-05, "loss": 1.6475, "step": 2604 }, { "epoch": 0.3476114224713104, "grad_norm": 0.9899374388035455, "learning_rate": 1.5160071145194422e-05, "loss": 1.6286, "step": 2605 }, { "epoch": 0.34774486255671205, "grad_norm": 0.9679955249669937, "learning_rate": 1.515636858753966e-05, "loss": 1.6383, "step": 2606 }, { "epoch": 0.34787830264211367, "grad_norm": 1.0249461107418476, "learning_rate": 1.5152665066733957e-05, "loss": 1.6083, "step": 2607 }, { "epoch": 0.34801174272751534, "grad_norm": 1.303220194256495, "learning_rate": 1.5148960583469085e-05, "loss": 1.6688, "step": 2608 }, { "epoch": 0.348145182812917, "grad_norm": 1.0188653868990143, "learning_rate": 1.5145255138436999e-05, "loss": 1.6288, "step": 2609 }, { "epoch": 0.3482786228983187, "grad_norm": 1.2584157965822476, "learning_rate": 1.5141548732329831e-05, "loss": 1.6492, "step": 2610 }, { "epoch": 0.3484120629837203, "grad_norm": 1.0306474809508248, "learning_rate": 1.5137841365839901e-05, "loss": 1.6854, "step": 2611 }, { "epoch": 0.34854550306912196, "grad_norm": 1.056381467093589, "learning_rate": 1.51341330396597e-05, "loss": 1.6271, "step": 2612 }, { "epoch": 0.34867894315452364, "grad_norm": 1.0295530745478738, "learning_rate": 1.51304237544819e-05, "loss": 1.6633, "step": 2613 }, { "epoch": 0.34881238323992525, "grad_norm": 1.0398915950232068, "learning_rate": 1.5126713510999359e-05, "loss": 1.627, "step": 2614 }, { "epoch": 0.3489458233253269, "grad_norm": 1.0146998246386771, "learning_rate": 1.51230023099051e-05, "loss": 1.627, "step": 2615 }, { "epoch": 0.3490792634107286, "grad_norm": 1.0172109821324007, "learning_rate": 1.5119290151892339e-05, "loss": 1.5836, "step": 2616 }, { "epoch": 0.34921270349613026, "grad_norm": 1.0267520201741256, "learning_rate": 1.5115577037654464e-05, "loss": 1.6439, "step": 2617 }, { "epoch": 0.3493461435815319, "grad_norm": 1.0457857231337446, "learning_rate": 1.5111862967885036e-05, "loss": 1.6157, "step": 2618 }, { "epoch": 0.34947958366693355, "grad_norm": 1.0555824876624866, "learning_rate": 1.5108147943277813e-05, "loss": 1.5927, "step": 2619 }, { "epoch": 0.3496130237523352, "grad_norm": 0.9824068117065244, "learning_rate": 1.5104431964526707e-05, "loss": 1.6322, "step": 2620 }, { "epoch": 0.34974646383773683, "grad_norm": 1.0882477650288422, "learning_rate": 1.510071503232583e-05, "loss": 1.5678, "step": 2621 }, { "epoch": 0.3498799039231385, "grad_norm": 0.966848710204604, "learning_rate": 1.5096997147369457e-05, "loss": 1.6656, "step": 2622 }, { "epoch": 0.3500133440085402, "grad_norm": 0.9955616430245504, "learning_rate": 1.509327831035205e-05, "loss": 1.6429, "step": 2623 }, { "epoch": 0.35014678409394184, "grad_norm": 1.5091711649173347, "learning_rate": 1.5089558521968242e-05, "loss": 1.7588, "step": 2624 }, { "epoch": 0.35028022417934346, "grad_norm": 0.9707332773761684, "learning_rate": 1.5085837782912851e-05, "loss": 1.6116, "step": 2625 }, { "epoch": 0.35041366426474513, "grad_norm": 0.9642328468057494, "learning_rate": 1.5082116093880862e-05, "loss": 1.6222, "step": 2626 }, { "epoch": 0.3505471043501468, "grad_norm": 1.0194989516497073, "learning_rate": 1.5078393455567452e-05, "loss": 1.6335, "step": 2627 }, { "epoch": 0.3506805444355484, "grad_norm": 0.9709628625829926, "learning_rate": 1.5074669868667964e-05, "loss": 1.6335, "step": 2628 }, { "epoch": 0.3508139845209501, "grad_norm": 1.0095163214896312, "learning_rate": 1.5070945333877923e-05, "loss": 1.6442, "step": 2629 }, { "epoch": 0.35094742460635175, "grad_norm": 1.1931846380684248, "learning_rate": 1.506721985189303e-05, "loss": 1.638, "step": 2630 }, { "epoch": 0.3510808646917534, "grad_norm": 0.9839265090005194, "learning_rate": 1.5063493423409154e-05, "loss": 1.6196, "step": 2631 }, { "epoch": 0.35121430477715504, "grad_norm": 1.1186506149798139, "learning_rate": 1.5059766049122364e-05, "loss": 1.6225, "step": 2632 }, { "epoch": 0.3513477448625567, "grad_norm": 1.0864720980811629, "learning_rate": 1.5056037729728878e-05, "loss": 1.5935, "step": 2633 }, { "epoch": 0.3514811849479584, "grad_norm": 1.2246349786822723, "learning_rate": 1.5052308465925115e-05, "loss": 1.5767, "step": 2634 }, { "epoch": 0.35161462503336, "grad_norm": 1.0333745712542706, "learning_rate": 1.5048578258407646e-05, "loss": 1.6637, "step": 2635 }, { "epoch": 0.35174806511876167, "grad_norm": 1.0018496493629654, "learning_rate": 1.5044847107873246e-05, "loss": 1.6278, "step": 2636 }, { "epoch": 0.35188150520416334, "grad_norm": 1.1758224822257188, "learning_rate": 1.5041115015018841e-05, "loss": 1.6303, "step": 2637 }, { "epoch": 0.352014945289565, "grad_norm": 1.250987136440272, "learning_rate": 1.5037381980541546e-05, "loss": 1.6571, "step": 2638 }, { "epoch": 0.3521483853749666, "grad_norm": 1.2181097297996704, "learning_rate": 1.503364800513865e-05, "loss": 1.704, "step": 2639 }, { "epoch": 0.3522818254603683, "grad_norm": 1.0786935661772743, "learning_rate": 1.5029913089507617e-05, "loss": 1.5949, "step": 2640 }, { "epoch": 0.35241526554576996, "grad_norm": 1.0531753164421735, "learning_rate": 1.5026177234346087e-05, "loss": 1.6373, "step": 2641 }, { "epoch": 0.3525487056311716, "grad_norm": 1.3629803662802447, "learning_rate": 1.5022440440351873e-05, "loss": 1.6762, "step": 2642 }, { "epoch": 0.35268214571657325, "grad_norm": 1.1620679753807144, "learning_rate": 1.501870270822297e-05, "loss": 1.6133, "step": 2643 }, { "epoch": 0.3528155858019749, "grad_norm": 1.0348126543518374, "learning_rate": 1.5014964038657538e-05, "loss": 1.654, "step": 2644 }, { "epoch": 0.3529490258873766, "grad_norm": 1.1304213137830432, "learning_rate": 1.5011224432353924e-05, "loss": 1.6386, "step": 2645 }, { "epoch": 0.3530824659727782, "grad_norm": 1.1277308613925912, "learning_rate": 1.5007483890010642e-05, "loss": 1.6194, "step": 2646 }, { "epoch": 0.3532159060581799, "grad_norm": 1.0163090940302686, "learning_rate": 1.5003742412326377e-05, "loss": 1.6575, "step": 2647 }, { "epoch": 0.35334934614358154, "grad_norm": 1.0125879795963375, "learning_rate": 1.5000000000000002e-05, "loss": 1.6453, "step": 2648 }, { "epoch": 0.35348278622898316, "grad_norm": 1.0098185677548541, "learning_rate": 1.4996256653730554e-05, "loss": 1.6068, "step": 2649 }, { "epoch": 0.35361622631438483, "grad_norm": 1.2735854212695745, "learning_rate": 1.4992512374217247e-05, "loss": 1.6487, "step": 2650 }, { "epoch": 0.3537496663997865, "grad_norm": 1.0932430437817762, "learning_rate": 1.498876716215947e-05, "loss": 1.6284, "step": 2651 }, { "epoch": 0.35388310648518817, "grad_norm": 0.9777781439946768, "learning_rate": 1.4985021018256786e-05, "loss": 1.625, "step": 2652 }, { "epoch": 0.3540165465705898, "grad_norm": 1.1136916517793387, "learning_rate": 1.4981273943208937e-05, "loss": 1.6708, "step": 2653 }, { "epoch": 0.35414998665599146, "grad_norm": 1.373624233594401, "learning_rate": 1.4977525937715824e-05, "loss": 1.6216, "step": 2654 }, { "epoch": 0.3542834267413931, "grad_norm": 1.0595448685922348, "learning_rate": 1.4973777002477538e-05, "loss": 1.5722, "step": 2655 }, { "epoch": 0.3544168668267948, "grad_norm": 1.2378751910850914, "learning_rate": 1.4970027138194336e-05, "loss": 1.6311, "step": 2656 }, { "epoch": 0.3545503069121964, "grad_norm": 1.1786205067232547, "learning_rate": 1.4966276345566654e-05, "loss": 1.6456, "step": 2657 }, { "epoch": 0.3546837469975981, "grad_norm": 1.0572423131869906, "learning_rate": 1.4962524625295092e-05, "loss": 1.6392, "step": 2658 }, { "epoch": 0.35481718708299975, "grad_norm": 1.0148567443614354, "learning_rate": 1.495877197808043e-05, "loss": 1.6173, "step": 2659 }, { "epoch": 0.35495062716840137, "grad_norm": 0.9578348594491616, "learning_rate": 1.4955018404623623e-05, "loss": 1.628, "step": 2660 }, { "epoch": 0.35508406725380304, "grad_norm": 1.1181766176664125, "learning_rate": 1.4951263905625788e-05, "loss": 1.5787, "step": 2661 }, { "epoch": 0.3552175073392047, "grad_norm": 1.1415697297087068, "learning_rate": 1.4947508481788231e-05, "loss": 1.5782, "step": 2662 }, { "epoch": 0.3553509474246064, "grad_norm": 0.9866478663748992, "learning_rate": 1.494375213381242e-05, "loss": 1.5652, "step": 2663 }, { "epoch": 0.355484387510008, "grad_norm": 1.2168790215873122, "learning_rate": 1.4939994862399996e-05, "loss": 1.6349, "step": 2664 }, { "epoch": 0.35561782759540966, "grad_norm": 1.0419160584710712, "learning_rate": 1.4936236668252772e-05, "loss": 1.625, "step": 2665 }, { "epoch": 0.35575126768081133, "grad_norm": 1.0033295412035523, "learning_rate": 1.4932477552072745e-05, "loss": 1.6232, "step": 2666 }, { "epoch": 0.35588470776621295, "grad_norm": 0.9869871754447705, "learning_rate": 1.4928717514562066e-05, "loss": 1.5835, "step": 2667 }, { "epoch": 0.3560181478516146, "grad_norm": 0.992408467300245, "learning_rate": 1.4924956556423072e-05, "loss": 1.603, "step": 2668 }, { "epoch": 0.3561515879370163, "grad_norm": 0.9824615329438933, "learning_rate": 1.4921194678358266e-05, "loss": 1.5976, "step": 2669 }, { "epoch": 0.35628502802241796, "grad_norm": 0.9934973089076394, "learning_rate": 1.4917431881070323e-05, "loss": 1.595, "step": 2670 }, { "epoch": 0.3564184681078196, "grad_norm": 0.9868126452811748, "learning_rate": 1.4913668165262095e-05, "loss": 1.6287, "step": 2671 }, { "epoch": 0.35655190819322125, "grad_norm": 1.0146369534155588, "learning_rate": 1.4909903531636593e-05, "loss": 1.5831, "step": 2672 }, { "epoch": 0.3566853482786229, "grad_norm": 1.0128166092521045, "learning_rate": 1.4906137980897017e-05, "loss": 1.6241, "step": 2673 }, { "epoch": 0.35681878836402453, "grad_norm": 1.0086684442228309, "learning_rate": 1.4902371513746723e-05, "loss": 1.6162, "step": 2674 }, { "epoch": 0.3569522284494262, "grad_norm": 1.0430890218619906, "learning_rate": 1.4898604130889243e-05, "loss": 1.6444, "step": 2675 }, { "epoch": 0.35708566853482787, "grad_norm": 0.9873271173986554, "learning_rate": 1.489483583302829e-05, "loss": 1.6058, "step": 2676 }, { "epoch": 0.35721910862022954, "grad_norm": 0.9633834898755523, "learning_rate": 1.4891066620867726e-05, "loss": 1.5722, "step": 2677 }, { "epoch": 0.35735254870563116, "grad_norm": 1.0004128851077128, "learning_rate": 1.4887296495111611e-05, "loss": 1.6337, "step": 2678 }, { "epoch": 0.3574859887910328, "grad_norm": 1.1889045283167872, "learning_rate": 1.4883525456464152e-05, "loss": 1.5775, "step": 2679 }, { "epoch": 0.3576194288764345, "grad_norm": 2.5683847190418736, "learning_rate": 1.4879753505629739e-05, "loss": 1.6822, "step": 2680 }, { "epoch": 0.3577528689618361, "grad_norm": 1.0263603075923868, "learning_rate": 1.4875980643312931e-05, "loss": 1.6556, "step": 2681 }, { "epoch": 0.3578863090472378, "grad_norm": 1.0275465262931753, "learning_rate": 1.4872206870218451e-05, "loss": 1.6586, "step": 2682 }, { "epoch": 0.35801974913263945, "grad_norm": 1.0578558588998055, "learning_rate": 1.4868432187051201e-05, "loss": 1.6126, "step": 2683 }, { "epoch": 0.3581531892180411, "grad_norm": 1.0209797685600523, "learning_rate": 1.4864656594516245e-05, "loss": 1.6051, "step": 2684 }, { "epoch": 0.35828662930344274, "grad_norm": 1.0698279049277513, "learning_rate": 1.4860880093318827e-05, "loss": 1.6162, "step": 2685 }, { "epoch": 0.3584200693888444, "grad_norm": 1.0175812497772847, "learning_rate": 1.4857102684164349e-05, "loss": 1.6055, "step": 2686 }, { "epoch": 0.3585535094742461, "grad_norm": 1.0104871621067544, "learning_rate": 1.485332436775839e-05, "loss": 1.6041, "step": 2687 }, { "epoch": 0.3586869495596477, "grad_norm": 1.1287495893065134, "learning_rate": 1.4849545144806697e-05, "loss": 1.603, "step": 2688 }, { "epoch": 0.35882038964504936, "grad_norm": 1.0368374324423373, "learning_rate": 1.4845765016015183e-05, "loss": 1.6295, "step": 2689 }, { "epoch": 0.35895382973045104, "grad_norm": 1.0590887497081614, "learning_rate": 1.4841983982089936e-05, "loss": 1.6473, "step": 2690 }, { "epoch": 0.3590872698158527, "grad_norm": 1.1066849585601135, "learning_rate": 1.4838202043737209e-05, "loss": 1.6567, "step": 2691 }, { "epoch": 0.3592207099012543, "grad_norm": 1.018947820472309, "learning_rate": 1.4834419201663429e-05, "loss": 1.6562, "step": 2692 }, { "epoch": 0.359354149986656, "grad_norm": 1.128393294375381, "learning_rate": 1.483063545657518e-05, "loss": 1.6634, "step": 2693 }, { "epoch": 0.35948759007205766, "grad_norm": 1.0975481758375052, "learning_rate": 1.4826850809179228e-05, "loss": 1.6274, "step": 2694 }, { "epoch": 0.3596210301574593, "grad_norm": 1.1035342439097737, "learning_rate": 1.4823065260182499e-05, "loss": 1.6199, "step": 2695 }, { "epoch": 0.35975447024286095, "grad_norm": 1.0823826339805787, "learning_rate": 1.4819278810292096e-05, "loss": 1.5614, "step": 2696 }, { "epoch": 0.3598879103282626, "grad_norm": 1.0810219041790743, "learning_rate": 1.4815491460215277e-05, "loss": 1.6457, "step": 2697 }, { "epoch": 0.3600213504136643, "grad_norm": 1.008665774442881, "learning_rate": 1.481170321065948e-05, "loss": 1.6252, "step": 2698 }, { "epoch": 0.3601547904990659, "grad_norm": 1.0280588719731665, "learning_rate": 1.4807914062332307e-05, "loss": 1.6583, "step": 2699 }, { "epoch": 0.3602882305844676, "grad_norm": 1.0944712865857813, "learning_rate": 1.4804124015941528e-05, "loss": 1.6016, "step": 2700 }, { "epoch": 0.36042167066986924, "grad_norm": 1.0990375701438984, "learning_rate": 1.480033307219508e-05, "loss": 1.5977, "step": 2701 }, { "epoch": 0.36055511075527086, "grad_norm": 1.0149776394646597, "learning_rate": 1.4796541231801068e-05, "loss": 1.6027, "step": 2702 }, { "epoch": 0.36068855084067253, "grad_norm": 1.1355704225231722, "learning_rate": 1.4792748495467763e-05, "loss": 1.5975, "step": 2703 }, { "epoch": 0.3608219909260742, "grad_norm": 2.0682004186698784, "learning_rate": 1.4788954863903608e-05, "loss": 1.5899, "step": 2704 }, { "epoch": 0.36095543101147587, "grad_norm": 1.0954874367416438, "learning_rate": 1.478516033781721e-05, "loss": 1.5893, "step": 2705 }, { "epoch": 0.3610888710968775, "grad_norm": 1.09549046341114, "learning_rate": 1.4781364917917339e-05, "loss": 1.6175, "step": 2706 }, { "epoch": 0.36122231118227915, "grad_norm": 1.0665598499326367, "learning_rate": 1.477756860491294e-05, "loss": 1.6201, "step": 2707 }, { "epoch": 0.3613557512676808, "grad_norm": 1.0431226101624222, "learning_rate": 1.4773771399513122e-05, "loss": 1.5921, "step": 2708 }, { "epoch": 0.36148919135308244, "grad_norm": 1.0347916658313776, "learning_rate": 1.4769973302427154e-05, "loss": 1.5922, "step": 2709 }, { "epoch": 0.3616226314384841, "grad_norm": 1.188173513439476, "learning_rate": 1.4766174314364486e-05, "loss": 1.6227, "step": 2710 }, { "epoch": 0.3617560715238858, "grad_norm": 1.1816851578943308, "learning_rate": 1.476237443603472e-05, "loss": 1.6272, "step": 2711 }, { "epoch": 0.36188951160928745, "grad_norm": 1.047966632364183, "learning_rate": 1.475857366814763e-05, "loss": 1.6298, "step": 2712 }, { "epoch": 0.36202295169468907, "grad_norm": 1.134317205650707, "learning_rate": 1.4754772011413154e-05, "loss": 1.5883, "step": 2713 }, { "epoch": 0.36215639178009074, "grad_norm": 1.0021173494097093, "learning_rate": 1.4750969466541407e-05, "loss": 1.6369, "step": 2714 }, { "epoch": 0.3622898318654924, "grad_norm": 1.1709267393721203, "learning_rate": 1.4747166034242652e-05, "loss": 1.5887, "step": 2715 }, { "epoch": 0.362423271950894, "grad_norm": 1.1258130657900838, "learning_rate": 1.4743361715227331e-05, "loss": 1.6778, "step": 2716 }, { "epoch": 0.3625567120362957, "grad_norm": 0.9885486941912983, "learning_rate": 1.4739556510206047e-05, "loss": 1.6048, "step": 2717 }, { "epoch": 0.36269015212169736, "grad_norm": 1.0268383631231532, "learning_rate": 1.473575041988957e-05, "loss": 1.594, "step": 2718 }, { "epoch": 0.36282359220709903, "grad_norm": 1.190424029103695, "learning_rate": 1.4731943444988831e-05, "loss": 1.63, "step": 2719 }, { "epoch": 0.36295703229250065, "grad_norm": 1.0167446687335664, "learning_rate": 1.4728135586214933e-05, "loss": 1.6216, "step": 2720 }, { "epoch": 0.3630904723779023, "grad_norm": 1.0972474419145468, "learning_rate": 1.472432684427914e-05, "loss": 1.6385, "step": 2721 }, { "epoch": 0.363223912463304, "grad_norm": 1.1518104547155832, "learning_rate": 1.4720517219892881e-05, "loss": 1.6232, "step": 2722 }, { "epoch": 0.36335735254870566, "grad_norm": 7.034867431932981, "learning_rate": 1.4716706713767748e-05, "loss": 1.5632, "step": 2723 }, { "epoch": 0.3634907926341073, "grad_norm": 1.166832787708479, "learning_rate": 1.4712895326615505e-05, "loss": 1.607, "step": 2724 }, { "epoch": 0.36362423271950894, "grad_norm": 1.0638457063513032, "learning_rate": 1.4709083059148073e-05, "loss": 1.6328, "step": 2725 }, { "epoch": 0.3637576728049106, "grad_norm": 1.0079889035962148, "learning_rate": 1.470526991207754e-05, "loss": 1.6063, "step": 2726 }, { "epoch": 0.36389111289031223, "grad_norm": 1.0596287476120059, "learning_rate": 1.4701455886116159e-05, "loss": 1.6502, "step": 2727 }, { "epoch": 0.3640245529757139, "grad_norm": 0.9990070809309942, "learning_rate": 1.4697640981976347e-05, "loss": 1.6093, "step": 2728 }, { "epoch": 0.36415799306111557, "grad_norm": 1.1369821739015482, "learning_rate": 1.4693825200370682e-05, "loss": 1.6204, "step": 2729 }, { "epoch": 0.36429143314651724, "grad_norm": 1.1486606111537232, "learning_rate": 1.4690008542011912e-05, "loss": 1.656, "step": 2730 }, { "epoch": 0.36442487323191886, "grad_norm": 1.038173101624409, "learning_rate": 1.4686191007612945e-05, "loss": 1.5957, "step": 2731 }, { "epoch": 0.3645583133173205, "grad_norm": 1.3383555292824745, "learning_rate": 1.4682372597886851e-05, "loss": 1.6269, "step": 2732 }, { "epoch": 0.3646917534027222, "grad_norm": 1.1725746335257135, "learning_rate": 1.4678553313546869e-05, "loss": 1.6646, "step": 2733 }, { "epoch": 0.3648251934881238, "grad_norm": 1.0117764785870433, "learning_rate": 1.4674733155306392e-05, "loss": 1.5871, "step": 2734 }, { "epoch": 0.3649586335735255, "grad_norm": 1.139180055289969, "learning_rate": 1.4670912123878987e-05, "loss": 1.666, "step": 2735 }, { "epoch": 0.36509207365892715, "grad_norm": 1.160600503319934, "learning_rate": 1.4667090219978376e-05, "loss": 1.6344, "step": 2736 }, { "epoch": 0.3652255137443288, "grad_norm": 0.9804628531594025, "learning_rate": 1.4663267444318451e-05, "loss": 1.6504, "step": 2737 }, { "epoch": 0.36535895382973044, "grad_norm": 1.2244168997082292, "learning_rate": 1.465944379761326e-05, "loss": 1.6267, "step": 2738 }, { "epoch": 0.3654923939151321, "grad_norm": 1.142784430348776, "learning_rate": 1.4655619280577015e-05, "loss": 1.6029, "step": 2739 }, { "epoch": 0.3656258340005338, "grad_norm": 1.1856642918086469, "learning_rate": 1.46517938939241e-05, "loss": 1.6084, "step": 2740 }, { "epoch": 0.3657592740859354, "grad_norm": 1.1684003107608707, "learning_rate": 1.4647967638369043e-05, "loss": 1.673, "step": 2741 }, { "epoch": 0.36589271417133706, "grad_norm": 0.9808643566324687, "learning_rate": 1.4644140514626553e-05, "loss": 1.6068, "step": 2742 }, { "epoch": 0.36602615425673873, "grad_norm": 1.0096442361014633, "learning_rate": 1.4640312523411491e-05, "loss": 1.6395, "step": 2743 }, { "epoch": 0.3661595943421404, "grad_norm": 1.0898350041048794, "learning_rate": 1.4636483665438878e-05, "loss": 1.7007, "step": 2744 }, { "epoch": 0.366293034427542, "grad_norm": 0.9501423520521707, "learning_rate": 1.4632653941423911e-05, "loss": 1.5732, "step": 2745 }, { "epoch": 0.3664264745129437, "grad_norm": 1.001779649980241, "learning_rate": 1.4628823352081929e-05, "loss": 1.6178, "step": 2746 }, { "epoch": 0.36655991459834536, "grad_norm": 1.0032387536870389, "learning_rate": 1.4624991898128445e-05, "loss": 1.6319, "step": 2747 }, { "epoch": 0.366693354683747, "grad_norm": 0.9476089503337908, "learning_rate": 1.4621159580279129e-05, "loss": 1.5986, "step": 2748 }, { "epoch": 0.36682679476914865, "grad_norm": 0.9672551178709096, "learning_rate": 1.4617326399249823e-05, "loss": 1.6172, "step": 2749 }, { "epoch": 0.3669602348545503, "grad_norm": 0.9999506532379091, "learning_rate": 1.4613492355756514e-05, "loss": 1.6411, "step": 2750 }, { "epoch": 0.367093674939952, "grad_norm": 1.0229999120124786, "learning_rate": 1.460965745051536e-05, "loss": 1.6105, "step": 2751 }, { "epoch": 0.3672271150253536, "grad_norm": 1.008641986491956, "learning_rate": 1.4605821684242674e-05, "loss": 1.6731, "step": 2752 }, { "epoch": 0.36736055511075527, "grad_norm": 1.042758475874373, "learning_rate": 1.460198505765494e-05, "loss": 1.554, "step": 2753 }, { "epoch": 0.36749399519615694, "grad_norm": 1.1472564831402514, "learning_rate": 1.4598147571468791e-05, "loss": 1.6019, "step": 2754 }, { "epoch": 0.36762743528155856, "grad_norm": 0.9737879253423936, "learning_rate": 1.4594309226401027e-05, "loss": 1.6319, "step": 2755 }, { "epoch": 0.3677608753669602, "grad_norm": 0.9632772149963865, "learning_rate": 1.4590470023168611e-05, "loss": 1.5865, "step": 2756 }, { "epoch": 0.3678943154523619, "grad_norm": 1.1241458546313505, "learning_rate": 1.4586629962488654e-05, "loss": 1.6882, "step": 2757 }, { "epoch": 0.36802775553776357, "grad_norm": 1.0034203806903836, "learning_rate": 1.4582789045078445e-05, "loss": 1.6177, "step": 2758 }, { "epoch": 0.3681611956231652, "grad_norm": 0.9619488800895231, "learning_rate": 1.4578947271655417e-05, "loss": 1.6104, "step": 2759 }, { "epoch": 0.36829463570856685, "grad_norm": 1.0707252512115273, "learning_rate": 1.4575104642937175e-05, "loss": 1.5266, "step": 2760 }, { "epoch": 0.3684280757939685, "grad_norm": 1.0221303660590428, "learning_rate": 1.4571261159641472e-05, "loss": 1.6157, "step": 2761 }, { "epoch": 0.36856151587937014, "grad_norm": 1.1856740576654383, "learning_rate": 1.4567416822486233e-05, "loss": 1.5843, "step": 2762 }, { "epoch": 0.3686949559647718, "grad_norm": 1.057753625247735, "learning_rate": 1.456357163218953e-05, "loss": 1.6437, "step": 2763 }, { "epoch": 0.3688283960501735, "grad_norm": 1.167606648878092, "learning_rate": 1.4559725589469609e-05, "loss": 1.62, "step": 2764 }, { "epoch": 0.36896183613557515, "grad_norm": 1.0332270116796345, "learning_rate": 1.4555878695044859e-05, "loss": 1.5791, "step": 2765 }, { "epoch": 0.36909527622097676, "grad_norm": 1.0273288838594588, "learning_rate": 1.4552030949633839e-05, "loss": 1.6575, "step": 2766 }, { "epoch": 0.36922871630637843, "grad_norm": 1.0275198082813934, "learning_rate": 1.4548182353955267e-05, "loss": 1.626, "step": 2767 }, { "epoch": 0.3693621563917801, "grad_norm": 1.009577517124903, "learning_rate": 1.4544332908728011e-05, "loss": 1.6139, "step": 2768 }, { "epoch": 0.3694955964771817, "grad_norm": 0.9669422230954393, "learning_rate": 1.454048261467111e-05, "loss": 1.6449, "step": 2769 }, { "epoch": 0.3696290365625834, "grad_norm": 1.2874584717461675, "learning_rate": 1.453663147250375e-05, "loss": 1.5875, "step": 2770 }, { "epoch": 0.36976247664798506, "grad_norm": 1.025068484975409, "learning_rate": 1.4532779482945283e-05, "loss": 1.6252, "step": 2771 }, { "epoch": 0.36989591673338673, "grad_norm": 1.005806168229964, "learning_rate": 1.4528926646715215e-05, "loss": 1.6093, "step": 2772 }, { "epoch": 0.37002935681878835, "grad_norm": 1.208605409648711, "learning_rate": 1.4525072964533213e-05, "loss": 1.6543, "step": 2773 }, { "epoch": 0.37016279690419, "grad_norm": 1.0895321534730344, "learning_rate": 1.4521218437119105e-05, "loss": 1.6162, "step": 2774 }, { "epoch": 0.3702962369895917, "grad_norm": 0.9832287315998608, "learning_rate": 1.4517363065192865e-05, "loss": 1.5716, "step": 2775 }, { "epoch": 0.3704296770749933, "grad_norm": 1.2767029129562657, "learning_rate": 1.4513506849474639e-05, "loss": 1.6252, "step": 2776 }, { "epoch": 0.370563117160395, "grad_norm": 1.0660572092116536, "learning_rate": 1.450964979068472e-05, "loss": 1.5639, "step": 2777 }, { "epoch": 0.37069655724579664, "grad_norm": 1.0431072621084843, "learning_rate": 1.4505791889543565e-05, "loss": 1.6989, "step": 2778 }, { "epoch": 0.3708299973311983, "grad_norm": 1.1890314142284257, "learning_rate": 1.4501933146771785e-05, "loss": 1.6471, "step": 2779 }, { "epoch": 0.37096343741659993, "grad_norm": 1.1319054159207953, "learning_rate": 1.449807356309015e-05, "loss": 1.5851, "step": 2780 }, { "epoch": 0.3710968775020016, "grad_norm": 1.0145901459250226, "learning_rate": 1.4494213139219587e-05, "loss": 1.6279, "step": 2781 }, { "epoch": 0.37123031758740327, "grad_norm": 0.9600581548472692, "learning_rate": 1.4490351875881174e-05, "loss": 1.6292, "step": 2782 }, { "epoch": 0.3713637576728049, "grad_norm": 1.1450325048073626, "learning_rate": 1.448648977379616e-05, "loss": 1.5946, "step": 2783 }, { "epoch": 0.37149719775820655, "grad_norm": 0.945594443926737, "learning_rate": 1.4482626833685936e-05, "loss": 1.6552, "step": 2784 }, { "epoch": 0.3716306378436082, "grad_norm": 1.168278859108492, "learning_rate": 1.4478763056272054e-05, "loss": 1.6365, "step": 2785 }, { "epoch": 0.3717640779290099, "grad_norm": 1.108937200609111, "learning_rate": 1.4474898442276226e-05, "loss": 1.5822, "step": 2786 }, { "epoch": 0.3718975180144115, "grad_norm": 0.9571836543010801, "learning_rate": 1.447103299242032e-05, "loss": 1.5651, "step": 2787 }, { "epoch": 0.3720309580998132, "grad_norm": 10.157439322526868, "learning_rate": 1.4467166707426357e-05, "loss": 1.6417, "step": 2788 }, { "epoch": 0.37216439818521485, "grad_norm": 1.0795077239195907, "learning_rate": 1.4463299588016508e-05, "loss": 1.6007, "step": 2789 }, { "epoch": 0.37229783827061647, "grad_norm": 1.02760083176124, "learning_rate": 1.4459431634913118e-05, "loss": 1.5834, "step": 2790 }, { "epoch": 0.37243127835601814, "grad_norm": 1.052624541464461, "learning_rate": 1.4455562848838666e-05, "loss": 1.5611, "step": 2791 }, { "epoch": 0.3725647184414198, "grad_norm": 1.0230420491562466, "learning_rate": 1.4451693230515807e-05, "loss": 1.6217, "step": 2792 }, { "epoch": 0.3726981585268215, "grad_norm": 0.9764342262244122, "learning_rate": 1.4447822780667332e-05, "loss": 1.6139, "step": 2793 }, { "epoch": 0.3728315986122231, "grad_norm": 1.128603754261654, "learning_rate": 1.4443951500016207e-05, "loss": 1.6407, "step": 2794 }, { "epoch": 0.37296503869762476, "grad_norm": 1.1192050512936182, "learning_rate": 1.4440079389285535e-05, "loss": 1.6054, "step": 2795 }, { "epoch": 0.37309847878302643, "grad_norm": 1.0094200838680423, "learning_rate": 1.4436206449198584e-05, "loss": 1.584, "step": 2796 }, { "epoch": 0.3732319188684281, "grad_norm": 1.0820268405383773, "learning_rate": 1.443233268047878e-05, "loss": 1.6154, "step": 2797 }, { "epoch": 0.3733653589538297, "grad_norm": 1.0454550915071419, "learning_rate": 1.4428458083849693e-05, "loss": 1.5913, "step": 2798 }, { "epoch": 0.3734987990392314, "grad_norm": 1.0114425617387492, "learning_rate": 1.4424582660035059e-05, "loss": 1.6053, "step": 2799 }, { "epoch": 0.37363223912463306, "grad_norm": 1.0867361007007368, "learning_rate": 1.4420706409758753e-05, "loss": 1.6393, "step": 2800 }, { "epoch": 0.3737656792100347, "grad_norm": 1.1845819474365904, "learning_rate": 1.4416829333744828e-05, "loss": 1.6132, "step": 2801 }, { "epoch": 0.37389911929543634, "grad_norm": 1.0205258166513704, "learning_rate": 1.4412951432717467e-05, "loss": 1.6227, "step": 2802 }, { "epoch": 0.374032559380838, "grad_norm": 1.0438780709981974, "learning_rate": 1.4409072707401024e-05, "loss": 1.636, "step": 2803 }, { "epoch": 0.3741659994662397, "grad_norm": 1.0589699296179769, "learning_rate": 1.4405193158519998e-05, "loss": 1.6119, "step": 2804 }, { "epoch": 0.3742994395516413, "grad_norm": 0.9619237433348188, "learning_rate": 1.4401312786799044e-05, "loss": 1.6066, "step": 2805 }, { "epoch": 0.37443287963704297, "grad_norm": 1.0797086642870346, "learning_rate": 1.4397431592962974e-05, "loss": 1.5915, "step": 2806 }, { "epoch": 0.37456631972244464, "grad_norm": 0.9662518764136966, "learning_rate": 1.439354957773675e-05, "loss": 1.6207, "step": 2807 }, { "epoch": 0.37469975980784626, "grad_norm": 1.1237578053297503, "learning_rate": 1.4389666741845485e-05, "loss": 1.6194, "step": 2808 }, { "epoch": 0.3748331998932479, "grad_norm": 0.9840735377034793, "learning_rate": 1.4385783086014451e-05, "loss": 1.6028, "step": 2809 }, { "epoch": 0.3749666399786496, "grad_norm": 1.0118695314963424, "learning_rate": 1.4381898610969071e-05, "loss": 1.5789, "step": 2810 }, { "epoch": 0.37510008006405127, "grad_norm": 1.2580488826742735, "learning_rate": 1.437801331743492e-05, "loss": 1.6479, "step": 2811 }, { "epoch": 0.3752335201494529, "grad_norm": 1.0726283441727469, "learning_rate": 1.4374127206137727e-05, "loss": 1.6275, "step": 2812 }, { "epoch": 0.37536696023485455, "grad_norm": 1.0383966783564913, "learning_rate": 1.4370240277803374e-05, "loss": 1.6389, "step": 2813 }, { "epoch": 0.3755004003202562, "grad_norm": 1.323435758142834, "learning_rate": 1.4366352533157893e-05, "loss": 1.5959, "step": 2814 }, { "epoch": 0.37563384040565784, "grad_norm": 1.1012025284439741, "learning_rate": 1.4362463972927472e-05, "loss": 1.5828, "step": 2815 }, { "epoch": 0.3757672804910595, "grad_norm": 1.1171657662365868, "learning_rate": 1.4358574597838448e-05, "loss": 1.6421, "step": 2816 }, { "epoch": 0.3759007205764612, "grad_norm": 1.0544176694187513, "learning_rate": 1.4354684408617316e-05, "loss": 1.6215, "step": 2817 }, { "epoch": 0.37603416066186285, "grad_norm": 1.0414279460927407, "learning_rate": 1.4350793405990716e-05, "loss": 1.5807, "step": 2818 }, { "epoch": 0.37616760074726446, "grad_norm": 1.1376881831993566, "learning_rate": 1.434690159068544e-05, "loss": 1.6808, "step": 2819 }, { "epoch": 0.37630104083266613, "grad_norm": 1.0250101070023419, "learning_rate": 1.4343008963428442e-05, "loss": 1.6021, "step": 2820 }, { "epoch": 0.3764344809180678, "grad_norm": 1.1002348920804779, "learning_rate": 1.4339115524946815e-05, "loss": 1.6074, "step": 2821 }, { "epoch": 0.3765679210034694, "grad_norm": 1.0123930431013084, "learning_rate": 1.4335221275967812e-05, "loss": 1.6426, "step": 2822 }, { "epoch": 0.3767013610888711, "grad_norm": 1.2209497008197363, "learning_rate": 1.433132621721883e-05, "loss": 1.6471, "step": 2823 }, { "epoch": 0.37683480117427276, "grad_norm": 1.0189131304238113, "learning_rate": 1.432743034942743e-05, "loss": 1.6218, "step": 2824 }, { "epoch": 0.37696824125967443, "grad_norm": 1.0178791123141788, "learning_rate": 1.4323533673321304e-05, "loss": 1.6242, "step": 2825 }, { "epoch": 0.37710168134507605, "grad_norm": 0.9991762214209088, "learning_rate": 1.4319636189628316e-05, "loss": 1.6042, "step": 2826 }, { "epoch": 0.3772351214304777, "grad_norm": 1.016918051574137, "learning_rate": 1.431573789907647e-05, "loss": 1.6255, "step": 2827 }, { "epoch": 0.3773685615158794, "grad_norm": 0.968894964593842, "learning_rate": 1.4311838802393921e-05, "loss": 1.5977, "step": 2828 }, { "epoch": 0.377502001601281, "grad_norm": 0.9868572481236255, "learning_rate": 1.4307938900308971e-05, "loss": 1.6564, "step": 2829 }, { "epoch": 0.37763544168668267, "grad_norm": 1.6711098134309899, "learning_rate": 1.4304038193550087e-05, "loss": 1.6036, "step": 2830 }, { "epoch": 0.37776888177208434, "grad_norm": 1.2720681532176408, "learning_rate": 1.4300136682845874e-05, "loss": 1.6172, "step": 2831 }, { "epoch": 0.377902321857486, "grad_norm": 0.9972549450196833, "learning_rate": 1.4296234368925082e-05, "loss": 1.6582, "step": 2832 }, { "epoch": 0.3780357619428876, "grad_norm": 1.033670572949725, "learning_rate": 1.4292331252516627e-05, "loss": 1.598, "step": 2833 }, { "epoch": 0.3781692020282893, "grad_norm": 1.3209056443094034, "learning_rate": 1.4288427334349562e-05, "loss": 1.5872, "step": 2834 }, { "epoch": 0.37830264211369097, "grad_norm": 1.00727971762687, "learning_rate": 1.4284522615153098e-05, "loss": 1.6444, "step": 2835 }, { "epoch": 0.3784360821990926, "grad_norm": 1.1341291826363633, "learning_rate": 1.4280617095656591e-05, "loss": 1.6396, "step": 2836 }, { "epoch": 0.37856952228449425, "grad_norm": 1.088875710596739, "learning_rate": 1.4276710776589546e-05, "loss": 1.5717, "step": 2837 }, { "epoch": 0.3787029623698959, "grad_norm": 1.5266027482348503, "learning_rate": 1.4272803658681622e-05, "loss": 1.6019, "step": 2838 }, { "epoch": 0.3788364024552976, "grad_norm": 1.1012146674109902, "learning_rate": 1.4268895742662618e-05, "loss": 1.6891, "step": 2839 }, { "epoch": 0.3789698425406992, "grad_norm": 1.2571923095061812, "learning_rate": 1.4264987029262497e-05, "loss": 1.637, "step": 2840 }, { "epoch": 0.3791032826261009, "grad_norm": 1.0068568438867096, "learning_rate": 1.4261077519211358e-05, "loss": 1.5796, "step": 2841 }, { "epoch": 0.37923672271150255, "grad_norm": 1.008009465786665, "learning_rate": 1.4257167213239451e-05, "loss": 1.636, "step": 2842 }, { "epoch": 0.37937016279690416, "grad_norm": 1.0065398200515008, "learning_rate": 1.4253256112077176e-05, "loss": 1.6736, "step": 2843 }, { "epoch": 0.37950360288230583, "grad_norm": 0.9492387619872094, "learning_rate": 1.4249344216455085e-05, "loss": 1.632, "step": 2844 }, { "epoch": 0.3796370429677075, "grad_norm": 0.9873644452435473, "learning_rate": 1.4245431527103879e-05, "loss": 1.5646, "step": 2845 }, { "epoch": 0.3797704830531092, "grad_norm": 0.9940129192913516, "learning_rate": 1.4241518044754397e-05, "loss": 1.6068, "step": 2846 }, { "epoch": 0.3799039231385108, "grad_norm": 0.9786673958035172, "learning_rate": 1.4237603770137635e-05, "loss": 1.6508, "step": 2847 }, { "epoch": 0.38003736322391246, "grad_norm": 1.0669464495332486, "learning_rate": 1.4233688703984737e-05, "loss": 1.5973, "step": 2848 }, { "epoch": 0.38017080330931413, "grad_norm": 0.9699464474368997, "learning_rate": 1.4229772847026993e-05, "loss": 1.5928, "step": 2849 }, { "epoch": 0.38030424339471575, "grad_norm": 0.9602245627811605, "learning_rate": 1.4225856199995836e-05, "loss": 1.6669, "step": 2850 }, { "epoch": 0.3804376834801174, "grad_norm": 1.0000207901051725, "learning_rate": 1.4221938763622856e-05, "loss": 1.5871, "step": 2851 }, { "epoch": 0.3805711235655191, "grad_norm": 1.1884986045536179, "learning_rate": 1.4218020538639782e-05, "loss": 1.6184, "step": 2852 }, { "epoch": 0.38070456365092076, "grad_norm": 1.0365928654144692, "learning_rate": 1.4214101525778495e-05, "loss": 1.665, "step": 2853 }, { "epoch": 0.3808380037363224, "grad_norm": 0.9983952327474825, "learning_rate": 1.4210181725771026e-05, "loss": 1.6284, "step": 2854 }, { "epoch": 0.38097144382172404, "grad_norm": 1.0013533383795308, "learning_rate": 1.420626113934954e-05, "loss": 1.6141, "step": 2855 }, { "epoch": 0.3811048839071257, "grad_norm": 1.1423554042414863, "learning_rate": 1.4202339767246367e-05, "loss": 1.6336, "step": 2856 }, { "epoch": 0.38123832399252733, "grad_norm": 0.9703881563875031, "learning_rate": 1.419841761019397e-05, "loss": 1.6393, "step": 2857 }, { "epoch": 0.381371764077929, "grad_norm": 0.9642652272603265, "learning_rate": 1.4194494668924966e-05, "loss": 1.6384, "step": 2858 }, { "epoch": 0.38150520416333067, "grad_norm": 1.131226033110534, "learning_rate": 1.419057094417211e-05, "loss": 1.6435, "step": 2859 }, { "epoch": 0.38163864424873234, "grad_norm": 0.9917335521919108, "learning_rate": 1.4186646436668312e-05, "loss": 1.6202, "step": 2860 }, { "epoch": 0.38177208433413395, "grad_norm": 1.097375636507755, "learning_rate": 1.4182721147146633e-05, "loss": 1.7061, "step": 2861 }, { "epoch": 0.3819055244195356, "grad_norm": 0.9975696907135774, "learning_rate": 1.4178795076340259e-05, "loss": 1.6118, "step": 2862 }, { "epoch": 0.3820389645049373, "grad_norm": 0.9870041753998313, "learning_rate": 1.4174868224982547e-05, "loss": 1.6137, "step": 2863 }, { "epoch": 0.38217240459033897, "grad_norm": 1.165292789080929, "learning_rate": 1.417094059380698e-05, "loss": 1.6004, "step": 2864 }, { "epoch": 0.3823058446757406, "grad_norm": 1.145639325521145, "learning_rate": 1.4167012183547203e-05, "loss": 1.6066, "step": 2865 }, { "epoch": 0.38243928476114225, "grad_norm": 1.1503077171125726, "learning_rate": 1.4163082994936988e-05, "loss": 1.615, "step": 2866 }, { "epoch": 0.3825727248465439, "grad_norm": 1.0404542214973824, "learning_rate": 1.4159153028710268e-05, "loss": 1.6169, "step": 2867 }, { "epoch": 0.38270616493194554, "grad_norm": 0.996004145579702, "learning_rate": 1.415522228560112e-05, "loss": 1.6029, "step": 2868 }, { "epoch": 0.3828396050173472, "grad_norm": 1.2254062428716475, "learning_rate": 1.4151290766343752e-05, "loss": 1.6211, "step": 2869 }, { "epoch": 0.3829730451027489, "grad_norm": 0.992077891417151, "learning_rate": 1.4147358471672541e-05, "loss": 1.5512, "step": 2870 }, { "epoch": 0.38310648518815055, "grad_norm": 0.952569959666935, "learning_rate": 1.4143425402321982e-05, "loss": 1.6052, "step": 2871 }, { "epoch": 0.38323992527355216, "grad_norm": 1.2306890442084908, "learning_rate": 1.4139491559026732e-05, "loss": 1.5909, "step": 2872 }, { "epoch": 0.38337336535895383, "grad_norm": 0.9759054239574055, "learning_rate": 1.4135556942521594e-05, "loss": 1.6061, "step": 2873 }, { "epoch": 0.3835068054443555, "grad_norm": 0.9913047688419279, "learning_rate": 1.4131621553541499e-05, "loss": 1.6104, "step": 2874 }, { "epoch": 0.3836402455297571, "grad_norm": 1.246242626432296, "learning_rate": 1.412768539282154e-05, "loss": 1.6091, "step": 2875 }, { "epoch": 0.3837736856151588, "grad_norm": 0.9433807734418067, "learning_rate": 1.4123748461096942e-05, "loss": 1.6305, "step": 2876 }, { "epoch": 0.38390712570056046, "grad_norm": 0.982212397205281, "learning_rate": 1.4119810759103088e-05, "loss": 1.6221, "step": 2877 }, { "epoch": 0.38404056578596213, "grad_norm": 1.0575127644185438, "learning_rate": 1.4115872287575485e-05, "loss": 1.602, "step": 2878 }, { "epoch": 0.38417400587136374, "grad_norm": 1.1374999128456018, "learning_rate": 1.4111933047249802e-05, "loss": 1.6081, "step": 2879 }, { "epoch": 0.3843074459567654, "grad_norm": 9.571339510483169, "learning_rate": 1.410799303886184e-05, "loss": 1.6055, "step": 2880 }, { "epoch": 0.3844408860421671, "grad_norm": 1.145304347974641, "learning_rate": 1.4104052263147553e-05, "loss": 1.6612, "step": 2881 }, { "epoch": 0.3845743261275687, "grad_norm": 1.099466074032018, "learning_rate": 1.4100110720843025e-05, "loss": 1.5704, "step": 2882 }, { "epoch": 0.38470776621297037, "grad_norm": 1.0509418929003724, "learning_rate": 1.4096168412684497e-05, "loss": 1.648, "step": 2883 }, { "epoch": 0.38484120629837204, "grad_norm": 1.1230867470007317, "learning_rate": 1.409222533940835e-05, "loss": 1.6401, "step": 2884 }, { "epoch": 0.3849746463837737, "grad_norm": 1.007807394086856, "learning_rate": 1.4088281501751095e-05, "loss": 1.6452, "step": 2885 }, { "epoch": 0.3851080864691753, "grad_norm": 1.061247642675541, "learning_rate": 1.408433690044941e-05, "loss": 1.6049, "step": 2886 }, { "epoch": 0.385241526554577, "grad_norm": 1.0601502148961404, "learning_rate": 1.4080391536240088e-05, "loss": 1.6375, "step": 2887 }, { "epoch": 0.38537496663997867, "grad_norm": 0.9817836229390783, "learning_rate": 1.4076445409860086e-05, "loss": 1.6106, "step": 2888 }, { "epoch": 0.3855084067253803, "grad_norm": 1.2826160394488593, "learning_rate": 1.4072498522046494e-05, "loss": 1.6531, "step": 2889 }, { "epoch": 0.38564184681078195, "grad_norm": 1.201976231365931, "learning_rate": 1.4068550873536543e-05, "loss": 1.6206, "step": 2890 }, { "epoch": 0.3857752868961836, "grad_norm": 1.026733475170239, "learning_rate": 1.4064602465067613e-05, "loss": 1.6505, "step": 2891 }, { "epoch": 0.3859087269815853, "grad_norm": 1.236834192825807, "learning_rate": 1.406065329737722e-05, "loss": 1.6342, "step": 2892 }, { "epoch": 0.3860421670669869, "grad_norm": 1.150715310952092, "learning_rate": 1.4056703371203023e-05, "loss": 1.6205, "step": 2893 }, { "epoch": 0.3861756071523886, "grad_norm": 1.0642046173241948, "learning_rate": 1.4052752687282824e-05, "loss": 1.6052, "step": 2894 }, { "epoch": 0.38630904723779025, "grad_norm": 1.0978042763206004, "learning_rate": 1.4048801246354567e-05, "loss": 1.6058, "step": 2895 }, { "epoch": 0.38644248732319186, "grad_norm": 1.177880101416564, "learning_rate": 1.4044849049156328e-05, "loss": 1.6127, "step": 2896 }, { "epoch": 0.38657592740859353, "grad_norm": 1.017132075267852, "learning_rate": 1.4040896096426346e-05, "loss": 1.6143, "step": 2897 }, { "epoch": 0.3867093674939952, "grad_norm": 1.0501232630820656, "learning_rate": 1.4036942388902976e-05, "loss": 1.5509, "step": 2898 }, { "epoch": 0.3868428075793969, "grad_norm": 1.2571254734715342, "learning_rate": 1.403298792732473e-05, "loss": 1.6453, "step": 2899 }, { "epoch": 0.3869762476647985, "grad_norm": 0.9875221484898435, "learning_rate": 1.4029032712430262e-05, "loss": 1.596, "step": 2900 }, { "epoch": 0.38710968775020016, "grad_norm": 1.1285508086710352, "learning_rate": 1.4025076744958348e-05, "loss": 1.6236, "step": 2901 }, { "epoch": 0.38724312783560183, "grad_norm": 1.2197035948818218, "learning_rate": 1.4021120025647932e-05, "loss": 1.5706, "step": 2902 }, { "epoch": 0.38737656792100345, "grad_norm": 1.0890256309645825, "learning_rate": 1.4017162555238072e-05, "loss": 1.5571, "step": 2903 }, { "epoch": 0.3875100080064051, "grad_norm": 1.080695243503158, "learning_rate": 1.4013204334467987e-05, "loss": 1.6454, "step": 2904 }, { "epoch": 0.3876434480918068, "grad_norm": 0.9814101368260878, "learning_rate": 1.4009245364077024e-05, "loss": 1.6153, "step": 2905 }, { "epoch": 0.38777688817720846, "grad_norm": 0.9904485921133632, "learning_rate": 1.4005285644804673e-05, "loss": 1.6276, "step": 2906 }, { "epoch": 0.38791032826261007, "grad_norm": 0.9972089960040826, "learning_rate": 1.4001325177390565e-05, "loss": 1.6205, "step": 2907 }, { "epoch": 0.38804376834801174, "grad_norm": 1.2730345239020744, "learning_rate": 1.3997363962574473e-05, "loss": 1.6412, "step": 2908 }, { "epoch": 0.3881772084334134, "grad_norm": 0.9675278590077582, "learning_rate": 1.3993402001096304e-05, "loss": 1.6351, "step": 2909 }, { "epoch": 0.388310648518815, "grad_norm": 1.0292534070839274, "learning_rate": 1.3989439293696105e-05, "loss": 1.6089, "step": 2910 }, { "epoch": 0.3884440886042167, "grad_norm": 1.019759996442744, "learning_rate": 1.3985475841114071e-05, "loss": 1.63, "step": 2911 }, { "epoch": 0.38857752868961837, "grad_norm": 0.9807744844897855, "learning_rate": 1.3981511644090523e-05, "loss": 1.6103, "step": 2912 }, { "epoch": 0.38871096877502004, "grad_norm": 1.025209057661364, "learning_rate": 1.3977546703365934e-05, "loss": 1.5834, "step": 2913 }, { "epoch": 0.38884440886042165, "grad_norm": 1.089409574851773, "learning_rate": 1.3973581019680906e-05, "loss": 1.707, "step": 2914 }, { "epoch": 0.3889778489458233, "grad_norm": 0.9724916772280182, "learning_rate": 1.3969614593776184e-05, "loss": 1.6204, "step": 2915 }, { "epoch": 0.389111289031225, "grad_norm": 1.007702700819211, "learning_rate": 1.3965647426392653e-05, "loss": 1.6122, "step": 2916 }, { "epoch": 0.3892447291166266, "grad_norm": 0.9419094811442577, "learning_rate": 1.396167951827133e-05, "loss": 1.5903, "step": 2917 }, { "epoch": 0.3893781692020283, "grad_norm": 1.0374726700116226, "learning_rate": 1.395771087015338e-05, "loss": 1.6066, "step": 2918 }, { "epoch": 0.38951160928742995, "grad_norm": 1.0328706825533176, "learning_rate": 1.3953741482780102e-05, "loss": 1.5566, "step": 2919 }, { "epoch": 0.3896450493728316, "grad_norm": 1.2728567621408549, "learning_rate": 1.3949771356892927e-05, "loss": 1.6384, "step": 2920 }, { "epoch": 0.38977848945823323, "grad_norm": 1.0123347440431487, "learning_rate": 1.3945800493233432e-05, "loss": 1.5981, "step": 2921 }, { "epoch": 0.3899119295436349, "grad_norm": 0.991927710941825, "learning_rate": 1.3941828892543332e-05, "loss": 1.6323, "step": 2922 }, { "epoch": 0.3900453696290366, "grad_norm": 0.9860434334934471, "learning_rate": 1.3937856555564472e-05, "loss": 1.5993, "step": 2923 }, { "epoch": 0.3901788097144382, "grad_norm": 1.1272050680874104, "learning_rate": 1.3933883483038843e-05, "loss": 1.6889, "step": 2924 }, { "epoch": 0.39031224979983986, "grad_norm": 1.013248475393018, "learning_rate": 1.392990967570857e-05, "loss": 1.5832, "step": 2925 }, { "epoch": 0.39044568988524153, "grad_norm": 0.9972907037837129, "learning_rate": 1.392593513431591e-05, "loss": 1.6312, "step": 2926 }, { "epoch": 0.3905791299706432, "grad_norm": 1.0368488326806515, "learning_rate": 1.392195985960327e-05, "loss": 1.593, "step": 2927 }, { "epoch": 0.3907125700560448, "grad_norm": 0.9818098293390851, "learning_rate": 1.3917983852313174e-05, "loss": 1.6771, "step": 2928 }, { "epoch": 0.3908460101414465, "grad_norm": 0.9843368644401832, "learning_rate": 1.391400711318831e-05, "loss": 1.6287, "step": 2929 }, { "epoch": 0.39097945022684816, "grad_norm": 1.1575289949746161, "learning_rate": 1.3910029642971473e-05, "loss": 1.5908, "step": 2930 }, { "epoch": 0.3911128903122498, "grad_norm": 1.0086519987992577, "learning_rate": 1.390605144240562e-05, "loss": 1.5931, "step": 2931 }, { "epoch": 0.39124633039765144, "grad_norm": 0.9903875800619729, "learning_rate": 1.390207251223383e-05, "loss": 1.5842, "step": 2932 }, { "epoch": 0.3913797704830531, "grad_norm": 0.9577055345082642, "learning_rate": 1.3898092853199318e-05, "loss": 1.6194, "step": 2933 }, { "epoch": 0.3915132105684548, "grad_norm": 1.2357946602483, "learning_rate": 1.3894112466045448e-05, "loss": 1.6087, "step": 2934 }, { "epoch": 0.3916466506538564, "grad_norm": 1.0769210602839947, "learning_rate": 1.3890131351515703e-05, "loss": 1.6153, "step": 2935 }, { "epoch": 0.39178009073925807, "grad_norm": 1.10534271606038, "learning_rate": 1.388614951035371e-05, "loss": 1.6172, "step": 2936 }, { "epoch": 0.39191353082465974, "grad_norm": 1.0791728229209796, "learning_rate": 1.3882166943303239e-05, "loss": 1.5885, "step": 2937 }, { "epoch": 0.3920469709100614, "grad_norm": 1.048943382922718, "learning_rate": 1.387818365110818e-05, "loss": 1.6466, "step": 2938 }, { "epoch": 0.392180410995463, "grad_norm": 0.9775623637977353, "learning_rate": 1.3874199634512568e-05, "loss": 1.6842, "step": 2939 }, { "epoch": 0.3923138510808647, "grad_norm": 1.2823226827585823, "learning_rate": 1.3870214894260577e-05, "loss": 1.6346, "step": 2940 }, { "epoch": 0.39244729116626637, "grad_norm": 2.9554542149643392, "learning_rate": 1.3866229431096506e-05, "loss": 1.631, "step": 2941 }, { "epoch": 0.392580731251668, "grad_norm": 1.4313180292149659, "learning_rate": 1.3862243245764795e-05, "loss": 1.5588, "step": 2942 }, { "epoch": 0.39271417133706965, "grad_norm": 1.0226581878161995, "learning_rate": 1.3858256339010021e-05, "loss": 1.5696, "step": 2943 }, { "epoch": 0.3928476114224713, "grad_norm": 1.203432785432225, "learning_rate": 1.385426871157689e-05, "loss": 1.5932, "step": 2944 }, { "epoch": 0.392981051507873, "grad_norm": 1.2211552689992726, "learning_rate": 1.3850280364210246e-05, "loss": 1.6112, "step": 2945 }, { "epoch": 0.3931144915932746, "grad_norm": 1.0952032810522179, "learning_rate": 1.3846291297655066e-05, "loss": 1.6057, "step": 2946 }, { "epoch": 0.3932479316786763, "grad_norm": 1.1552204218064228, "learning_rate": 1.3842301512656465e-05, "loss": 1.6422, "step": 2947 }, { "epoch": 0.39338137176407795, "grad_norm": 1.0183412684546411, "learning_rate": 1.3838311009959686e-05, "loss": 1.5689, "step": 2948 }, { "epoch": 0.39351481184947956, "grad_norm": 1.047132191195357, "learning_rate": 1.3834319790310113e-05, "loss": 1.6462, "step": 2949 }, { "epoch": 0.39364825193488123, "grad_norm": 0.9755539464048916, "learning_rate": 1.3830327854453258e-05, "loss": 1.6675, "step": 2950 }, { "epoch": 0.3937816920202829, "grad_norm": 1.035910398586731, "learning_rate": 1.3826335203134768e-05, "loss": 1.6491, "step": 2951 }, { "epoch": 0.3939151321056846, "grad_norm": 1.0231501045177207, "learning_rate": 1.382234183710043e-05, "loss": 1.644, "step": 2952 }, { "epoch": 0.3940485721910862, "grad_norm": 1.0110762267018458, "learning_rate": 1.3818347757096152e-05, "loss": 1.6056, "step": 2953 }, { "epoch": 0.39418201227648786, "grad_norm": 1.9202250806838923, "learning_rate": 1.381435296386799e-05, "loss": 1.5915, "step": 2954 }, { "epoch": 0.39431545236188953, "grad_norm": 1.06141099974795, "learning_rate": 1.381035745816212e-05, "loss": 1.6049, "step": 2955 }, { "epoch": 0.39444889244729114, "grad_norm": 1.0770580734734054, "learning_rate": 1.3806361240724862e-05, "loss": 1.6341, "step": 2956 }, { "epoch": 0.3945823325326928, "grad_norm": 1.0439072213146445, "learning_rate": 1.3802364312302659e-05, "loss": 1.6512, "step": 2957 }, { "epoch": 0.3947157726180945, "grad_norm": 1.0133974448550223, "learning_rate": 1.3798366673642095e-05, "loss": 1.6098, "step": 2958 }, { "epoch": 0.39484921270349616, "grad_norm": 1.0228026592925512, "learning_rate": 1.3794368325489881e-05, "loss": 1.6292, "step": 2959 }, { "epoch": 0.39498265278889777, "grad_norm": 1.2383549797377225, "learning_rate": 1.3790369268592865e-05, "loss": 1.6379, "step": 2960 }, { "epoch": 0.39511609287429944, "grad_norm": 1.0527485547831446, "learning_rate": 1.3786369503698024e-05, "loss": 1.6094, "step": 2961 }, { "epoch": 0.3952495329597011, "grad_norm": 1.2613117991115332, "learning_rate": 1.3782369031552469e-05, "loss": 1.6241, "step": 2962 }, { "epoch": 0.3953829730451027, "grad_norm": 1.0530326147165556, "learning_rate": 1.3778367852903443e-05, "loss": 1.6379, "step": 2963 }, { "epoch": 0.3955164131305044, "grad_norm": 2.2697531891999043, "learning_rate": 1.3774365968498323e-05, "loss": 1.6122, "step": 2964 }, { "epoch": 0.39564985321590607, "grad_norm": 1.0088488119923664, "learning_rate": 1.377036337908461e-05, "loss": 1.6069, "step": 2965 }, { "epoch": 0.39578329330130774, "grad_norm": 1.1919777045176008, "learning_rate": 1.3766360085409947e-05, "loss": 1.6282, "step": 2966 }, { "epoch": 0.39591673338670935, "grad_norm": 1.0394509374153975, "learning_rate": 1.37623560882221e-05, "loss": 1.6261, "step": 2967 }, { "epoch": 0.396050173472111, "grad_norm": 1.4778314816259879, "learning_rate": 1.375835138826897e-05, "loss": 1.5999, "step": 2968 }, { "epoch": 0.3961836135575127, "grad_norm": 1.076372619162072, "learning_rate": 1.3754345986298594e-05, "loss": 1.63, "step": 2969 }, { "epoch": 0.3963170536429143, "grad_norm": 1.0029057794554, "learning_rate": 1.3750339883059132e-05, "loss": 1.5545, "step": 2970 }, { "epoch": 0.396450493728316, "grad_norm": 0.9763005134113368, "learning_rate": 1.3746333079298882e-05, "loss": 1.6233, "step": 2971 }, { "epoch": 0.39658393381371765, "grad_norm": 0.9997766940306253, "learning_rate": 1.374232557576626e-05, "loss": 1.6042, "step": 2972 }, { "epoch": 0.3967173738991193, "grad_norm": 1.081981802944108, "learning_rate": 1.3738317373209833e-05, "loss": 1.6644, "step": 2973 }, { "epoch": 0.39685081398452093, "grad_norm": 0.9784210671800242, "learning_rate": 1.3734308472378281e-05, "loss": 1.6678, "step": 2974 }, { "epoch": 0.3969842540699226, "grad_norm": 1.0089465304994234, "learning_rate": 1.3730298874020424e-05, "loss": 1.5872, "step": 2975 }, { "epoch": 0.3971176941553243, "grad_norm": 0.9897744114028344, "learning_rate": 1.372628857888521e-05, "loss": 1.6752, "step": 2976 }, { "epoch": 0.3972511342407259, "grad_norm": 1.0113015313370548, "learning_rate": 1.3722277587721713e-05, "loss": 1.6196, "step": 2977 }, { "epoch": 0.39738457432612756, "grad_norm": 1.248353362601478, "learning_rate": 1.3718265901279144e-05, "loss": 1.6446, "step": 2978 }, { "epoch": 0.39751801441152923, "grad_norm": 0.9976910628273125, "learning_rate": 1.3714253520306835e-05, "loss": 1.6559, "step": 2979 }, { "epoch": 0.3976514544969309, "grad_norm": 0.9395020657372218, "learning_rate": 1.371024044555426e-05, "loss": 1.5507, "step": 2980 }, { "epoch": 0.3977848945823325, "grad_norm": 1.059218486991219, "learning_rate": 1.3706226677771011e-05, "loss": 1.6096, "step": 2981 }, { "epoch": 0.3979183346677342, "grad_norm": 1.1913088141870016, "learning_rate": 1.3702212217706819e-05, "loss": 1.605, "step": 2982 }, { "epoch": 0.39805177475313586, "grad_norm": 1.0112436026820137, "learning_rate": 1.3698197066111529e-05, "loss": 1.6008, "step": 2983 }, { "epoch": 0.39818521483853747, "grad_norm": 1.2284860321693414, "learning_rate": 1.3694181223735138e-05, "loss": 1.6154, "step": 2984 }, { "epoch": 0.39831865492393914, "grad_norm": 1.0098640773316505, "learning_rate": 1.3690164691327752e-05, "loss": 1.6321, "step": 2985 }, { "epoch": 0.3984520950093408, "grad_norm": 0.9717411694382783, "learning_rate": 1.3686147469639616e-05, "loss": 1.6847, "step": 2986 }, { "epoch": 0.3985855350947425, "grad_norm": 0.9284579466010013, "learning_rate": 1.3682129559421102e-05, "loss": 1.5653, "step": 2987 }, { "epoch": 0.3987189751801441, "grad_norm": 1.0645254671871938, "learning_rate": 1.3678110961422708e-05, "loss": 1.5928, "step": 2988 }, { "epoch": 0.39885241526554577, "grad_norm": 1.1099047312189834, "learning_rate": 1.3674091676395067e-05, "loss": 1.5968, "step": 2989 }, { "epoch": 0.39898585535094744, "grad_norm": 1.0039809381615434, "learning_rate": 1.3670071705088925e-05, "loss": 1.5968, "step": 2990 }, { "epoch": 0.39911929543634905, "grad_norm": 1.1183712241615666, "learning_rate": 1.366605104825518e-05, "loss": 1.5859, "step": 2991 }, { "epoch": 0.3992527355217507, "grad_norm": 1.2317478828162558, "learning_rate": 1.3662029706644834e-05, "loss": 1.633, "step": 2992 }, { "epoch": 0.3993861756071524, "grad_norm": 1.0317396851316578, "learning_rate": 1.3658007681009038e-05, "loss": 1.604, "step": 2993 }, { "epoch": 0.39951961569255406, "grad_norm": 1.0355968618370723, "learning_rate": 1.365398497209905e-05, "loss": 1.6271, "step": 2994 }, { "epoch": 0.3996530557779557, "grad_norm": 0.9930994963511364, "learning_rate": 1.3649961580666274e-05, "loss": 1.6196, "step": 2995 }, { "epoch": 0.39978649586335735, "grad_norm": 1.2269862375772342, "learning_rate": 1.364593750746223e-05, "loss": 1.6219, "step": 2996 }, { "epoch": 0.399919935948759, "grad_norm": 0.9700028478438667, "learning_rate": 1.3641912753238572e-05, "loss": 1.6564, "step": 2997 }, { "epoch": 0.40005337603416063, "grad_norm": 0.9545800135062432, "learning_rate": 1.3637887318747077e-05, "loss": 1.6116, "step": 2998 }, { "epoch": 0.4001868161195623, "grad_norm": 0.9570154971534128, "learning_rate": 1.3633861204739647e-05, "loss": 1.6712, "step": 2999 }, { "epoch": 0.400320256204964, "grad_norm": 1.2079133599715546, "learning_rate": 1.3629834411968317e-05, "loss": 1.6293, "step": 3000 }, { "epoch": 0.40045369629036565, "grad_norm": 0.9836480600801262, "learning_rate": 1.362580694118525e-05, "loss": 1.5987, "step": 3001 }, { "epoch": 0.40058713637576726, "grad_norm": 0.9910032459354412, "learning_rate": 1.3621778793142721e-05, "loss": 1.6064, "step": 3002 }, { "epoch": 0.40072057646116893, "grad_norm": 1.0967755414857783, "learning_rate": 1.3617749968593155e-05, "loss": 1.6292, "step": 3003 }, { "epoch": 0.4008540165465706, "grad_norm": 1.0003163865850948, "learning_rate": 1.3613720468289083e-05, "loss": 1.6362, "step": 3004 }, { "epoch": 0.40098745663197227, "grad_norm": 1.0315705480717272, "learning_rate": 1.3609690292983171e-05, "loss": 1.5829, "step": 3005 }, { "epoch": 0.4011208967173739, "grad_norm": 0.976139700381007, "learning_rate": 1.3605659443428208e-05, "loss": 1.6224, "step": 3006 }, { "epoch": 0.40125433680277556, "grad_norm": 0.9736571196522579, "learning_rate": 1.3601627920377114e-05, "loss": 1.5872, "step": 3007 }, { "epoch": 0.40138777688817723, "grad_norm": 1.0002994090288835, "learning_rate": 1.359759572458293e-05, "loss": 1.6186, "step": 3008 }, { "epoch": 0.40152121697357884, "grad_norm": 1.045079222117039, "learning_rate": 1.3593562856798828e-05, "loss": 1.5786, "step": 3009 }, { "epoch": 0.4016546570589805, "grad_norm": 0.9688502047059884, "learning_rate": 1.3589529317778097e-05, "loss": 1.6257, "step": 3010 }, { "epoch": 0.4017880971443822, "grad_norm": 1.0897993308963512, "learning_rate": 1.3585495108274155e-05, "loss": 1.6415, "step": 3011 }, { "epoch": 0.40192153722978385, "grad_norm": 1.024133361111547, "learning_rate": 1.3581460229040552e-05, "loss": 1.6256, "step": 3012 }, { "epoch": 0.40205497731518547, "grad_norm": 1.0901551656265192, "learning_rate": 1.3577424680830953e-05, "loss": 1.6085, "step": 3013 }, { "epoch": 0.40218841740058714, "grad_norm": 1.0074753288531135, "learning_rate": 1.3573388464399158e-05, "loss": 1.6159, "step": 3014 }, { "epoch": 0.4023218574859888, "grad_norm": 1.0808434730186502, "learning_rate": 1.3569351580499077e-05, "loss": 1.6081, "step": 3015 }, { "epoch": 0.4024552975713904, "grad_norm": 1.3390539579090508, "learning_rate": 1.3565314029884764e-05, "loss": 1.6476, "step": 3016 }, { "epoch": 0.4025887376567921, "grad_norm": 1.1325494027036593, "learning_rate": 1.356127581331038e-05, "loss": 1.6408, "step": 3017 }, { "epoch": 0.40272217774219377, "grad_norm": 1.1561856323405204, "learning_rate": 1.3557236931530223e-05, "loss": 1.6112, "step": 3018 }, { "epoch": 0.40285561782759544, "grad_norm": 1.0096598053584744, "learning_rate": 1.3553197385298704e-05, "loss": 1.6228, "step": 3019 }, { "epoch": 0.40298905791299705, "grad_norm": 0.9619782469364923, "learning_rate": 1.3549157175370374e-05, "loss": 1.6378, "step": 3020 }, { "epoch": 0.4031224979983987, "grad_norm": 1.0828538858146166, "learning_rate": 1.3545116302499888e-05, "loss": 1.6662, "step": 3021 }, { "epoch": 0.4032559380838004, "grad_norm": 1.1612286498560225, "learning_rate": 1.3541074767442039e-05, "loss": 1.6038, "step": 3022 }, { "epoch": 0.403389378169202, "grad_norm": 1.0469641391213618, "learning_rate": 1.3537032570951742e-05, "loss": 1.5947, "step": 3023 }, { "epoch": 0.4035228182546037, "grad_norm": 1.1061755741793078, "learning_rate": 1.353298971378403e-05, "loss": 1.541, "step": 3024 }, { "epoch": 0.40365625834000535, "grad_norm": 1.255991398258131, "learning_rate": 1.3528946196694067e-05, "loss": 1.6509, "step": 3025 }, { "epoch": 0.403789698425407, "grad_norm": 1.2128774118932317, "learning_rate": 1.352490202043713e-05, "loss": 1.6228, "step": 3026 }, { "epoch": 0.40392313851080863, "grad_norm": 0.9952887623145266, "learning_rate": 1.3520857185768627e-05, "loss": 1.647, "step": 3027 }, { "epoch": 0.4040565785962103, "grad_norm": 0.9867845852620436, "learning_rate": 1.3516811693444092e-05, "loss": 1.5929, "step": 3028 }, { "epoch": 0.404190018681612, "grad_norm": 1.0634687182480442, "learning_rate": 1.3512765544219168e-05, "loss": 1.6571, "step": 3029 }, { "epoch": 0.4043234587670136, "grad_norm": 0.9923107764534385, "learning_rate": 1.350871873884964e-05, "loss": 1.6509, "step": 3030 }, { "epoch": 0.40445689885241526, "grad_norm": 0.9418612013435159, "learning_rate": 1.3504671278091396e-05, "loss": 1.6459, "step": 3031 }, { "epoch": 0.40459033893781693, "grad_norm": 0.9743708699323149, "learning_rate": 1.3500623162700464e-05, "loss": 1.5773, "step": 3032 }, { "epoch": 0.4047237790232186, "grad_norm": 1.0021337250013098, "learning_rate": 1.3496574393432978e-05, "loss": 1.6072, "step": 3033 }, { "epoch": 0.4048572191086202, "grad_norm": 0.9905407512325389, "learning_rate": 1.3492524971045202e-05, "loss": 1.5955, "step": 3034 }, { "epoch": 0.4049906591940219, "grad_norm": 0.9829445518512612, "learning_rate": 1.3488474896293531e-05, "loss": 1.6473, "step": 3035 }, { "epoch": 0.40512409927942356, "grad_norm": 1.071551407117045, "learning_rate": 1.3484424169934465e-05, "loss": 1.6365, "step": 3036 }, { "epoch": 0.40525753936482517, "grad_norm": 1.0858770844558168, "learning_rate": 1.3480372792724636e-05, "loss": 1.5908, "step": 3037 }, { "epoch": 0.40539097945022684, "grad_norm": 1.097690642719979, "learning_rate": 1.3476320765420794e-05, "loss": 1.6341, "step": 3038 }, { "epoch": 0.4055244195356285, "grad_norm": 0.9550194281013512, "learning_rate": 1.3472268088779814e-05, "loss": 1.6558, "step": 3039 }, { "epoch": 0.4056578596210302, "grad_norm": 0.9831862531483857, "learning_rate": 1.3468214763558686e-05, "loss": 1.5536, "step": 3040 }, { "epoch": 0.4057912997064318, "grad_norm": 1.1319350865381137, "learning_rate": 1.3464160790514531e-05, "loss": 1.6586, "step": 3041 }, { "epoch": 0.40592473979183347, "grad_norm": 0.988139658682674, "learning_rate": 1.3460106170404579e-05, "loss": 1.5955, "step": 3042 }, { "epoch": 0.40605817987723514, "grad_norm": 1.0084348562604524, "learning_rate": 1.3456050903986189e-05, "loss": 1.6381, "step": 3043 }, { "epoch": 0.40619161996263675, "grad_norm": 1.0391432220005692, "learning_rate": 1.3451994992016839e-05, "loss": 1.5705, "step": 3044 }, { "epoch": 0.4063250600480384, "grad_norm": 0.9843503035286154, "learning_rate": 1.3447938435254127e-05, "loss": 1.5664, "step": 3045 }, { "epoch": 0.4064585001334401, "grad_norm": 0.9833405027126961, "learning_rate": 1.3443881234455772e-05, "loss": 1.6586, "step": 3046 }, { "epoch": 0.40659194021884176, "grad_norm": 0.9898970019998985, "learning_rate": 1.3439823390379609e-05, "loss": 1.6347, "step": 3047 }, { "epoch": 0.4067253803042434, "grad_norm": 1.0000597042602213, "learning_rate": 1.3435764903783605e-05, "loss": 1.6239, "step": 3048 }, { "epoch": 0.40685882038964505, "grad_norm": 0.9928896355998765, "learning_rate": 1.3431705775425835e-05, "loss": 1.5948, "step": 3049 }, { "epoch": 0.4069922604750467, "grad_norm": 1.2336188070965246, "learning_rate": 1.3427646006064492e-05, "loss": 1.5953, "step": 3050 }, { "epoch": 0.40712570056044833, "grad_norm": 1.0034026633786906, "learning_rate": 1.3423585596457906e-05, "loss": 1.6316, "step": 3051 }, { "epoch": 0.40725914064585, "grad_norm": 2.1481053784351336, "learning_rate": 1.3419524547364506e-05, "loss": 1.6295, "step": 3052 }, { "epoch": 0.4073925807312517, "grad_norm": 1.1897086059631425, "learning_rate": 1.3415462859542856e-05, "loss": 1.5635, "step": 3053 }, { "epoch": 0.40752602081665334, "grad_norm": 1.023418328395939, "learning_rate": 1.3411400533751628e-05, "loss": 1.6415, "step": 3054 }, { "epoch": 0.40765946090205496, "grad_norm": 0.98213814874442, "learning_rate": 1.3407337570749622e-05, "loss": 1.6428, "step": 3055 }, { "epoch": 0.40779290098745663, "grad_norm": 1.283169457369164, "learning_rate": 1.3403273971295749e-05, "loss": 1.6235, "step": 3056 }, { "epoch": 0.4079263410728583, "grad_norm": 0.9925266060021142, "learning_rate": 1.339920973614905e-05, "loss": 1.621, "step": 3057 }, { "epoch": 0.4080597811582599, "grad_norm": 0.9614672523532065, "learning_rate": 1.3395144866068673e-05, "loss": 1.5774, "step": 3058 }, { "epoch": 0.4081932212436616, "grad_norm": 0.9815628673629643, "learning_rate": 1.3391079361813888e-05, "loss": 1.5849, "step": 3059 }, { "epoch": 0.40832666132906326, "grad_norm": 1.0926187808935628, "learning_rate": 1.338701322414409e-05, "loss": 1.6139, "step": 3060 }, { "epoch": 0.4084601014144649, "grad_norm": 0.9687836831287908, "learning_rate": 1.3382946453818782e-05, "loss": 1.5821, "step": 3061 }, { "epoch": 0.40859354149986654, "grad_norm": 1.2615662453906389, "learning_rate": 1.3378879051597594e-05, "loss": 1.6187, "step": 3062 }, { "epoch": 0.4087269815852682, "grad_norm": 1.143934867223233, "learning_rate": 1.3374811018240268e-05, "loss": 1.6114, "step": 3063 }, { "epoch": 0.4088604216706699, "grad_norm": 0.9474085642193837, "learning_rate": 1.3370742354506667e-05, "loss": 1.6128, "step": 3064 }, { "epoch": 0.4089938617560715, "grad_norm": 0.9872797898071304, "learning_rate": 1.3366673061156775e-05, "loss": 1.6521, "step": 3065 }, { "epoch": 0.40912730184147317, "grad_norm": 1.3660459817420605, "learning_rate": 1.3362603138950681e-05, "loss": 1.5566, "step": 3066 }, { "epoch": 0.40926074192687484, "grad_norm": 1.0188587037661874, "learning_rate": 1.335853258864861e-05, "loss": 1.6181, "step": 3067 }, { "epoch": 0.4093941820122765, "grad_norm": 0.9730036168336966, "learning_rate": 1.3354461411010887e-05, "loss": 1.6294, "step": 3068 }, { "epoch": 0.4095276220976781, "grad_norm": 1.0088183488719165, "learning_rate": 1.3350389606797966e-05, "loss": 1.599, "step": 3069 }, { "epoch": 0.4096610621830798, "grad_norm": 0.9662028481495397, "learning_rate": 1.3346317176770409e-05, "loss": 1.6385, "step": 3070 }, { "epoch": 0.40979450226848146, "grad_norm": 0.9917563809796904, "learning_rate": 1.3342244121688905e-05, "loss": 1.6225, "step": 3071 }, { "epoch": 0.4099279423538831, "grad_norm": 0.9830125253461315, "learning_rate": 1.3338170442314254e-05, "loss": 1.6107, "step": 3072 }, { "epoch": 0.41006138243928475, "grad_norm": 0.9826012171089086, "learning_rate": 1.3334096139407368e-05, "loss": 1.5969, "step": 3073 }, { "epoch": 0.4101948225246864, "grad_norm": 1.3174009396986306, "learning_rate": 1.3330021213729283e-05, "loss": 1.5944, "step": 3074 }, { "epoch": 0.4103282626100881, "grad_norm": 0.971753779897976, "learning_rate": 1.332594566604115e-05, "loss": 1.6118, "step": 3075 }, { "epoch": 0.4104617026954897, "grad_norm": 0.9913292187671285, "learning_rate": 1.3321869497104233e-05, "loss": 1.6234, "step": 3076 }, { "epoch": 0.4105951427808914, "grad_norm": 1.1276395807046564, "learning_rate": 1.3317792707679915e-05, "loss": 1.6527, "step": 3077 }, { "epoch": 0.41072858286629305, "grad_norm": 0.9668291003462994, "learning_rate": 1.3313715298529697e-05, "loss": 1.6186, "step": 3078 }, { "epoch": 0.4108620229516947, "grad_norm": 0.995376867257694, "learning_rate": 1.3309637270415185e-05, "loss": 1.6667, "step": 3079 }, { "epoch": 0.41099546303709633, "grad_norm": 1.0129402139207564, "learning_rate": 1.3305558624098116e-05, "loss": 1.6623, "step": 3080 }, { "epoch": 0.411128903122498, "grad_norm": 1.1770451338395234, "learning_rate": 1.3301479360340329e-05, "loss": 1.6662, "step": 3081 }, { "epoch": 0.41126234320789967, "grad_norm": 1.0658383068003052, "learning_rate": 1.3297399479903787e-05, "loss": 1.6057, "step": 3082 }, { "epoch": 0.4113957832933013, "grad_norm": 1.1694647133067295, "learning_rate": 1.3293318983550563e-05, "loss": 1.6122, "step": 3083 }, { "epoch": 0.41152922337870296, "grad_norm": 0.942161713707869, "learning_rate": 1.3289237872042851e-05, "loss": 1.606, "step": 3084 }, { "epoch": 0.41166266346410463, "grad_norm": 0.9757692839225567, "learning_rate": 1.3285156146142954e-05, "loss": 1.6343, "step": 3085 }, { "epoch": 0.4117961035495063, "grad_norm": 0.9486089863227384, "learning_rate": 1.3281073806613289e-05, "loss": 1.5431, "step": 3086 }, { "epoch": 0.4119295436349079, "grad_norm": 0.9732494030371487, "learning_rate": 1.3276990854216396e-05, "loss": 1.6314, "step": 3087 }, { "epoch": 0.4120629837203096, "grad_norm": 1.1111387987861114, "learning_rate": 1.3272907289714918e-05, "loss": 1.6326, "step": 3088 }, { "epoch": 0.41219642380571125, "grad_norm": 0.9652044439453622, "learning_rate": 1.3268823113871627e-05, "loss": 1.6681, "step": 3089 }, { "epoch": 0.41232986389111287, "grad_norm": 0.9561565798940396, "learning_rate": 1.3264738327449389e-05, "loss": 1.5918, "step": 3090 }, { "epoch": 0.41246330397651454, "grad_norm": 1.3152845877647086, "learning_rate": 1.3260652931211207e-05, "loss": 1.607, "step": 3091 }, { "epoch": 0.4125967440619162, "grad_norm": 0.989537074682015, "learning_rate": 1.325656692592018e-05, "loss": 1.6011, "step": 3092 }, { "epoch": 0.4127301841473179, "grad_norm": 0.9734140085573458, "learning_rate": 1.3252480312339526e-05, "loss": 1.648, "step": 3093 }, { "epoch": 0.4128636242327195, "grad_norm": 0.9558460700959762, "learning_rate": 1.3248393091232583e-05, "loss": 1.5901, "step": 3094 }, { "epoch": 0.41299706431812117, "grad_norm": 0.9913733635852103, "learning_rate": 1.3244305263362796e-05, "loss": 1.6253, "step": 3095 }, { "epoch": 0.41313050440352284, "grad_norm": 1.1740036558110662, "learning_rate": 1.3240216829493722e-05, "loss": 1.5879, "step": 3096 }, { "epoch": 0.41326394448892445, "grad_norm": 1.1257399618437767, "learning_rate": 1.3236127790389036e-05, "loss": 1.617, "step": 3097 }, { "epoch": 0.4133973845743261, "grad_norm": 0.9756326130850466, "learning_rate": 1.323203814681252e-05, "loss": 1.596, "step": 3098 }, { "epoch": 0.4135308246597278, "grad_norm": 0.91654844990653, "learning_rate": 1.3227947899528081e-05, "loss": 1.5737, "step": 3099 }, { "epoch": 0.41366426474512946, "grad_norm": 1.0075079473682016, "learning_rate": 1.3223857049299724e-05, "loss": 1.5862, "step": 3100 }, { "epoch": 0.4137977048305311, "grad_norm": 1.0128738725898727, "learning_rate": 1.3219765596891576e-05, "loss": 1.6742, "step": 3101 }, { "epoch": 0.41393114491593275, "grad_norm": 4.5247192473377655, "learning_rate": 1.321567354306787e-05, "loss": 1.6274, "step": 3102 }, { "epoch": 0.4140645850013344, "grad_norm": 1.2025566547291768, "learning_rate": 1.3211580888592964e-05, "loss": 1.6561, "step": 3103 }, { "epoch": 0.41419802508673603, "grad_norm": 1.2057864440073822, "learning_rate": 1.3207487634231308e-05, "loss": 1.6046, "step": 3104 }, { "epoch": 0.4143314651721377, "grad_norm": 1.032603380515401, "learning_rate": 1.3203393780747482e-05, "loss": 1.6494, "step": 3105 }, { "epoch": 0.4144649052575394, "grad_norm": 0.984221055123714, "learning_rate": 1.3199299328906173e-05, "loss": 1.5627, "step": 3106 }, { "epoch": 0.41459834534294104, "grad_norm": 1.0080633070266374, "learning_rate": 1.3195204279472171e-05, "loss": 1.6455, "step": 3107 }, { "epoch": 0.41473178542834266, "grad_norm": 1.0172031612333243, "learning_rate": 1.319110863321039e-05, "loss": 1.6523, "step": 3108 }, { "epoch": 0.41486522551374433, "grad_norm": 0.9819246274039858, "learning_rate": 1.3187012390885844e-05, "loss": 1.6192, "step": 3109 }, { "epoch": 0.414998665599146, "grad_norm": 1.0218273012540824, "learning_rate": 1.3182915553263676e-05, "loss": 1.5381, "step": 3110 }, { "epoch": 0.4151321056845476, "grad_norm": 1.1870404877073484, "learning_rate": 1.3178818121109116e-05, "loss": 1.5987, "step": 3111 }, { "epoch": 0.4152655457699493, "grad_norm": 0.9665697428225051, "learning_rate": 1.3174720095187527e-05, "loss": 1.5904, "step": 3112 }, { "epoch": 0.41539898585535096, "grad_norm": 1.0024983274761543, "learning_rate": 1.3170621476264368e-05, "loss": 1.5948, "step": 3113 }, { "epoch": 0.4155324259407526, "grad_norm": 1.3283967395783622, "learning_rate": 1.3166522265105216e-05, "loss": 1.6199, "step": 3114 }, { "epoch": 0.41566586602615424, "grad_norm": 1.0606323988381896, "learning_rate": 1.3162422462475757e-05, "loss": 1.5634, "step": 3115 }, { "epoch": 0.4157993061115559, "grad_norm": 1.0009105274760155, "learning_rate": 1.3158322069141788e-05, "loss": 1.6536, "step": 3116 }, { "epoch": 0.4159327461969576, "grad_norm": 0.998531188875352, "learning_rate": 1.3154221085869215e-05, "loss": 1.569, "step": 3117 }, { "epoch": 0.4160661862823592, "grad_norm": 1.0318215900037766, "learning_rate": 1.3150119513424054e-05, "loss": 1.5927, "step": 3118 }, { "epoch": 0.41619962636776087, "grad_norm": 1.09023864908779, "learning_rate": 1.3146017352572435e-05, "loss": 1.588, "step": 3119 }, { "epoch": 0.41633306645316254, "grad_norm": 1.1300901695201984, "learning_rate": 1.3141914604080593e-05, "loss": 1.5848, "step": 3120 }, { "epoch": 0.4164665065385642, "grad_norm": 0.9765411604695532, "learning_rate": 1.3137811268714875e-05, "loss": 1.5884, "step": 3121 }, { "epoch": 0.4165999466239658, "grad_norm": 0.9917987982620767, "learning_rate": 1.3133707347241735e-05, "loss": 1.6489, "step": 3122 }, { "epoch": 0.4167333867093675, "grad_norm": 1.0549323113529163, "learning_rate": 1.3129602840427741e-05, "loss": 1.6262, "step": 3123 }, { "epoch": 0.41686682679476916, "grad_norm": 1.0110367235245075, "learning_rate": 1.3125497749039574e-05, "loss": 1.6089, "step": 3124 }, { "epoch": 0.4170002668801708, "grad_norm": 1.1439338907133554, "learning_rate": 1.312139207384401e-05, "loss": 1.5526, "step": 3125 }, { "epoch": 0.41713370696557245, "grad_norm": 1.2855762643940758, "learning_rate": 1.3117285815607943e-05, "loss": 1.6033, "step": 3126 }, { "epoch": 0.4172671470509741, "grad_norm": 1.1679452250656743, "learning_rate": 1.311317897509838e-05, "loss": 1.6454, "step": 3127 }, { "epoch": 0.4174005871363758, "grad_norm": 1.0069767600727495, "learning_rate": 1.3109071553082426e-05, "loss": 1.6761, "step": 3128 }, { "epoch": 0.4175340272217774, "grad_norm": 0.9913179933467611, "learning_rate": 1.3104963550327307e-05, "loss": 1.5818, "step": 3129 }, { "epoch": 0.4176674673071791, "grad_norm": 1.008746692632273, "learning_rate": 1.3100854967600346e-05, "loss": 1.5777, "step": 3130 }, { "epoch": 0.41780090739258074, "grad_norm": 1.1253948107175336, "learning_rate": 1.3096745805668985e-05, "loss": 1.6108, "step": 3131 }, { "epoch": 0.41793434747798236, "grad_norm": 1.0118014744129107, "learning_rate": 1.309263606530076e-05, "loss": 1.5923, "step": 3132 }, { "epoch": 0.41806778756338403, "grad_norm": 1.0052085925079868, "learning_rate": 1.3088525747263334e-05, "loss": 1.617, "step": 3133 }, { "epoch": 0.4182012276487857, "grad_norm": 1.028648913612512, "learning_rate": 1.308441485232446e-05, "loss": 1.5913, "step": 3134 }, { "epoch": 0.41833466773418737, "grad_norm": 0.9714092683461435, "learning_rate": 1.308030338125201e-05, "loss": 1.5615, "step": 3135 }, { "epoch": 0.418468107819589, "grad_norm": 0.9640330982395612, "learning_rate": 1.307619133481396e-05, "loss": 1.5793, "step": 3136 }, { "epoch": 0.41860154790499066, "grad_norm": 0.9654481412303503, "learning_rate": 1.3072078713778391e-05, "loss": 1.5752, "step": 3137 }, { "epoch": 0.4187349879903923, "grad_norm": 1.0550083483656565, "learning_rate": 1.3067965518913495e-05, "loss": 1.6224, "step": 3138 }, { "epoch": 0.41886842807579394, "grad_norm": 1.0212917389559222, "learning_rate": 1.3063851750987566e-05, "loss": 1.657, "step": 3139 }, { "epoch": 0.4190018681611956, "grad_norm": 7.254823593563437, "learning_rate": 1.305973741076902e-05, "loss": 1.6833, "step": 3140 }, { "epoch": 0.4191353082465973, "grad_norm": 1.192322814644137, "learning_rate": 1.3055622499026358e-05, "loss": 1.6178, "step": 3141 }, { "epoch": 0.41926874833199895, "grad_norm": 1.0727747759744528, "learning_rate": 1.3051507016528206e-05, "loss": 1.6121, "step": 3142 }, { "epoch": 0.41940218841740057, "grad_norm": 1.090188535676899, "learning_rate": 1.3047390964043282e-05, "loss": 1.6112, "step": 3143 }, { "epoch": 0.41953562850280224, "grad_norm": 1.0358843384129968, "learning_rate": 1.3043274342340426e-05, "loss": 1.6308, "step": 3144 }, { "epoch": 0.4196690685882039, "grad_norm": 1.0321088750194614, "learning_rate": 1.3039157152188569e-05, "loss": 1.6028, "step": 3145 }, { "epoch": 0.4198025086736055, "grad_norm": 0.9448257397915428, "learning_rate": 1.3035039394356761e-05, "loss": 1.5707, "step": 3146 }, { "epoch": 0.4199359487590072, "grad_norm": 1.1440735516549028, "learning_rate": 1.3030921069614145e-05, "loss": 1.6633, "step": 3147 }, { "epoch": 0.42006938884440886, "grad_norm": 1.690173809325389, "learning_rate": 1.3026802178729985e-05, "loss": 1.643, "step": 3148 }, { "epoch": 0.42020282892981053, "grad_norm": 0.9819001583539148, "learning_rate": 1.302268272247364e-05, "loss": 1.613, "step": 3149 }, { "epoch": 0.42033626901521215, "grad_norm": 1.2486781524480748, "learning_rate": 1.3018562701614572e-05, "loss": 1.6, "step": 3150 }, { "epoch": 0.4204697091006138, "grad_norm": 1.075695894476628, "learning_rate": 1.3014442116922363e-05, "loss": 1.6246, "step": 3151 }, { "epoch": 0.4206031491860155, "grad_norm": 1.0010043297382096, "learning_rate": 1.3010320969166688e-05, "loss": 1.6071, "step": 3152 }, { "epoch": 0.42073658927141716, "grad_norm": 1.0686067267812616, "learning_rate": 1.300619925911733e-05, "loss": 1.5974, "step": 3153 }, { "epoch": 0.4208700293568188, "grad_norm": 1.5326383589995518, "learning_rate": 1.3002076987544173e-05, "loss": 1.616, "step": 3154 }, { "epoch": 0.42100346944222045, "grad_norm": 1.1448204497310523, "learning_rate": 1.2997954155217216e-05, "loss": 1.6295, "step": 3155 }, { "epoch": 0.4211369095276221, "grad_norm": 1.0093147440921253, "learning_rate": 1.2993830762906558e-05, "loss": 1.5872, "step": 3156 }, { "epoch": 0.42127034961302373, "grad_norm": 1.1083677568054542, "learning_rate": 1.29897068113824e-05, "loss": 1.6149, "step": 3157 }, { "epoch": 0.4214037896984254, "grad_norm": 1.0172339428080068, "learning_rate": 1.2985582301415045e-05, "loss": 1.6115, "step": 3158 }, { "epoch": 0.42153722978382707, "grad_norm": 1.122382756050523, "learning_rate": 1.298145723377491e-05, "loss": 1.5786, "step": 3159 }, { "epoch": 0.42167066986922874, "grad_norm": 1.0389932694756445, "learning_rate": 1.2977331609232511e-05, "loss": 1.634, "step": 3160 }, { "epoch": 0.42180410995463036, "grad_norm": 1.0581673343897444, "learning_rate": 1.2973205428558461e-05, "loss": 1.5624, "step": 3161 }, { "epoch": 0.42193755004003203, "grad_norm": 1.7234834631306422, "learning_rate": 1.2969078692523491e-05, "loss": 1.6275, "step": 3162 }, { "epoch": 0.4220709901254337, "grad_norm": 0.9813583900737928, "learning_rate": 1.2964951401898427e-05, "loss": 1.6535, "step": 3163 }, { "epoch": 0.4222044302108353, "grad_norm": 0.9898306998533509, "learning_rate": 1.2960823557454196e-05, "loss": 1.6149, "step": 3164 }, { "epoch": 0.422337870296237, "grad_norm": 1.0720992088919126, "learning_rate": 1.2956695159961835e-05, "loss": 1.6335, "step": 3165 }, { "epoch": 0.42247131038163865, "grad_norm": 0.9836128501217848, "learning_rate": 1.2952566210192483e-05, "loss": 1.6103, "step": 3166 }, { "epoch": 0.4226047504670403, "grad_norm": 1.0276047259882335, "learning_rate": 1.2948436708917377e-05, "loss": 1.529, "step": 3167 }, { "epoch": 0.42273819055244194, "grad_norm": 0.9856021354742732, "learning_rate": 1.2944306656907863e-05, "loss": 1.6331, "step": 3168 }, { "epoch": 0.4228716306378436, "grad_norm": 1.102227555293798, "learning_rate": 1.2940176054935392e-05, "loss": 1.6351, "step": 3169 }, { "epoch": 0.4230050707232453, "grad_norm": 1.163021632169652, "learning_rate": 1.2936044903771507e-05, "loss": 1.6315, "step": 3170 }, { "epoch": 0.4231385108086469, "grad_norm": 0.9872219387832729, "learning_rate": 1.293191320418786e-05, "loss": 1.5893, "step": 3171 }, { "epoch": 0.42327195089404857, "grad_norm": 0.9582985814406623, "learning_rate": 1.2927780956956208e-05, "loss": 1.5824, "step": 3172 }, { "epoch": 0.42340539097945024, "grad_norm": 1.1512050311999436, "learning_rate": 1.2923648162848407e-05, "loss": 1.5659, "step": 3173 }, { "epoch": 0.4235388310648519, "grad_norm": 0.9359070452550783, "learning_rate": 1.2919514822636419e-05, "loss": 1.5615, "step": 3174 }, { "epoch": 0.4236722711502535, "grad_norm": 1.095401798316347, "learning_rate": 1.29153809370923e-05, "loss": 1.5976, "step": 3175 }, { "epoch": 0.4238057112356552, "grad_norm": 1.148887324591379, "learning_rate": 1.2911246506988215e-05, "loss": 1.6529, "step": 3176 }, { "epoch": 0.42393915132105686, "grad_norm": 1.045994466470146, "learning_rate": 1.2907111533096429e-05, "loss": 1.6055, "step": 3177 }, { "epoch": 0.4240725914064585, "grad_norm": 1.0087477247067256, "learning_rate": 1.2902976016189304e-05, "loss": 1.6546, "step": 3178 }, { "epoch": 0.42420603149186015, "grad_norm": 1.016393015662281, "learning_rate": 1.2898839957039313e-05, "loss": 1.6213, "step": 3179 }, { "epoch": 0.4243394715772618, "grad_norm": 1.0626699808769553, "learning_rate": 1.2894703356419023e-05, "loss": 1.5953, "step": 3180 }, { "epoch": 0.4244729116626635, "grad_norm": 1.055001415948082, "learning_rate": 1.2890566215101103e-05, "loss": 1.6129, "step": 3181 }, { "epoch": 0.4246063517480651, "grad_norm": 1.289178506303072, "learning_rate": 1.2886428533858323e-05, "loss": 1.5734, "step": 3182 }, { "epoch": 0.4247397918334668, "grad_norm": 0.9875166066103552, "learning_rate": 1.2882290313463561e-05, "loss": 1.5684, "step": 3183 }, { "epoch": 0.42487323191886844, "grad_norm": 0.9865955139060382, "learning_rate": 1.2878151554689779e-05, "loss": 1.611, "step": 3184 }, { "epoch": 0.42500667200427006, "grad_norm": 0.9518489443348546, "learning_rate": 1.287401225831006e-05, "loss": 1.6425, "step": 3185 }, { "epoch": 0.42514011208967173, "grad_norm": 0.9998924842921293, "learning_rate": 1.286987242509757e-05, "loss": 1.5908, "step": 3186 }, { "epoch": 0.4252735521750734, "grad_norm": 0.9623933406546137, "learning_rate": 1.2865732055825584e-05, "loss": 1.6013, "step": 3187 }, { "epoch": 0.42540699226047507, "grad_norm": 1.014360008466468, "learning_rate": 1.2861591151267483e-05, "loss": 1.6092, "step": 3188 }, { "epoch": 0.4255404323458767, "grad_norm": 1.0008036520235628, "learning_rate": 1.2857449712196733e-05, "loss": 1.6417, "step": 3189 }, { "epoch": 0.42567387243127836, "grad_norm": 1.0695928415515632, "learning_rate": 1.2853307739386908e-05, "loss": 1.6225, "step": 3190 }, { "epoch": 0.42580731251668, "grad_norm": 0.9505901957555256, "learning_rate": 1.2849165233611687e-05, "loss": 1.5678, "step": 3191 }, { "epoch": 0.42594075260208164, "grad_norm": 1.0097739379690966, "learning_rate": 1.2845022195644838e-05, "loss": 1.6279, "step": 3192 }, { "epoch": 0.4260741926874833, "grad_norm": 0.9745534806645423, "learning_rate": 1.2840878626260231e-05, "loss": 1.6465, "step": 3193 }, { "epoch": 0.426207632772885, "grad_norm": 0.9315201229811604, "learning_rate": 1.2836734526231844e-05, "loss": 1.518, "step": 3194 }, { "epoch": 0.42634107285828665, "grad_norm": 0.9736368620203836, "learning_rate": 1.2832589896333747e-05, "loss": 1.5971, "step": 3195 }, { "epoch": 0.42647451294368827, "grad_norm": 1.1002775590993574, "learning_rate": 1.2828444737340105e-05, "loss": 1.6011, "step": 3196 }, { "epoch": 0.42660795302908994, "grad_norm": 1.0076905774463565, "learning_rate": 1.282429905002519e-05, "loss": 1.6084, "step": 3197 }, { "epoch": 0.4267413931144916, "grad_norm": 0.9760408811448265, "learning_rate": 1.2820152835163366e-05, "loss": 1.5717, "step": 3198 }, { "epoch": 0.4268748331998932, "grad_norm": 0.9853900063619548, "learning_rate": 1.2816006093529106e-05, "loss": 1.6631, "step": 3199 }, { "epoch": 0.4270082732852949, "grad_norm": 1.2084382692384266, "learning_rate": 1.2811858825896965e-05, "loss": 1.6046, "step": 3200 }, { "epoch": 0.42714171337069656, "grad_norm": 0.9676385504121742, "learning_rate": 1.2807711033041613e-05, "loss": 1.6724, "step": 3201 }, { "epoch": 0.42727515345609823, "grad_norm": 0.9778031832163321, "learning_rate": 1.2803562715737802e-05, "loss": 1.5844, "step": 3202 }, { "epoch": 0.42740859354149985, "grad_norm": 0.954310934487862, "learning_rate": 1.2799413874760398e-05, "loss": 1.5669, "step": 3203 }, { "epoch": 0.4275420336269015, "grad_norm": 1.102491655832009, "learning_rate": 1.2795264510884357e-05, "loss": 1.5134, "step": 3204 }, { "epoch": 0.4276754737123032, "grad_norm": 1.1945300325637898, "learning_rate": 1.2791114624884728e-05, "loss": 1.6208, "step": 3205 }, { "epoch": 0.4278089137977048, "grad_norm": 1.0204751169748503, "learning_rate": 1.2786964217536666e-05, "loss": 1.5952, "step": 3206 }, { "epoch": 0.4279423538831065, "grad_norm": 1.2582481061790463, "learning_rate": 1.278281328961542e-05, "loss": 1.5961, "step": 3207 }, { "epoch": 0.42807579396850814, "grad_norm": 1.0191124850439586, "learning_rate": 1.2778661841896333e-05, "loss": 1.6123, "step": 3208 }, { "epoch": 0.4282092340539098, "grad_norm": 1.135529578537383, "learning_rate": 1.277450987515485e-05, "loss": 1.6442, "step": 3209 }, { "epoch": 0.42834267413931143, "grad_norm": 1.099285147964999, "learning_rate": 1.2770357390166513e-05, "loss": 1.6706, "step": 3210 }, { "epoch": 0.4284761142247131, "grad_norm": 1.017546212676164, "learning_rate": 1.2766204387706955e-05, "loss": 1.582, "step": 3211 }, { "epoch": 0.42860955431011477, "grad_norm": 1.1530348114411721, "learning_rate": 1.2762050868551913e-05, "loss": 1.6239, "step": 3212 }, { "epoch": 0.4287429943955164, "grad_norm": 0.9715880360042976, "learning_rate": 1.275789683347722e-05, "loss": 1.578, "step": 3213 }, { "epoch": 0.42887643448091806, "grad_norm": 0.9866979071658567, "learning_rate": 1.2753742283258793e-05, "loss": 1.6167, "step": 3214 }, { "epoch": 0.4290098745663197, "grad_norm": 0.9922836437350845, "learning_rate": 1.2749587218672663e-05, "loss": 1.6215, "step": 3215 }, { "epoch": 0.4291433146517214, "grad_norm": 1.2015902362721134, "learning_rate": 1.2745431640494944e-05, "loss": 1.6423, "step": 3216 }, { "epoch": 0.429276754737123, "grad_norm": 1.0161108583369185, "learning_rate": 1.2741275549501853e-05, "loss": 1.6025, "step": 3217 }, { "epoch": 0.4294101948225247, "grad_norm": 0.9630893697181409, "learning_rate": 1.2737118946469697e-05, "loss": 1.5888, "step": 3218 }, { "epoch": 0.42954363490792635, "grad_norm": 0.9590114370085036, "learning_rate": 1.2732961832174888e-05, "loss": 1.5735, "step": 3219 }, { "epoch": 0.429677074993328, "grad_norm": 0.9698052625279515, "learning_rate": 1.2728804207393925e-05, "loss": 1.5758, "step": 3220 }, { "epoch": 0.42981051507872964, "grad_norm": 1.1113879856734203, "learning_rate": 1.2724646072903403e-05, "loss": 1.5833, "step": 3221 }, { "epoch": 0.4299439551641313, "grad_norm": 0.9741250433041043, "learning_rate": 1.2720487429480017e-05, "loss": 1.604, "step": 3222 }, { "epoch": 0.430077395249533, "grad_norm": 1.0434937344586053, "learning_rate": 1.2716328277900553e-05, "loss": 1.6282, "step": 3223 }, { "epoch": 0.4302108353349346, "grad_norm": 1.218598876665266, "learning_rate": 1.2712168618941895e-05, "loss": 1.6405, "step": 3224 }, { "epoch": 0.43034427542033626, "grad_norm": 0.9274834745968594, "learning_rate": 1.2708008453381015e-05, "loss": 1.5594, "step": 3225 }, { "epoch": 0.43047771550573793, "grad_norm": 0.9572245478353469, "learning_rate": 1.2703847781994988e-05, "loss": 1.5985, "step": 3226 }, { "epoch": 0.4306111555911396, "grad_norm": 0.9804393710597245, "learning_rate": 1.2699686605560984e-05, "loss": 1.6098, "step": 3227 }, { "epoch": 0.4307445956765412, "grad_norm": 0.9465366010602443, "learning_rate": 1.2695524924856252e-05, "loss": 1.6262, "step": 3228 }, { "epoch": 0.4308780357619429, "grad_norm": 1.1417073930345842, "learning_rate": 1.2691362740658162e-05, "loss": 1.6277, "step": 3229 }, { "epoch": 0.43101147584734456, "grad_norm": 1.078579390525697, "learning_rate": 1.2687200053744148e-05, "loss": 1.5878, "step": 3230 }, { "epoch": 0.4311449159327462, "grad_norm": 0.9835105604076124, "learning_rate": 1.2683036864891762e-05, "loss": 1.585, "step": 3231 }, { "epoch": 0.43127835601814785, "grad_norm": 0.9942457861542611, "learning_rate": 1.2678873174878637e-05, "loss": 1.6226, "step": 3232 }, { "epoch": 0.4314117961035495, "grad_norm": 1.077188400186942, "learning_rate": 1.2674708984482503e-05, "loss": 1.617, "step": 3233 }, { "epoch": 0.4315452361889512, "grad_norm": 0.9450353505746655, "learning_rate": 1.2670544294481184e-05, "loss": 1.5642, "step": 3234 }, { "epoch": 0.4316786762743528, "grad_norm": 1.0397917929273337, "learning_rate": 1.2666379105652593e-05, "loss": 1.5236, "step": 3235 }, { "epoch": 0.43181211635975447, "grad_norm": 1.2967775820644207, "learning_rate": 1.2662213418774747e-05, "loss": 1.6222, "step": 3236 }, { "epoch": 0.43194555644515614, "grad_norm": 0.9953395285218007, "learning_rate": 1.2658047234625741e-05, "loss": 1.6417, "step": 3237 }, { "epoch": 0.43207899653055776, "grad_norm": 1.0148344019514997, "learning_rate": 1.2653880553983777e-05, "loss": 1.6523, "step": 3238 }, { "epoch": 0.4322124366159594, "grad_norm": 0.919347172987927, "learning_rate": 1.264971337762714e-05, "loss": 1.5869, "step": 3239 }, { "epoch": 0.4323458767013611, "grad_norm": 0.9647756161069322, "learning_rate": 1.2645545706334213e-05, "loss": 1.6183, "step": 3240 }, { "epoch": 0.43247931678676277, "grad_norm": 0.9857220961118699, "learning_rate": 1.2641377540883469e-05, "loss": 1.5573, "step": 3241 }, { "epoch": 0.4326127568721644, "grad_norm": 1.004715345407096, "learning_rate": 1.2637208882053469e-05, "loss": 1.5961, "step": 3242 }, { "epoch": 0.43274619695756605, "grad_norm": 0.951945556321612, "learning_rate": 1.2633039730622883e-05, "loss": 1.5411, "step": 3243 }, { "epoch": 0.4328796370429677, "grad_norm": 0.9907591493074744, "learning_rate": 1.2628870087370446e-05, "loss": 1.5953, "step": 3244 }, { "epoch": 0.43301307712836934, "grad_norm": 0.9807352408063607, "learning_rate": 1.2624699953075015e-05, "loss": 1.5794, "step": 3245 }, { "epoch": 0.433146517213771, "grad_norm": 1.3426423736461461, "learning_rate": 1.262052932851551e-05, "loss": 1.6409, "step": 3246 }, { "epoch": 0.4332799572991727, "grad_norm": 0.9714093568492226, "learning_rate": 1.2616358214470967e-05, "loss": 1.5626, "step": 3247 }, { "epoch": 0.43341339738457435, "grad_norm": 1.2115238526991772, "learning_rate": 1.2612186611720494e-05, "loss": 1.5797, "step": 3248 }, { "epoch": 0.43354683746997597, "grad_norm": 1.1075061582793597, "learning_rate": 1.2608014521043305e-05, "loss": 1.6192, "step": 3249 }, { "epoch": 0.43368027755537764, "grad_norm": 0.9318065689517002, "learning_rate": 1.2603841943218695e-05, "loss": 1.5508, "step": 3250 }, { "epoch": 0.4338137176407793, "grad_norm": 0.9342106979423349, "learning_rate": 1.2599668879026057e-05, "loss": 1.5932, "step": 3251 }, { "epoch": 0.4339471577261809, "grad_norm": 0.9881294610347703, "learning_rate": 1.259549532924487e-05, "loss": 1.6374, "step": 3252 }, { "epoch": 0.4340805978115826, "grad_norm": 0.9842232409667695, "learning_rate": 1.259132129465471e-05, "loss": 1.5709, "step": 3253 }, { "epoch": 0.43421403789698426, "grad_norm": 1.0325015498789856, "learning_rate": 1.2587146776035233e-05, "loss": 1.6679, "step": 3254 }, { "epoch": 0.43434747798238593, "grad_norm": 0.9483677954324838, "learning_rate": 1.2582971774166195e-05, "loss": 1.6129, "step": 3255 }, { "epoch": 0.43448091806778755, "grad_norm": 1.0038948510108203, "learning_rate": 1.2578796289827437e-05, "loss": 1.639, "step": 3256 }, { "epoch": 0.4346143581531892, "grad_norm": 1.0870982465609627, "learning_rate": 1.2574620323798891e-05, "loss": 1.559, "step": 3257 }, { "epoch": 0.4347477982385909, "grad_norm": 1.1919615794250933, "learning_rate": 1.257044387686058e-05, "loss": 1.5857, "step": 3258 }, { "epoch": 0.4348812383239925, "grad_norm": 2.641937471598561, "learning_rate": 1.2566266949792625e-05, "loss": 1.5931, "step": 3259 }, { "epoch": 0.4350146784093942, "grad_norm": 0.9743834744800941, "learning_rate": 1.2562089543375215e-05, "loss": 1.6235, "step": 3260 }, { "epoch": 0.43514811849479584, "grad_norm": 1.1785303574207608, "learning_rate": 1.2557911658388655e-05, "loss": 1.6243, "step": 3261 }, { "epoch": 0.4352815585801975, "grad_norm": 1.3537174009151587, "learning_rate": 1.2553733295613314e-05, "loss": 1.5904, "step": 3262 }, { "epoch": 0.43541499866559913, "grad_norm": 1.0054913667026213, "learning_rate": 1.2549554455829676e-05, "loss": 1.5623, "step": 3263 }, { "epoch": 0.4355484387510008, "grad_norm": 1.1821789472170066, "learning_rate": 1.2545375139818287e-05, "loss": 1.5667, "step": 3264 }, { "epoch": 0.43568187883640247, "grad_norm": 1.0060542893441342, "learning_rate": 1.2541195348359805e-05, "loss": 1.6021, "step": 3265 }, { "epoch": 0.4358153189218041, "grad_norm": 0.9774430570620395, "learning_rate": 1.2537015082234963e-05, "loss": 1.6073, "step": 3266 }, { "epoch": 0.43594875900720576, "grad_norm": 1.031179584529375, "learning_rate": 1.253283434222459e-05, "loss": 1.6927, "step": 3267 }, { "epoch": 0.4360821990926074, "grad_norm": 1.66539793901367, "learning_rate": 1.2528653129109597e-05, "loss": 1.6507, "step": 3268 }, { "epoch": 0.4362156391780091, "grad_norm": 1.0734574677292559, "learning_rate": 1.2524471443670992e-05, "loss": 1.6163, "step": 3269 }, { "epoch": 0.4363490792634107, "grad_norm": 1.0252180431657283, "learning_rate": 1.2520289286689864e-05, "loss": 1.6268, "step": 3270 }, { "epoch": 0.4364825193488124, "grad_norm": 1.0638123845106042, "learning_rate": 1.2516106658947389e-05, "loss": 1.5639, "step": 3271 }, { "epoch": 0.43661595943421405, "grad_norm": 0.9457091191118885, "learning_rate": 1.251192356122484e-05, "loss": 1.6036, "step": 3272 }, { "epoch": 0.43674939951961567, "grad_norm": 0.9937359629999882, "learning_rate": 1.2507739994303564e-05, "loss": 1.6365, "step": 3273 }, { "epoch": 0.43688283960501734, "grad_norm": 1.1942032412216412, "learning_rate": 1.2503555958965014e-05, "loss": 1.6593, "step": 3274 }, { "epoch": 0.437016279690419, "grad_norm": 1.0002882913147546, "learning_rate": 1.2499371455990714e-05, "loss": 1.5772, "step": 3275 }, { "epoch": 0.4371497197758207, "grad_norm": 0.9629918020750007, "learning_rate": 1.2495186486162284e-05, "loss": 1.5488, "step": 3276 }, { "epoch": 0.4372831598612223, "grad_norm": 0.9857190069244037, "learning_rate": 1.2491001050261425e-05, "loss": 1.6248, "step": 3277 }, { "epoch": 0.43741659994662396, "grad_norm": 1.099517425168157, "learning_rate": 1.2486815149069928e-05, "loss": 1.5877, "step": 3278 }, { "epoch": 0.43755004003202563, "grad_norm": 1.1165501715980635, "learning_rate": 1.248262878336968e-05, "loss": 1.6246, "step": 3279 }, { "epoch": 0.43768348011742725, "grad_norm": 1.2759947966412557, "learning_rate": 1.2478441953942637e-05, "loss": 1.579, "step": 3280 }, { "epoch": 0.4378169202028289, "grad_norm": 1.042737881072034, "learning_rate": 1.2474254661570858e-05, "loss": 1.6238, "step": 3281 }, { "epoch": 0.4379503602882306, "grad_norm": 0.9997308223160407, "learning_rate": 1.2470066907036475e-05, "loss": 1.6247, "step": 3282 }, { "epoch": 0.43808380037363226, "grad_norm": 0.9873336853234015, "learning_rate": 1.2465878691121717e-05, "loss": 1.594, "step": 3283 }, { "epoch": 0.4382172404590339, "grad_norm": 1.073937141396183, "learning_rate": 1.2461690014608898e-05, "loss": 1.5442, "step": 3284 }, { "epoch": 0.43835068054443554, "grad_norm": 1.066440340364437, "learning_rate": 1.2457500878280408e-05, "loss": 1.5947, "step": 3285 }, { "epoch": 0.4384841206298372, "grad_norm": 0.9823058406854864, "learning_rate": 1.2453311282918738e-05, "loss": 1.5988, "step": 3286 }, { "epoch": 0.43861756071523883, "grad_norm": 0.9529342446435208, "learning_rate": 1.2449121229306449e-05, "loss": 1.5879, "step": 3287 }, { "epoch": 0.4387510008006405, "grad_norm": 1.2995317730664497, "learning_rate": 1.2444930718226201e-05, "loss": 1.6078, "step": 3288 }, { "epoch": 0.43888444088604217, "grad_norm": 1.0273219898248251, "learning_rate": 1.2440739750460728e-05, "loss": 1.5873, "step": 3289 }, { "epoch": 0.43901788097144384, "grad_norm": 0.9613704608116244, "learning_rate": 1.2436548326792858e-05, "loss": 1.5971, "step": 3290 }, { "epoch": 0.43915132105684546, "grad_norm": 0.9741621574671587, "learning_rate": 1.2432356448005507e-05, "loss": 1.5664, "step": 3291 }, { "epoch": 0.4392847611422471, "grad_norm": 0.9421233395790419, "learning_rate": 1.2428164114881663e-05, "loss": 1.6159, "step": 3292 }, { "epoch": 0.4394182012276488, "grad_norm": 0.9714375843061244, "learning_rate": 1.2423971328204407e-05, "loss": 1.5948, "step": 3293 }, { "epoch": 0.43955164131305047, "grad_norm": 0.9853929138462288, "learning_rate": 1.2419778088756904e-05, "loss": 1.6295, "step": 3294 }, { "epoch": 0.4396850813984521, "grad_norm": 0.9658869477722872, "learning_rate": 1.2415584397322406e-05, "loss": 1.6026, "step": 3295 }, { "epoch": 0.43981852148385375, "grad_norm": 0.9824798838337809, "learning_rate": 1.2411390254684246e-05, "loss": 1.6511, "step": 3296 }, { "epoch": 0.4399519615692554, "grad_norm": 1.1148315528827775, "learning_rate": 1.2407195661625838e-05, "loss": 1.6555, "step": 3297 }, { "epoch": 0.44008540165465704, "grad_norm": 0.9935944482329716, "learning_rate": 1.240300061893069e-05, "loss": 1.5694, "step": 3298 }, { "epoch": 0.4402188417400587, "grad_norm": 1.0104396213439037, "learning_rate": 1.2398805127382382e-05, "loss": 1.6266, "step": 3299 }, { "epoch": 0.4403522818254604, "grad_norm": 0.9406209717223495, "learning_rate": 1.2394609187764593e-05, "loss": 1.6403, "step": 3300 }, { "epoch": 0.44048572191086205, "grad_norm": 0.9669078454658077, "learning_rate": 1.2390412800861066e-05, "loss": 1.614, "step": 3301 }, { "epoch": 0.44061916199626366, "grad_norm": 0.9556156862945018, "learning_rate": 1.2386215967455648e-05, "loss": 1.5857, "step": 3302 }, { "epoch": 0.44075260208166533, "grad_norm": 0.9777122703402282, "learning_rate": 1.2382018688332251e-05, "loss": 1.6382, "step": 3303 }, { "epoch": 0.440886042167067, "grad_norm": 0.9515365211823855, "learning_rate": 1.2377820964274887e-05, "loss": 1.5879, "step": 3304 }, { "epoch": 0.4410194822524686, "grad_norm": 0.9807886665234027, "learning_rate": 1.2373622796067637e-05, "loss": 1.6029, "step": 3305 }, { "epoch": 0.4411529223378703, "grad_norm": 0.9635105279990112, "learning_rate": 1.2369424184494673e-05, "loss": 1.5992, "step": 3306 }, { "epoch": 0.44128636242327196, "grad_norm": 0.9665429167972764, "learning_rate": 1.236522513034025e-05, "loss": 1.5908, "step": 3307 }, { "epoch": 0.44141980250867363, "grad_norm": 1.0554959304610874, "learning_rate": 1.2361025634388701e-05, "loss": 1.6039, "step": 3308 }, { "epoch": 0.44155324259407525, "grad_norm": 1.206338234294267, "learning_rate": 1.2356825697424449e-05, "loss": 1.6125, "step": 3309 }, { "epoch": 0.4416866826794769, "grad_norm": 0.9701936211446273, "learning_rate": 1.2352625320231984e-05, "loss": 1.5516, "step": 3310 }, { "epoch": 0.4418201227648786, "grad_norm": 1.011429585012499, "learning_rate": 1.2348424503595898e-05, "loss": 1.6186, "step": 3311 }, { "epoch": 0.4419535628502802, "grad_norm": 1.1168012771237403, "learning_rate": 1.234422324830085e-05, "loss": 1.565, "step": 3312 }, { "epoch": 0.44208700293568187, "grad_norm": 1.0738983437774423, "learning_rate": 1.2340021555131592e-05, "loss": 1.6279, "step": 3313 }, { "epoch": 0.44222044302108354, "grad_norm": 0.9987855843220456, "learning_rate": 1.2335819424872948e-05, "loss": 1.5884, "step": 3314 }, { "epoch": 0.4423538831064852, "grad_norm": 1.1047058977235424, "learning_rate": 1.233161685830983e-05, "loss": 1.6259, "step": 3315 }, { "epoch": 0.4424873231918868, "grad_norm": 12.94394608036084, "learning_rate": 1.2327413856227231e-05, "loss": 1.6551, "step": 3316 }, { "epoch": 0.4426207632772885, "grad_norm": 0.9922991075854602, "learning_rate": 1.232321041941022e-05, "loss": 1.6057, "step": 3317 }, { "epoch": 0.44275420336269017, "grad_norm": 1.5832757455395445, "learning_rate": 1.2319006548643955e-05, "loss": 1.5688, "step": 3318 }, { "epoch": 0.4428876434480918, "grad_norm": 1.0990617479523308, "learning_rate": 1.2314802244713671e-05, "loss": 1.6046, "step": 3319 }, { "epoch": 0.44302108353349345, "grad_norm": 1.1847275698751005, "learning_rate": 1.2310597508404683e-05, "loss": 1.5834, "step": 3320 }, { "epoch": 0.4431545236188951, "grad_norm": 1.044830968363967, "learning_rate": 1.2306392340502382e-05, "loss": 1.5843, "step": 3321 }, { "epoch": 0.4432879637042968, "grad_norm": 1.2604211119820607, "learning_rate": 1.2302186741792255e-05, "loss": 1.6076, "step": 3322 }, { "epoch": 0.4434214037896984, "grad_norm": 1.041133538088134, "learning_rate": 1.2297980713059857e-05, "loss": 1.6237, "step": 3323 }, { "epoch": 0.4435548438751001, "grad_norm": 1.4649823119870107, "learning_rate": 1.229377425509082e-05, "loss": 1.6073, "step": 3324 }, { "epoch": 0.44368828396050175, "grad_norm": 0.9934245058649951, "learning_rate": 1.2289567368670873e-05, "loss": 1.5818, "step": 3325 }, { "epoch": 0.44382172404590337, "grad_norm": 0.9664114510742975, "learning_rate": 1.2285360054585807e-05, "loss": 1.644, "step": 3326 }, { "epoch": 0.44395516413130504, "grad_norm": 1.015993424077106, "learning_rate": 1.2281152313621505e-05, "loss": 1.5808, "step": 3327 }, { "epoch": 0.4440886042167067, "grad_norm": 1.0461329594907725, "learning_rate": 1.2276944146563918e-05, "loss": 1.6381, "step": 3328 }, { "epoch": 0.4442220443021084, "grad_norm": 1.0135143248090914, "learning_rate": 1.2272735554199091e-05, "loss": 1.577, "step": 3329 }, { "epoch": 0.44435548438751, "grad_norm": 1.0070141228550389, "learning_rate": 1.2268526537313142e-05, "loss": 1.5906, "step": 3330 }, { "epoch": 0.44448892447291166, "grad_norm": 0.9761989502121017, "learning_rate": 1.2264317096692257e-05, "loss": 1.5424, "step": 3331 }, { "epoch": 0.44462236455831333, "grad_norm": 0.9755643448669162, "learning_rate": 1.2260107233122724e-05, "loss": 1.621, "step": 3332 }, { "epoch": 0.44475580464371495, "grad_norm": 1.1020152124691254, "learning_rate": 1.2255896947390891e-05, "loss": 1.5605, "step": 3333 }, { "epoch": 0.4448892447291166, "grad_norm": 1.3925175705055546, "learning_rate": 1.2251686240283191e-05, "loss": 1.6224, "step": 3334 }, { "epoch": 0.4450226848145183, "grad_norm": 1.0243447818889067, "learning_rate": 1.224747511258614e-05, "loss": 1.6326, "step": 3335 }, { "epoch": 0.44515612489991996, "grad_norm": 1.064152850059643, "learning_rate": 1.2243263565086325e-05, "loss": 1.5844, "step": 3336 }, { "epoch": 0.4452895649853216, "grad_norm": 1.0995677570425262, "learning_rate": 1.2239051598570417e-05, "loss": 1.6008, "step": 3337 }, { "epoch": 0.44542300507072324, "grad_norm": 0.9935950777647963, "learning_rate": 1.2234839213825163e-05, "loss": 1.6462, "step": 3338 }, { "epoch": 0.4455564451561249, "grad_norm": 0.9919635923562781, "learning_rate": 1.2230626411637388e-05, "loss": 1.6029, "step": 3339 }, { "epoch": 0.44568988524152653, "grad_norm": 1.3321534487303814, "learning_rate": 1.2226413192793998e-05, "loss": 1.6439, "step": 3340 }, { "epoch": 0.4458233253269282, "grad_norm": 1.0388019535817785, "learning_rate": 1.222219955808197e-05, "loss": 1.6172, "step": 3341 }, { "epoch": 0.44595676541232987, "grad_norm": 0.9948371593169276, "learning_rate": 1.2217985508288366e-05, "loss": 1.5684, "step": 3342 }, { "epoch": 0.44609020549773154, "grad_norm": 0.9566696947998643, "learning_rate": 1.2213771044200323e-05, "loss": 1.5894, "step": 3343 }, { "epoch": 0.44622364558313315, "grad_norm": 1.0144760346896669, "learning_rate": 1.220955616660505e-05, "loss": 1.6187, "step": 3344 }, { "epoch": 0.4463570856685348, "grad_norm": 0.9783013624017898, "learning_rate": 1.2205340876289842e-05, "loss": 1.5798, "step": 3345 }, { "epoch": 0.4464905257539365, "grad_norm": 1.1775470120965925, "learning_rate": 1.220112517404207e-05, "loss": 1.6027, "step": 3346 }, { "epoch": 0.4466239658393381, "grad_norm": 1.0832460341808767, "learning_rate": 1.2196909060649173e-05, "loss": 1.6384, "step": 3347 }, { "epoch": 0.4467574059247398, "grad_norm": 1.5868021023209082, "learning_rate": 1.219269253689868e-05, "loss": 1.5758, "step": 3348 }, { "epoch": 0.44689084601014145, "grad_norm": 1.1241637872673438, "learning_rate": 1.2188475603578186e-05, "loss": 1.5948, "step": 3349 }, { "epoch": 0.4470242860955431, "grad_norm": 0.9779424708497318, "learning_rate": 1.2184258261475364e-05, "loss": 1.6002, "step": 3350 }, { "epoch": 0.44715772618094474, "grad_norm": 0.986774564760355, "learning_rate": 1.2180040511377966e-05, "loss": 1.5651, "step": 3351 }, { "epoch": 0.4472911662663464, "grad_norm": 0.9605234817512258, "learning_rate": 1.2175822354073826e-05, "loss": 1.5525, "step": 3352 }, { "epoch": 0.4474246063517481, "grad_norm": 0.9492182807666106, "learning_rate": 1.2171603790350836e-05, "loss": 1.5657, "step": 3353 }, { "epoch": 0.4475580464371497, "grad_norm": 1.004022400441443, "learning_rate": 1.2167384820996988e-05, "loss": 1.5807, "step": 3354 }, { "epoch": 0.44769148652255136, "grad_norm": 0.9732333755288013, "learning_rate": 1.2163165446800332e-05, "loss": 1.6453, "step": 3355 }, { "epoch": 0.44782492660795303, "grad_norm": 1.0036906910118235, "learning_rate": 1.2158945668548997e-05, "loss": 1.6128, "step": 3356 }, { "epoch": 0.4479583666933547, "grad_norm": 1.0085064642711343, "learning_rate": 1.215472548703119e-05, "loss": 1.613, "step": 3357 }, { "epoch": 0.4480918067787563, "grad_norm": 1.2764967944347796, "learning_rate": 1.2150504903035196e-05, "loss": 1.5606, "step": 3358 }, { "epoch": 0.448225246864158, "grad_norm": 0.995504684910624, "learning_rate": 1.2146283917349373e-05, "loss": 1.6166, "step": 3359 }, { "epoch": 0.44835868694955966, "grad_norm": 0.9568604455855414, "learning_rate": 1.214206253076215e-05, "loss": 1.5966, "step": 3360 }, { "epoch": 0.44849212703496133, "grad_norm": 1.1039131846221917, "learning_rate": 1.2137840744062032e-05, "loss": 1.5601, "step": 3361 }, { "epoch": 0.44862556712036294, "grad_norm": 1.005103856197314, "learning_rate": 1.2133618558037607e-05, "loss": 1.5586, "step": 3362 }, { "epoch": 0.4487590072057646, "grad_norm": 0.9893281059878032, "learning_rate": 1.2129395973477522e-05, "loss": 1.664, "step": 3363 }, { "epoch": 0.4488924472911663, "grad_norm": 1.3408090451344328, "learning_rate": 1.212517299117052e-05, "loss": 1.5822, "step": 3364 }, { "epoch": 0.4490258873765679, "grad_norm": 1.1012723616514362, "learning_rate": 1.2120949611905393e-05, "loss": 1.5357, "step": 3365 }, { "epoch": 0.44915932746196957, "grad_norm": 1.1116159284883955, "learning_rate": 1.2116725836471031e-05, "loss": 1.5744, "step": 3366 }, { "epoch": 0.44929276754737124, "grad_norm": 0.9861652273388498, "learning_rate": 1.211250166565638e-05, "loss": 1.6247, "step": 3367 }, { "epoch": 0.4494262076327729, "grad_norm": 1.1947078047934485, "learning_rate": 1.2108277100250472e-05, "loss": 1.6308, "step": 3368 }, { "epoch": 0.4495596477181745, "grad_norm": 0.946296734521657, "learning_rate": 1.2104052141042402e-05, "loss": 1.5815, "step": 3369 }, { "epoch": 0.4496930878035762, "grad_norm": 0.9521803217831275, "learning_rate": 1.2099826788821347e-05, "loss": 1.6103, "step": 3370 }, { "epoch": 0.44982652788897787, "grad_norm": 0.9822551188681323, "learning_rate": 1.2095601044376558e-05, "loss": 1.5977, "step": 3371 }, { "epoch": 0.4499599679743795, "grad_norm": 0.9614883569465209, "learning_rate": 1.2091374908497352e-05, "loss": 1.5943, "step": 3372 }, { "epoch": 0.45009340805978115, "grad_norm": 1.0694103742791419, "learning_rate": 1.2087148381973126e-05, "loss": 1.6066, "step": 3373 }, { "epoch": 0.4502268481451828, "grad_norm": 1.1709430242560819, "learning_rate": 1.2082921465593345e-05, "loss": 1.6409, "step": 3374 }, { "epoch": 0.4503602882305845, "grad_norm": 0.9918589860941452, "learning_rate": 1.2078694160147549e-05, "loss": 1.5869, "step": 3375 }, { "epoch": 0.4504937283159861, "grad_norm": 1.189016080561096, "learning_rate": 1.2074466466425348e-05, "loss": 1.6317, "step": 3376 }, { "epoch": 0.4506271684013878, "grad_norm": 1.0693280010712716, "learning_rate": 1.2070238385216431e-05, "loss": 1.5298, "step": 3377 }, { "epoch": 0.45076060848678945, "grad_norm": 0.9802287723386353, "learning_rate": 1.2066009917310557e-05, "loss": 1.5671, "step": 3378 }, { "epoch": 0.45089404857219106, "grad_norm": 1.0001980093487308, "learning_rate": 1.2061781063497549e-05, "loss": 1.57, "step": 3379 }, { "epoch": 0.45102748865759273, "grad_norm": 1.1240347451070862, "learning_rate": 1.2057551824567315e-05, "loss": 1.6214, "step": 3380 }, { "epoch": 0.4511609287429944, "grad_norm": 1.016259709659121, "learning_rate": 1.2053322201309827e-05, "loss": 1.5418, "step": 3381 }, { "epoch": 0.4512943688283961, "grad_norm": 0.9810480790681763, "learning_rate": 1.2049092194515129e-05, "loss": 1.5886, "step": 3382 }, { "epoch": 0.4514278089137977, "grad_norm": 1.0999439267701696, "learning_rate": 1.2044861804973339e-05, "loss": 1.6381, "step": 3383 }, { "epoch": 0.45156124899919936, "grad_norm": 0.9599654396714106, "learning_rate": 1.2040631033474645e-05, "loss": 1.5533, "step": 3384 }, { "epoch": 0.45169468908460103, "grad_norm": 0.9466851026826987, "learning_rate": 1.2036399880809307e-05, "loss": 1.5944, "step": 3385 }, { "epoch": 0.45182812917000265, "grad_norm": 0.9678219133269332, "learning_rate": 1.2032168347767656e-05, "loss": 1.6415, "step": 3386 }, { "epoch": 0.4519615692554043, "grad_norm": 1.0940832917339585, "learning_rate": 1.2027936435140097e-05, "loss": 1.5896, "step": 3387 }, { "epoch": 0.452095009340806, "grad_norm": 1.352412592314921, "learning_rate": 1.2023704143717099e-05, "loss": 1.5859, "step": 3388 }, { "epoch": 0.45222844942620766, "grad_norm": 1.0442749639629683, "learning_rate": 1.2019471474289209e-05, "loss": 1.603, "step": 3389 }, { "epoch": 0.45236188951160927, "grad_norm": 1.1089040085027504, "learning_rate": 1.2015238427647039e-05, "loss": 1.5855, "step": 3390 }, { "epoch": 0.45249532959701094, "grad_norm": 1.172896894033313, "learning_rate": 1.2011005004581275e-05, "loss": 1.5967, "step": 3391 }, { "epoch": 0.4526287696824126, "grad_norm": 1.0422201350270612, "learning_rate": 1.2006771205882673e-05, "loss": 1.588, "step": 3392 }, { "epoch": 0.4527622097678142, "grad_norm": 0.9833998073404502, "learning_rate": 1.2002537032342054e-05, "loss": 1.6304, "step": 3393 }, { "epoch": 0.4528956498532159, "grad_norm": 1.1092253938233356, "learning_rate": 1.1998302484750322e-05, "loss": 1.6304, "step": 3394 }, { "epoch": 0.45302908993861757, "grad_norm": 1.0958444605435562, "learning_rate": 1.1994067563898435e-05, "loss": 1.5521, "step": 3395 }, { "epoch": 0.45316253002401924, "grad_norm": 0.9941289195859265, "learning_rate": 1.1989832270577432e-05, "loss": 1.5917, "step": 3396 }, { "epoch": 0.45329597010942085, "grad_norm": 1.0144543411061597, "learning_rate": 1.1985596605578413e-05, "loss": 1.5596, "step": 3397 }, { "epoch": 0.4534294101948225, "grad_norm": 0.9660753780508365, "learning_rate": 1.198136056969256e-05, "loss": 1.5673, "step": 3398 }, { "epoch": 0.4535628502802242, "grad_norm": 1.1410534595473867, "learning_rate": 1.1977124163711108e-05, "loss": 1.6167, "step": 3399 }, { "epoch": 0.4536962903656258, "grad_norm": 0.9906940152019744, "learning_rate": 1.1972887388425374e-05, "loss": 1.5943, "step": 3400 }, { "epoch": 0.4538297304510275, "grad_norm": 1.1280922031572023, "learning_rate": 1.1968650244626733e-05, "loss": 1.6018, "step": 3401 }, { "epoch": 0.45396317053642915, "grad_norm": 1.2472440687519395, "learning_rate": 1.1964412733106648e-05, "loss": 1.5388, "step": 3402 }, { "epoch": 0.4540966106218308, "grad_norm": 1.0569580420732267, "learning_rate": 1.1960174854656623e-05, "loss": 1.5699, "step": 3403 }, { "epoch": 0.45423005070723244, "grad_norm": 1.0325493795551215, "learning_rate": 1.1955936610068257e-05, "loss": 1.6212, "step": 3404 }, { "epoch": 0.4543634907926341, "grad_norm": 0.9811755437676621, "learning_rate": 1.1951698000133203e-05, "loss": 1.6113, "step": 3405 }, { "epoch": 0.4544969308780358, "grad_norm": 1.2389436209658955, "learning_rate": 1.1947459025643177e-05, "loss": 1.6252, "step": 3406 }, { "epoch": 0.4546303709634374, "grad_norm": 1.0656924772924845, "learning_rate": 1.1943219687389984e-05, "loss": 1.6018, "step": 3407 }, { "epoch": 0.45476381104883906, "grad_norm": 1.1894263785585688, "learning_rate": 1.1938979986165476e-05, "loss": 1.6683, "step": 3408 }, { "epoch": 0.45489725113424073, "grad_norm": 0.9518835451824228, "learning_rate": 1.193473992276158e-05, "loss": 1.6263, "step": 3409 }, { "epoch": 0.4550306912196424, "grad_norm": 0.9790868312599547, "learning_rate": 1.1930499497970296e-05, "loss": 1.6023, "step": 3410 }, { "epoch": 0.455164131305044, "grad_norm": 1.0311273154214786, "learning_rate": 1.1926258712583685e-05, "loss": 1.6393, "step": 3411 }, { "epoch": 0.4552975713904457, "grad_norm": 0.9778706613876423, "learning_rate": 1.192201756739388e-05, "loss": 1.5959, "step": 3412 }, { "epoch": 0.45543101147584736, "grad_norm": 1.0265470549844626, "learning_rate": 1.1917776063193073e-05, "loss": 1.5936, "step": 3413 }, { "epoch": 0.455564451561249, "grad_norm": 1.0845896709498193, "learning_rate": 1.1913534200773536e-05, "loss": 1.5618, "step": 3414 }, { "epoch": 0.45569789164665064, "grad_norm": 0.9625869411364787, "learning_rate": 1.1909291980927592e-05, "loss": 1.5905, "step": 3415 }, { "epoch": 0.4558313317320523, "grad_norm": 1.072697736476755, "learning_rate": 1.1905049404447649e-05, "loss": 1.5894, "step": 3416 }, { "epoch": 0.455964771817454, "grad_norm": 1.142515736949019, "learning_rate": 1.1900806472126162e-05, "loss": 1.5963, "step": 3417 }, { "epoch": 0.4560982119028556, "grad_norm": 0.9816650958835474, "learning_rate": 1.189656318475567e-05, "loss": 1.6364, "step": 3418 }, { "epoch": 0.45623165198825727, "grad_norm": 0.9293454502463179, "learning_rate": 1.189231954312877e-05, "loss": 1.5843, "step": 3419 }, { "epoch": 0.45636509207365894, "grad_norm": 1.2000199775285119, "learning_rate": 1.188807554803812e-05, "loss": 1.6047, "step": 3420 }, { "epoch": 0.45649853215906055, "grad_norm": 0.9742674279366978, "learning_rate": 1.1883831200276459e-05, "loss": 1.6254, "step": 3421 }, { "epoch": 0.4566319722444622, "grad_norm": 1.1378422279774119, "learning_rate": 1.1879586500636574e-05, "loss": 1.5892, "step": 3422 }, { "epoch": 0.4567654123298639, "grad_norm": 1.1369928812081609, "learning_rate": 1.1875341449911333e-05, "loss": 1.5573, "step": 3423 }, { "epoch": 0.45689885241526557, "grad_norm": 0.9878064111116921, "learning_rate": 1.1871096048893662e-05, "loss": 1.6357, "step": 3424 }, { "epoch": 0.4570322925006672, "grad_norm": 1.0138596892355174, "learning_rate": 1.1866850298376549e-05, "loss": 1.6267, "step": 3425 }, { "epoch": 0.45716573258606885, "grad_norm": 1.03109296011423, "learning_rate": 1.1862604199153058e-05, "loss": 1.6326, "step": 3426 }, { "epoch": 0.4572991726714705, "grad_norm": 0.983321243406952, "learning_rate": 1.1858357752016307e-05, "loss": 1.6337, "step": 3427 }, { "epoch": 0.45743261275687214, "grad_norm": 1.069437667120727, "learning_rate": 1.1854110957759487e-05, "loss": 1.5948, "step": 3428 }, { "epoch": 0.4575660528422738, "grad_norm": 0.9630689249149076, "learning_rate": 1.1849863817175848e-05, "loss": 1.6009, "step": 3429 }, { "epoch": 0.4576994929276755, "grad_norm": 1.195697896821968, "learning_rate": 1.1845616331058714e-05, "loss": 1.6401, "step": 3430 }, { "epoch": 0.45783293301307715, "grad_norm": 1.0373880035420473, "learning_rate": 1.1841368500201457e-05, "loss": 1.5717, "step": 3431 }, { "epoch": 0.45796637309847876, "grad_norm": 1.0152163282536957, "learning_rate": 1.1837120325397533e-05, "loss": 1.6412, "step": 3432 }, { "epoch": 0.45809981318388043, "grad_norm": 0.9465550717350613, "learning_rate": 1.1832871807440448e-05, "loss": 1.6038, "step": 3433 }, { "epoch": 0.4582332532692821, "grad_norm": 0.9393643634807096, "learning_rate": 1.1828622947123774e-05, "loss": 1.542, "step": 3434 }, { "epoch": 0.4583666933546838, "grad_norm": 0.9998488486015676, "learning_rate": 1.1824373745241159e-05, "loss": 1.6714, "step": 3435 }, { "epoch": 0.4585001334400854, "grad_norm": 1.0463357172364034, "learning_rate": 1.1820124202586294e-05, "loss": 1.6897, "step": 3436 }, { "epoch": 0.45863357352548706, "grad_norm": 0.9977756148556246, "learning_rate": 1.1815874319952954e-05, "loss": 1.5722, "step": 3437 }, { "epoch": 0.45876701361088873, "grad_norm": 0.9781228945231666, "learning_rate": 1.1811624098134963e-05, "loss": 1.5516, "step": 3438 }, { "epoch": 0.45890045369629034, "grad_norm": 0.9515422839609833, "learning_rate": 1.180737353792622e-05, "loss": 1.6011, "step": 3439 }, { "epoch": 0.459033893781692, "grad_norm": 1.099348617644577, "learning_rate": 1.1803122640120675e-05, "loss": 1.5827, "step": 3440 }, { "epoch": 0.4591673338670937, "grad_norm": 1.1211811802963583, "learning_rate": 1.1798871405512352e-05, "loss": 1.6251, "step": 3441 }, { "epoch": 0.45930077395249536, "grad_norm": 0.9456734985827367, "learning_rate": 1.1794619834895329e-05, "loss": 1.5872, "step": 3442 }, { "epoch": 0.45943421403789697, "grad_norm": 1.0345174270836286, "learning_rate": 1.1790367929063756e-05, "loss": 1.5859, "step": 3443 }, { "epoch": 0.45956765412329864, "grad_norm": 1.1988418117321362, "learning_rate": 1.1786115688811836e-05, "loss": 1.6008, "step": 3444 }, { "epoch": 0.4597010942087003, "grad_norm": 0.9823921150047465, "learning_rate": 1.1781863114933845e-05, "loss": 1.5628, "step": 3445 }, { "epoch": 0.4598345342941019, "grad_norm": 0.9709054414773352, "learning_rate": 1.1777610208224107e-05, "loss": 1.592, "step": 3446 }, { "epoch": 0.4599679743795036, "grad_norm": 0.913819442194726, "learning_rate": 1.1773356969477023e-05, "loss": 1.5924, "step": 3447 }, { "epoch": 0.46010141446490527, "grad_norm": 0.9314527155118181, "learning_rate": 1.1769103399487047e-05, "loss": 1.5904, "step": 3448 }, { "epoch": 0.46023485455030694, "grad_norm": 1.132160179087773, "learning_rate": 1.1764849499048699e-05, "loss": 1.6422, "step": 3449 }, { "epoch": 0.46036829463570855, "grad_norm": 1.2291620905625844, "learning_rate": 1.1760595268956556e-05, "loss": 1.5999, "step": 3450 }, { "epoch": 0.4605017347211102, "grad_norm": 0.990821816789439, "learning_rate": 1.1756340710005264e-05, "loss": 1.6038, "step": 3451 }, { "epoch": 0.4606351748065119, "grad_norm": 1.1734233511272216, "learning_rate": 1.175208582298952e-05, "loss": 1.5486, "step": 3452 }, { "epoch": 0.4607686148919135, "grad_norm": 1.0603222014465778, "learning_rate": 1.1747830608704098e-05, "loss": 1.5833, "step": 3453 }, { "epoch": 0.4609020549773152, "grad_norm": 0.9788386077828661, "learning_rate": 1.1743575067943813e-05, "loss": 1.5973, "step": 3454 }, { "epoch": 0.46103549506271685, "grad_norm": 1.1654930199086528, "learning_rate": 1.1739319201503561e-05, "loss": 1.6585, "step": 3455 }, { "epoch": 0.4611689351481185, "grad_norm": 0.975258594253613, "learning_rate": 1.1735063010178283e-05, "loss": 1.5713, "step": 3456 }, { "epoch": 0.46130237523352013, "grad_norm": 1.2641032198352828, "learning_rate": 1.1730806494762987e-05, "loss": 1.5982, "step": 3457 }, { "epoch": 0.4614358153189218, "grad_norm": 1.242994584150048, "learning_rate": 1.1726549656052748e-05, "loss": 1.6169, "step": 3458 }, { "epoch": 0.4615692554043235, "grad_norm": 0.9635164521389502, "learning_rate": 1.1722292494842688e-05, "loss": 1.598, "step": 3459 }, { "epoch": 0.4617026954897251, "grad_norm": 0.9513310854478186, "learning_rate": 1.1718035011928002e-05, "loss": 1.5811, "step": 3460 }, { "epoch": 0.46183613557512676, "grad_norm": 0.946112789103914, "learning_rate": 1.1713777208103933e-05, "loss": 1.5948, "step": 3461 }, { "epoch": 0.46196957566052843, "grad_norm": 0.9946176475506616, "learning_rate": 1.1709519084165797e-05, "loss": 1.6119, "step": 3462 }, { "epoch": 0.4621030157459301, "grad_norm": 0.9874079907996892, "learning_rate": 1.1705260640908955e-05, "loss": 1.6237, "step": 3463 }, { "epoch": 0.4622364558313317, "grad_norm": 1.7891192519617156, "learning_rate": 1.1701001879128843e-05, "loss": 1.5662, "step": 3464 }, { "epoch": 0.4623698959167334, "grad_norm": 1.0421651256059734, "learning_rate": 1.1696742799620946e-05, "loss": 1.6307, "step": 3465 }, { "epoch": 0.46250333600213506, "grad_norm": 0.979356707410168, "learning_rate": 1.1692483403180814e-05, "loss": 1.607, "step": 3466 }, { "epoch": 0.46263677608753667, "grad_norm": 1.1226620423723173, "learning_rate": 1.1688223690604052e-05, "loss": 1.5689, "step": 3467 }, { "epoch": 0.46277021617293834, "grad_norm": 1.0965605702509604, "learning_rate": 1.1683963662686324e-05, "loss": 1.5973, "step": 3468 }, { "epoch": 0.46290365625834, "grad_norm": 1.0921547887155878, "learning_rate": 1.167970332022336e-05, "loss": 1.5853, "step": 3469 }, { "epoch": 0.4630370963437417, "grad_norm": 1.1736803003382774, "learning_rate": 1.1675442664010935e-05, "loss": 1.5885, "step": 3470 }, { "epoch": 0.4631705364291433, "grad_norm": 1.082987731836864, "learning_rate": 1.1671181694844897e-05, "loss": 1.6115, "step": 3471 }, { "epoch": 0.46330397651454497, "grad_norm": 0.9708478904094577, "learning_rate": 1.1666920413521146e-05, "loss": 1.597, "step": 3472 }, { "epoch": 0.46343741659994664, "grad_norm": 1.0126029044158875, "learning_rate": 1.1662658820835639e-05, "loss": 1.6249, "step": 3473 }, { "epoch": 0.46357085668534825, "grad_norm": 1.0207349433202457, "learning_rate": 1.1658396917584397e-05, "loss": 1.5908, "step": 3474 }, { "epoch": 0.4637042967707499, "grad_norm": 0.9372775427319033, "learning_rate": 1.1654134704563492e-05, "loss": 1.6199, "step": 3475 }, { "epoch": 0.4638377368561516, "grad_norm": 1.1272128181343017, "learning_rate": 1.1649872182569058e-05, "loss": 1.6617, "step": 3476 }, { "epoch": 0.46397117694155326, "grad_norm": 1.110304791632166, "learning_rate": 1.1645609352397282e-05, "loss": 1.6041, "step": 3477 }, { "epoch": 0.4641046170269549, "grad_norm": 1.0017766303846791, "learning_rate": 1.1641346214844417e-05, "loss": 1.6007, "step": 3478 }, { "epoch": 0.46423805711235655, "grad_norm": 0.9880107584390453, "learning_rate": 1.1637082770706764e-05, "loss": 1.6511, "step": 3479 }, { "epoch": 0.4643714971977582, "grad_norm": 1.2922751760310007, "learning_rate": 1.1632819020780693e-05, "loss": 1.6072, "step": 3480 }, { "epoch": 0.46450493728315984, "grad_norm": 0.9883475103271387, "learning_rate": 1.1628554965862615e-05, "loss": 1.6685, "step": 3481 }, { "epoch": 0.4646383773685615, "grad_norm": 1.1928187505375785, "learning_rate": 1.1624290606749012e-05, "loss": 1.6114, "step": 3482 }, { "epoch": 0.4647718174539632, "grad_norm": 1.0066345866358597, "learning_rate": 1.1620025944236418e-05, "loss": 1.6761, "step": 3483 }, { "epoch": 0.46490525753936485, "grad_norm": 1.0953896264697256, "learning_rate": 1.161576097912142e-05, "loss": 1.5629, "step": 3484 }, { "epoch": 0.46503869762476646, "grad_norm": 0.984883775282697, "learning_rate": 1.161149571220067e-05, "loss": 1.5929, "step": 3485 }, { "epoch": 0.46517213771016813, "grad_norm": 0.9445601434963037, "learning_rate": 1.1607230144270866e-05, "loss": 1.6208, "step": 3486 }, { "epoch": 0.4653055777955698, "grad_norm": 0.9240480810034248, "learning_rate": 1.1602964276128774e-05, "loss": 1.5202, "step": 3487 }, { "epoch": 0.4654390178809714, "grad_norm": 1.2349918808254974, "learning_rate": 1.1598698108571205e-05, "loss": 1.5377, "step": 3488 }, { "epoch": 0.4655724579663731, "grad_norm": 1.0096021895282354, "learning_rate": 1.1594431642395027e-05, "loss": 1.593, "step": 3489 }, { "epoch": 0.46570589805177476, "grad_norm": 1.0714154627752588, "learning_rate": 1.1590164878397174e-05, "loss": 1.5892, "step": 3490 }, { "epoch": 0.46583933813717643, "grad_norm": 0.9628486041384523, "learning_rate": 1.1585897817374628e-05, "loss": 1.5884, "step": 3491 }, { "epoch": 0.46597277822257804, "grad_norm": 0.9752330456513079, "learning_rate": 1.1581630460124424e-05, "loss": 1.576, "step": 3492 }, { "epoch": 0.4661062183079797, "grad_norm": 0.9802951196533614, "learning_rate": 1.1577362807443657e-05, "loss": 1.5754, "step": 3493 }, { "epoch": 0.4662396583933814, "grad_norm": 1.1032933740856454, "learning_rate": 1.1573094860129479e-05, "loss": 1.5705, "step": 3494 }, { "epoch": 0.466373098478783, "grad_norm": 0.9835441668655777, "learning_rate": 1.1568826618979087e-05, "loss": 1.6197, "step": 3495 }, { "epoch": 0.46650653856418467, "grad_norm": 0.9873490655801218, "learning_rate": 1.1564558084789749e-05, "loss": 1.6491, "step": 3496 }, { "epoch": 0.46663997864958634, "grad_norm": 1.1431057876156487, "learning_rate": 1.1560289258358773e-05, "loss": 1.6006, "step": 3497 }, { "epoch": 0.466773418734988, "grad_norm": 1.1765306955508206, "learning_rate": 1.1556020140483523e-05, "loss": 1.6422, "step": 3498 }, { "epoch": 0.4669068588203896, "grad_norm": 0.900400853861777, "learning_rate": 1.1551750731961433e-05, "loss": 1.575, "step": 3499 }, { "epoch": 0.4670402989057913, "grad_norm": 0.957183643780711, "learning_rate": 1.1547481033589971e-05, "loss": 1.5993, "step": 3500 }, { "epoch": 0.46717373899119297, "grad_norm": 1.1014233549217831, "learning_rate": 1.1543211046166672e-05, "loss": 1.5655, "step": 3501 }, { "epoch": 0.46730717907659464, "grad_norm": 0.9656596890342347, "learning_rate": 1.1538940770489118e-05, "loss": 1.5974, "step": 3502 }, { "epoch": 0.46744061916199625, "grad_norm": 1.1217113813358535, "learning_rate": 1.1534670207354952e-05, "loss": 1.6228, "step": 3503 }, { "epoch": 0.4675740592473979, "grad_norm": 1.0088247160067236, "learning_rate": 1.1530399357561861e-05, "loss": 1.6261, "step": 3504 }, { "epoch": 0.4677074993327996, "grad_norm": 1.0056946912456848, "learning_rate": 1.1526128221907595e-05, "loss": 1.572, "step": 3505 }, { "epoch": 0.4678409394182012, "grad_norm": 0.984689525859442, "learning_rate": 1.1521856801189954e-05, "loss": 1.5713, "step": 3506 }, { "epoch": 0.4679743795036029, "grad_norm": 0.9741585900265354, "learning_rate": 1.1517585096206788e-05, "loss": 1.587, "step": 3507 }, { "epoch": 0.46810781958900455, "grad_norm": 0.9889481536506061, "learning_rate": 1.1513313107756007e-05, "loss": 1.5535, "step": 3508 }, { "epoch": 0.4682412596744062, "grad_norm": 0.9996665079177771, "learning_rate": 1.1509040836635568e-05, "loss": 1.5746, "step": 3509 }, { "epoch": 0.46837469975980783, "grad_norm": 1.1930390389598884, "learning_rate": 1.1504768283643476e-05, "loss": 1.5963, "step": 3510 }, { "epoch": 0.4685081398452095, "grad_norm": 1.015228011873478, "learning_rate": 1.1500495449577806e-05, "loss": 1.5463, "step": 3511 }, { "epoch": 0.4686415799306112, "grad_norm": 0.9872673249391366, "learning_rate": 1.149622233523667e-05, "loss": 1.5713, "step": 3512 }, { "epoch": 0.4687750200160128, "grad_norm": 1.1412360147938814, "learning_rate": 1.1491948941418234e-05, "loss": 1.6021, "step": 3513 }, { "epoch": 0.46890846010141446, "grad_norm": 1.0170872902758004, "learning_rate": 1.1487675268920721e-05, "loss": 1.6049, "step": 3514 }, { "epoch": 0.46904190018681613, "grad_norm": 0.9653090890079009, "learning_rate": 1.148340131854241e-05, "loss": 1.5985, "step": 3515 }, { "epoch": 0.4691753402722178, "grad_norm": 1.0182190080319995, "learning_rate": 1.147912709108162e-05, "loss": 1.6062, "step": 3516 }, { "epoch": 0.4693087803576194, "grad_norm": 1.1931214012383156, "learning_rate": 1.1474852587336731e-05, "loss": 1.6188, "step": 3517 }, { "epoch": 0.4694422204430211, "grad_norm": 0.9521461072306361, "learning_rate": 1.147057780810617e-05, "loss": 1.546, "step": 3518 }, { "epoch": 0.46957566052842276, "grad_norm": 1.0407743299860555, "learning_rate": 1.1466302754188417e-05, "loss": 1.6001, "step": 3519 }, { "epoch": 0.46970910061382437, "grad_norm": 1.0007245963172906, "learning_rate": 1.1462027426382002e-05, "loss": 1.5495, "step": 3520 }, { "epoch": 0.46984254069922604, "grad_norm": 1.00485415407387, "learning_rate": 1.145775182548551e-05, "loss": 1.5786, "step": 3521 }, { "epoch": 0.4699759807846277, "grad_norm": 0.9919189415028729, "learning_rate": 1.1453475952297577e-05, "loss": 1.6291, "step": 3522 }, { "epoch": 0.4701094208700294, "grad_norm": 1.1020789294943216, "learning_rate": 1.1449199807616882e-05, "loss": 1.533, "step": 3523 }, { "epoch": 0.470242860955431, "grad_norm": 1.3158857310226424, "learning_rate": 1.1444923392242165e-05, "loss": 1.6232, "step": 3524 }, { "epoch": 0.47037630104083267, "grad_norm": 1.1748271233312169, "learning_rate": 1.1440646706972207e-05, "loss": 1.5839, "step": 3525 }, { "epoch": 0.47050974112623434, "grad_norm": 0.9926376613394198, "learning_rate": 1.143636975260585e-05, "loss": 1.6421, "step": 3526 }, { "epoch": 0.47064318121163595, "grad_norm": 0.9974971729341541, "learning_rate": 1.1432092529941972e-05, "loss": 1.6288, "step": 3527 }, { "epoch": 0.4707766212970376, "grad_norm": 1.07546662467877, "learning_rate": 1.142781503977952e-05, "loss": 1.597, "step": 3528 }, { "epoch": 0.4709100613824393, "grad_norm": 1.0889300149702186, "learning_rate": 1.1423537282917469e-05, "loss": 1.5977, "step": 3529 }, { "epoch": 0.47104350146784096, "grad_norm": 1.081280310987519, "learning_rate": 1.1419259260154864e-05, "loss": 1.5912, "step": 3530 }, { "epoch": 0.4711769415532426, "grad_norm": 1.0372628884781476, "learning_rate": 1.141498097229079e-05, "loss": 1.6334, "step": 3531 }, { "epoch": 0.47131038163864425, "grad_norm": 1.1883125180451641, "learning_rate": 1.1410702420124377e-05, "loss": 1.6159, "step": 3532 }, { "epoch": 0.4714438217240459, "grad_norm": 0.9457223363765193, "learning_rate": 1.1406423604454816e-05, "loss": 1.6048, "step": 3533 }, { "epoch": 0.47157726180944753, "grad_norm": 1.1775437736757077, "learning_rate": 1.1402144526081338e-05, "loss": 1.5696, "step": 3534 }, { "epoch": 0.4717107018948492, "grad_norm": 0.9405003660186544, "learning_rate": 1.1397865185803227e-05, "loss": 1.6001, "step": 3535 }, { "epoch": 0.4718441419802509, "grad_norm": 0.9593945256058269, "learning_rate": 1.1393585584419812e-05, "loss": 1.5745, "step": 3536 }, { "epoch": 0.47197758206565255, "grad_norm": 1.037264876154079, "learning_rate": 1.1389305722730478e-05, "loss": 1.5845, "step": 3537 }, { "epoch": 0.47211102215105416, "grad_norm": 1.0164422235337136, "learning_rate": 1.1385025601534654e-05, "loss": 1.5994, "step": 3538 }, { "epoch": 0.47224446223645583, "grad_norm": 1.0015580982419405, "learning_rate": 1.1380745221631813e-05, "loss": 1.5474, "step": 3539 }, { "epoch": 0.4723779023218575, "grad_norm": 0.9922512359099007, "learning_rate": 1.137646458382149e-05, "loss": 1.6085, "step": 3540 }, { "epoch": 0.4725113424072591, "grad_norm": 0.9513030545535126, "learning_rate": 1.137218368890325e-05, "loss": 1.583, "step": 3541 }, { "epoch": 0.4726447824926608, "grad_norm": 1.0137343494231201, "learning_rate": 1.1367902537676722e-05, "loss": 1.6333, "step": 3542 }, { "epoch": 0.47277822257806246, "grad_norm": 1.053043019819065, "learning_rate": 1.1363621130941573e-05, "loss": 1.5458, "step": 3543 }, { "epoch": 0.4729116626634641, "grad_norm": 0.991708813143814, "learning_rate": 1.1359339469497525e-05, "loss": 1.5617, "step": 3544 }, { "epoch": 0.47304510274886574, "grad_norm": 0.9881479349845488, "learning_rate": 1.1355057554144338e-05, "loss": 1.5599, "step": 3545 }, { "epoch": 0.4731785428342674, "grad_norm": 0.9677599502311968, "learning_rate": 1.1350775385681827e-05, "loss": 1.661, "step": 3546 }, { "epoch": 0.4733119829196691, "grad_norm": 0.9637140530562013, "learning_rate": 1.1346492964909856e-05, "loss": 1.5783, "step": 3547 }, { "epoch": 0.4734454230050707, "grad_norm": 1.0174979764772296, "learning_rate": 1.1342210292628327e-05, "loss": 1.6682, "step": 3548 }, { "epoch": 0.47357886309047237, "grad_norm": 0.956767731266162, "learning_rate": 1.1337927369637198e-05, "loss": 1.6158, "step": 3549 }, { "epoch": 0.47371230317587404, "grad_norm": 1.1908661498029431, "learning_rate": 1.1333644196736468e-05, "loss": 1.5445, "step": 3550 }, { "epoch": 0.4738457432612757, "grad_norm": 1.0665948861651355, "learning_rate": 1.132936077472619e-05, "loss": 1.6007, "step": 3551 }, { "epoch": 0.4739791833466773, "grad_norm": 0.9949979990909358, "learning_rate": 1.1325077104406455e-05, "loss": 1.6371, "step": 3552 }, { "epoch": 0.474112623432079, "grad_norm": 1.1119040943652865, "learning_rate": 1.1320793186577398e-05, "loss": 1.5935, "step": 3553 }, { "epoch": 0.47424606351748066, "grad_norm": 1.2024374524083807, "learning_rate": 1.1316509022039215e-05, "loss": 1.5906, "step": 3554 }, { "epoch": 0.4743795036028823, "grad_norm": 0.9908007603433848, "learning_rate": 1.1312224611592132e-05, "loss": 1.6032, "step": 3555 }, { "epoch": 0.47451294368828395, "grad_norm": 1.0257647339374822, "learning_rate": 1.1307939956036437e-05, "loss": 1.6118, "step": 3556 }, { "epoch": 0.4746463837736856, "grad_norm": 0.9890146650400216, "learning_rate": 1.1303655056172447e-05, "loss": 1.6011, "step": 3557 }, { "epoch": 0.4747798238590873, "grad_norm": 8.167075213418878, "learning_rate": 1.1299369912800537e-05, "loss": 1.5942, "step": 3558 }, { "epoch": 0.4749132639444889, "grad_norm": 0.9783430062632217, "learning_rate": 1.1295084526721119e-05, "loss": 1.5433, "step": 3559 }, { "epoch": 0.4750467040298906, "grad_norm": 1.0096065716017133, "learning_rate": 1.129079889873466e-05, "loss": 1.6455, "step": 3560 }, { "epoch": 0.47518014411529225, "grad_norm": 1.0537017716025912, "learning_rate": 1.1286513029641657e-05, "loss": 1.5721, "step": 3561 }, { "epoch": 0.47531358420069386, "grad_norm": 1.1698259882179534, "learning_rate": 1.1282226920242669e-05, "loss": 1.5693, "step": 3562 }, { "epoch": 0.47544702428609553, "grad_norm": 1.096536277070531, "learning_rate": 1.1277940571338296e-05, "loss": 1.651, "step": 3563 }, { "epoch": 0.4755804643714972, "grad_norm": 0.9870886196000643, "learning_rate": 1.1273653983729169e-05, "loss": 1.5927, "step": 3564 }, { "epoch": 0.4757139044568989, "grad_norm": 1.0227690927337232, "learning_rate": 1.1269367158215982e-05, "loss": 1.5799, "step": 3565 }, { "epoch": 0.4758473445423005, "grad_norm": 0.981433243463809, "learning_rate": 1.1265080095599459e-05, "loss": 1.6033, "step": 3566 }, { "epoch": 0.47598078462770216, "grad_norm": 0.9241540078980625, "learning_rate": 1.126079279668038e-05, "loss": 1.6059, "step": 3567 }, { "epoch": 0.47611422471310383, "grad_norm": 0.9966040467022464, "learning_rate": 1.1256505262259561e-05, "loss": 1.6017, "step": 3568 }, { "epoch": 0.47624766479850544, "grad_norm": 1.00788906341642, "learning_rate": 1.1252217493137863e-05, "loss": 1.6031, "step": 3569 }, { "epoch": 0.4763811048839071, "grad_norm": 1.1866583647363547, "learning_rate": 1.1247929490116198e-05, "loss": 1.5722, "step": 3570 }, { "epoch": 0.4765145449693088, "grad_norm": 1.0142892640611485, "learning_rate": 1.124364125399551e-05, "loss": 1.6128, "step": 3571 }, { "epoch": 0.47664798505471045, "grad_norm": 0.9882028516457761, "learning_rate": 1.1239352785576795e-05, "loss": 1.6019, "step": 3572 }, { "epoch": 0.47678142514011207, "grad_norm": 0.9485363827235442, "learning_rate": 1.1235064085661094e-05, "loss": 1.5451, "step": 3573 }, { "epoch": 0.47691486522551374, "grad_norm": 1.0423832728014304, "learning_rate": 1.1230775155049478e-05, "loss": 1.6573, "step": 3574 }, { "epoch": 0.4770483053109154, "grad_norm": 0.9522899518242469, "learning_rate": 1.122648599454308e-05, "loss": 1.5389, "step": 3575 }, { "epoch": 0.4771817453963171, "grad_norm": 1.0795989027553348, "learning_rate": 1.122219660494306e-05, "loss": 1.5245, "step": 3576 }, { "epoch": 0.4773151854817187, "grad_norm": 0.9703295676591189, "learning_rate": 1.121790698705063e-05, "loss": 1.6262, "step": 3577 }, { "epoch": 0.47744862556712037, "grad_norm": 0.9466306660145242, "learning_rate": 1.1213617141667042e-05, "loss": 1.5771, "step": 3578 }, { "epoch": 0.47758206565252204, "grad_norm": 1.1652671591625094, "learning_rate": 1.1209327069593587e-05, "loss": 1.6108, "step": 3579 }, { "epoch": 0.47771550573792365, "grad_norm": 0.9637891663914724, "learning_rate": 1.1205036771631606e-05, "loss": 1.6177, "step": 3580 }, { "epoch": 0.4778489458233253, "grad_norm": 1.0591406701546173, "learning_rate": 1.1200746248582478e-05, "loss": 1.6277, "step": 3581 }, { "epoch": 0.477982385908727, "grad_norm": 1.075013785861123, "learning_rate": 1.1196455501247619e-05, "loss": 1.6509, "step": 3582 }, { "epoch": 0.47811582599412866, "grad_norm": 1.0099560088443318, "learning_rate": 1.1192164530428495e-05, "loss": 1.5818, "step": 3583 }, { "epoch": 0.4782492660795303, "grad_norm": 1.2309318054357847, "learning_rate": 1.1187873336926609e-05, "loss": 1.5818, "step": 3584 }, { "epoch": 0.47838270616493195, "grad_norm": 1.0494190297239068, "learning_rate": 1.1183581921543507e-05, "loss": 1.5966, "step": 3585 }, { "epoch": 0.4785161462503336, "grad_norm": 0.9551043069869481, "learning_rate": 1.1179290285080782e-05, "loss": 1.5893, "step": 3586 }, { "epoch": 0.47864958633573523, "grad_norm": 0.9368775120300123, "learning_rate": 1.1174998428340055e-05, "loss": 1.6137, "step": 3587 }, { "epoch": 0.4787830264211369, "grad_norm": 1.1506412501949679, "learning_rate": 1.1170706352123002e-05, "loss": 1.6004, "step": 3588 }, { "epoch": 0.4789164665065386, "grad_norm": 1.0842690119366971, "learning_rate": 1.116641405723133e-05, "loss": 1.6812, "step": 3589 }, { "epoch": 0.47904990659194024, "grad_norm": 1.1775677070138726, "learning_rate": 1.1162121544466794e-05, "loss": 1.6091, "step": 3590 }, { "epoch": 0.47918334667734186, "grad_norm": 0.9567709845629377, "learning_rate": 1.1157828814631179e-05, "loss": 1.596, "step": 3591 }, { "epoch": 0.47931678676274353, "grad_norm": 0.970022267558387, "learning_rate": 1.115353586852633e-05, "loss": 1.657, "step": 3592 }, { "epoch": 0.4794502268481452, "grad_norm": 0.9647653037200512, "learning_rate": 1.1149242706954111e-05, "loss": 1.5747, "step": 3593 }, { "epoch": 0.4795836669335468, "grad_norm": 1.0040826902190914, "learning_rate": 1.1144949330716441e-05, "loss": 1.624, "step": 3594 }, { "epoch": 0.4797171070189485, "grad_norm": 7.924396978950378, "learning_rate": 1.1140655740615274e-05, "loss": 1.6323, "step": 3595 }, { "epoch": 0.47985054710435016, "grad_norm": 1.2546815828202986, "learning_rate": 1.1136361937452595e-05, "loss": 1.5693, "step": 3596 }, { "epoch": 0.4799839871897518, "grad_norm": 1.0142710081731812, "learning_rate": 1.113206792203045e-05, "loss": 1.5772, "step": 3597 }, { "epoch": 0.48011742727515344, "grad_norm": 0.9910079104514572, "learning_rate": 1.1127773695150904e-05, "loss": 1.6178, "step": 3598 }, { "epoch": 0.4802508673605551, "grad_norm": 0.9866567619524242, "learning_rate": 1.1123479257616072e-05, "loss": 1.5508, "step": 3599 }, { "epoch": 0.4803843074459568, "grad_norm": 1.0128213856045458, "learning_rate": 1.1119184610228103e-05, "loss": 1.5817, "step": 3600 }, { "epoch": 0.4805177475313584, "grad_norm": 1.07285721769306, "learning_rate": 1.1114889753789193e-05, "loss": 1.5987, "step": 3601 }, { "epoch": 0.48065118761676007, "grad_norm": 1.1570972062497342, "learning_rate": 1.1110594689101572e-05, "loss": 1.5853, "step": 3602 }, { "epoch": 0.48078462770216174, "grad_norm": 0.9546422737264942, "learning_rate": 1.1106299416967508e-05, "loss": 1.5883, "step": 3603 }, { "epoch": 0.4809180677875634, "grad_norm": 1.2179319826085693, "learning_rate": 1.1102003938189308e-05, "loss": 1.6357, "step": 3604 }, { "epoch": 0.481051507872965, "grad_norm": 1.016415996934519, "learning_rate": 1.1097708253569317e-05, "loss": 1.5237, "step": 3605 }, { "epoch": 0.4811849479583667, "grad_norm": 1.0641665463344543, "learning_rate": 1.1093412363909926e-05, "loss": 1.5878, "step": 3606 }, { "epoch": 0.48131838804376836, "grad_norm": 0.9711099512141003, "learning_rate": 1.1089116270013552e-05, "loss": 1.5518, "step": 3607 }, { "epoch": 0.48145182812917, "grad_norm": 1.2233276218733482, "learning_rate": 1.108481997268266e-05, "loss": 1.5926, "step": 3608 }, { "epoch": 0.48158526821457165, "grad_norm": 1.1743091714878038, "learning_rate": 1.1080523472719745e-05, "loss": 1.5618, "step": 3609 }, { "epoch": 0.4817187082999733, "grad_norm": 1.226731129476479, "learning_rate": 1.1076226770927349e-05, "loss": 1.5961, "step": 3610 }, { "epoch": 0.481852148385375, "grad_norm": 1.061342840791177, "learning_rate": 1.1071929868108046e-05, "loss": 1.6121, "step": 3611 }, { "epoch": 0.4819855884707766, "grad_norm": 1.0043889707036777, "learning_rate": 1.1067632765064449e-05, "loss": 1.559, "step": 3612 }, { "epoch": 0.4821190285561783, "grad_norm": 0.9334874178935394, "learning_rate": 1.1063335462599208e-05, "loss": 1.6363, "step": 3613 }, { "epoch": 0.48225246864157995, "grad_norm": 0.9541127453212594, "learning_rate": 1.1059037961515005e-05, "loss": 1.5859, "step": 3614 }, { "epoch": 0.48238590872698156, "grad_norm": 1.2659844329901926, "learning_rate": 1.1054740262614571e-05, "loss": 1.6083, "step": 3615 }, { "epoch": 0.48251934881238323, "grad_norm": 1.009394331400396, "learning_rate": 1.1050442366700666e-05, "loss": 1.5752, "step": 3616 }, { "epoch": 0.4826527888977849, "grad_norm": 0.9588149443609268, "learning_rate": 1.1046144274576085e-05, "loss": 1.5623, "step": 3617 }, { "epoch": 0.48278622898318657, "grad_norm": 1.1186126571235782, "learning_rate": 1.1041845987043664e-05, "loss": 1.5748, "step": 3618 }, { "epoch": 0.4829196690685882, "grad_norm": 1.1820677624653801, "learning_rate": 1.1037547504906275e-05, "loss": 1.5901, "step": 3619 }, { "epoch": 0.48305310915398986, "grad_norm": 0.9674980847745737, "learning_rate": 1.1033248828966825e-05, "loss": 1.5353, "step": 3620 }, { "epoch": 0.4831865492393915, "grad_norm": 1.037410201754664, "learning_rate": 1.1028949960028257e-05, "loss": 1.5884, "step": 3621 }, { "epoch": 0.48331998932479314, "grad_norm": 1.0179633311651575, "learning_rate": 1.1024650898893554e-05, "loss": 1.6185, "step": 3622 }, { "epoch": 0.4834534294101948, "grad_norm": 1.024857123528208, "learning_rate": 1.1020351646365726e-05, "loss": 1.6043, "step": 3623 }, { "epoch": 0.4835868694955965, "grad_norm": 1.0553654696487733, "learning_rate": 1.1016052203247829e-05, "loss": 1.5471, "step": 3624 }, { "epoch": 0.48372030958099815, "grad_norm": 0.9703280830869259, "learning_rate": 1.1011752570342949e-05, "loss": 1.4886, "step": 3625 }, { "epoch": 0.48385374966639977, "grad_norm": 0.9916678812963029, "learning_rate": 1.1007452748454206e-05, "loss": 1.6389, "step": 3626 }, { "epoch": 0.48398718975180144, "grad_norm": 1.1877794657449894, "learning_rate": 1.1003152738384762e-05, "loss": 1.6355, "step": 3627 }, { "epoch": 0.4841206298372031, "grad_norm": 1.1020608920211157, "learning_rate": 1.0998852540937806e-05, "loss": 1.6111, "step": 3628 }, { "epoch": 0.4842540699226047, "grad_norm": 0.9894785991525039, "learning_rate": 1.0994552156916569e-05, "loss": 1.5638, "step": 3629 }, { "epoch": 0.4843875100080064, "grad_norm": 1.3735777211891464, "learning_rate": 1.0990251587124313e-05, "loss": 1.5996, "step": 3630 }, { "epoch": 0.48452095009340806, "grad_norm": 1.2295882874045658, "learning_rate": 1.0985950832364333e-05, "loss": 1.6332, "step": 3631 }, { "epoch": 0.48465439017880974, "grad_norm": 1.0441878854812512, "learning_rate": 1.0981649893439965e-05, "loss": 1.6525, "step": 3632 }, { "epoch": 0.48478783026421135, "grad_norm": 1.0469977037741003, "learning_rate": 1.0977348771154572e-05, "loss": 1.5817, "step": 3633 }, { "epoch": 0.484921270349613, "grad_norm": 1.0948912787896075, "learning_rate": 1.0973047466311556e-05, "loss": 1.5934, "step": 3634 }, { "epoch": 0.4850547104350147, "grad_norm": 1.2238531316763264, "learning_rate": 1.0968745979714355e-05, "loss": 1.6424, "step": 3635 }, { "epoch": 0.4851881505204163, "grad_norm": 0.9811824398502238, "learning_rate": 1.0964444312166432e-05, "loss": 1.6459, "step": 3636 }, { "epoch": 0.485321590605818, "grad_norm": 1.011203803623103, "learning_rate": 1.0960142464471293e-05, "loss": 1.5845, "step": 3637 }, { "epoch": 0.48545503069121965, "grad_norm": 1.1695581236457981, "learning_rate": 1.0955840437432472e-05, "loss": 1.5702, "step": 3638 }, { "epoch": 0.4855884707766213, "grad_norm": 1.0248849738040775, "learning_rate": 1.095153823185354e-05, "loss": 1.5914, "step": 3639 }, { "epoch": 0.48572191086202293, "grad_norm": 1.028338773698459, "learning_rate": 1.0947235848538103e-05, "loss": 1.6658, "step": 3640 }, { "epoch": 0.4858553509474246, "grad_norm": 0.9565872039882299, "learning_rate": 1.094293328828979e-05, "loss": 1.6082, "step": 3641 }, { "epoch": 0.4859887910328263, "grad_norm": 0.9438603164321578, "learning_rate": 1.0938630551912275e-05, "loss": 1.6025, "step": 3642 }, { "epoch": 0.48612223111822794, "grad_norm": 1.1670924197530739, "learning_rate": 1.0934327640209264e-05, "loss": 1.5918, "step": 3643 }, { "epoch": 0.48625567120362956, "grad_norm": 0.990930041258616, "learning_rate": 1.0930024553984482e-05, "loss": 1.5648, "step": 3644 }, { "epoch": 0.48638911128903123, "grad_norm": 0.9674206698473387, "learning_rate": 1.0925721294041704e-05, "loss": 1.5957, "step": 3645 }, { "epoch": 0.4865225513744329, "grad_norm": 1.0275864571669882, "learning_rate": 1.0921417861184728e-05, "loss": 1.6259, "step": 3646 }, { "epoch": 0.4866559914598345, "grad_norm": 1.0030357183260572, "learning_rate": 1.091711425621739e-05, "loss": 1.6057, "step": 3647 }, { "epoch": 0.4867894315452362, "grad_norm": 1.1702999599033863, "learning_rate": 1.0912810479943546e-05, "loss": 1.6031, "step": 3648 }, { "epoch": 0.48692287163063785, "grad_norm": 1.09322465585248, "learning_rate": 1.0908506533167096e-05, "loss": 1.5765, "step": 3649 }, { "epoch": 0.4870563117160395, "grad_norm": 4.935499852254243, "learning_rate": 1.0904202416691973e-05, "loss": 1.6665, "step": 3650 }, { "epoch": 0.48718975180144114, "grad_norm": 1.0758505123866504, "learning_rate": 1.0899898131322131e-05, "loss": 1.5891, "step": 3651 }, { "epoch": 0.4873231918868428, "grad_norm": 0.9638129734070673, "learning_rate": 1.0895593677861564e-05, "loss": 1.6171, "step": 3652 }, { "epoch": 0.4874566319722445, "grad_norm": 0.9472603260400863, "learning_rate": 1.0891289057114297e-05, "loss": 1.5797, "step": 3653 }, { "epoch": 0.4875900720576461, "grad_norm": 1.266655084226231, "learning_rate": 1.088698426988438e-05, "loss": 1.5955, "step": 3654 }, { "epoch": 0.48772351214304777, "grad_norm": 1.0120153785951085, "learning_rate": 1.08826793169759e-05, "loss": 1.5924, "step": 3655 }, { "epoch": 0.48785695222844944, "grad_norm": 1.2704389458734962, "learning_rate": 1.0878374199192974e-05, "loss": 1.5805, "step": 3656 }, { "epoch": 0.4879903923138511, "grad_norm": 1.0968646982460992, "learning_rate": 1.0874068917339749e-05, "loss": 1.5939, "step": 3657 }, { "epoch": 0.4881238323992527, "grad_norm": 0.9666879260201952, "learning_rate": 1.08697634722204e-05, "loss": 1.5857, "step": 3658 }, { "epoch": 0.4882572724846544, "grad_norm": 1.0192846733338263, "learning_rate": 1.0865457864639139e-05, "loss": 1.5674, "step": 3659 }, { "epoch": 0.48839071257005606, "grad_norm": 0.9524173836018979, "learning_rate": 1.08611520954002e-05, "loss": 1.5765, "step": 3660 }, { "epoch": 0.4885241526554577, "grad_norm": 1.0159163718461748, "learning_rate": 1.0856846165307858e-05, "loss": 1.5984, "step": 3661 }, { "epoch": 0.48865759274085935, "grad_norm": 1.0174232310565294, "learning_rate": 1.0852540075166404e-05, "loss": 1.6555, "step": 3662 }, { "epoch": 0.488791032826261, "grad_norm": 1.1547851642140878, "learning_rate": 1.0848233825780171e-05, "loss": 1.6071, "step": 3663 }, { "epoch": 0.4889244729116627, "grad_norm": 1.0165575340976178, "learning_rate": 1.0843927417953517e-05, "loss": 1.6243, "step": 3664 }, { "epoch": 0.4890579129970643, "grad_norm": 1.0034096707629767, "learning_rate": 1.0839620852490831e-05, "loss": 1.5922, "step": 3665 }, { "epoch": 0.489191353082466, "grad_norm": 0.9459155508614493, "learning_rate": 1.083531413019653e-05, "loss": 1.595, "step": 3666 }, { "epoch": 0.48932479316786764, "grad_norm": 0.9796138002566875, "learning_rate": 1.0831007251875056e-05, "loss": 1.5732, "step": 3667 }, { "epoch": 0.48945823325326926, "grad_norm": 0.9370636379889555, "learning_rate": 1.0826700218330895e-05, "loss": 1.6168, "step": 3668 }, { "epoch": 0.48959167333867093, "grad_norm": 1.032334040488697, "learning_rate": 1.082239303036854e-05, "loss": 1.5999, "step": 3669 }, { "epoch": 0.4897251134240726, "grad_norm": 1.2833243357988338, "learning_rate": 1.0818085688792532e-05, "loss": 1.5513, "step": 3670 }, { "epoch": 0.48985855350947427, "grad_norm": 0.9893354575062938, "learning_rate": 1.0813778194407432e-05, "loss": 1.6065, "step": 3671 }, { "epoch": 0.4899919935948759, "grad_norm": 0.9731145363428029, "learning_rate": 1.0809470548017828e-05, "loss": 1.6603, "step": 3672 }, { "epoch": 0.49012543368027756, "grad_norm": 0.966161570856358, "learning_rate": 1.0805162750428345e-05, "loss": 1.5408, "step": 3673 }, { "epoch": 0.4902588737656792, "grad_norm": 1.1087789778186123, "learning_rate": 1.0800854802443626e-05, "loss": 1.5869, "step": 3674 }, { "epoch": 0.49039231385108084, "grad_norm": 1.012321782174784, "learning_rate": 1.0796546704868348e-05, "loss": 1.6429, "step": 3675 }, { "epoch": 0.4905257539364825, "grad_norm": 0.9858444455189594, "learning_rate": 1.0792238458507215e-05, "loss": 1.5603, "step": 3676 }, { "epoch": 0.4906591940218842, "grad_norm": 1.130805582793479, "learning_rate": 1.0787930064164959e-05, "loss": 1.6041, "step": 3677 }, { "epoch": 0.49079263410728585, "grad_norm": 0.9578865164171134, "learning_rate": 1.0783621522646336e-05, "loss": 1.5791, "step": 3678 }, { "epoch": 0.49092607419268747, "grad_norm": 0.9913348610814515, "learning_rate": 1.0779312834756134e-05, "loss": 1.6555, "step": 3679 }, { "epoch": 0.49105951427808914, "grad_norm": 0.9482971209808349, "learning_rate": 1.0775004001299173e-05, "loss": 1.5926, "step": 3680 }, { "epoch": 0.4911929543634908, "grad_norm": 0.9705291068336154, "learning_rate": 1.0770695023080282e-05, "loss": 1.6281, "step": 3681 }, { "epoch": 0.4913263944488924, "grad_norm": 1.1282798108079959, "learning_rate": 1.0766385900904337e-05, "loss": 1.5623, "step": 3682 }, { "epoch": 0.4914598345342941, "grad_norm": 0.994148436807919, "learning_rate": 1.0762076635576231e-05, "loss": 1.6486, "step": 3683 }, { "epoch": 0.49159327461969576, "grad_norm": 0.9583103245372037, "learning_rate": 1.0757767227900888e-05, "loss": 1.5759, "step": 3684 }, { "epoch": 0.49172671470509743, "grad_norm": 0.9434367640143331, "learning_rate": 1.075345767868325e-05, "loss": 1.6, "step": 3685 }, { "epoch": 0.49186015479049905, "grad_norm": 1.054046209894045, "learning_rate": 1.0749147988728302e-05, "loss": 1.6308, "step": 3686 }, { "epoch": 0.4919935948759007, "grad_norm": 0.9921895864830362, "learning_rate": 1.0744838158841034e-05, "loss": 1.6343, "step": 3687 }, { "epoch": 0.4921270349613024, "grad_norm": 1.2122795180169637, "learning_rate": 1.074052818982648e-05, "loss": 1.5849, "step": 3688 }, { "epoch": 0.492260475046704, "grad_norm": 0.9788260702681275, "learning_rate": 1.0736218082489691e-05, "loss": 1.6075, "step": 3689 }, { "epoch": 0.4923939151321057, "grad_norm": 1.3297573232605928, "learning_rate": 1.0731907837635747e-05, "loss": 1.6099, "step": 3690 }, { "epoch": 0.49252735521750735, "grad_norm": 0.9550880821538821, "learning_rate": 1.0727597456069755e-05, "loss": 1.6125, "step": 3691 }, { "epoch": 0.492660795302909, "grad_norm": 0.9619871054912161, "learning_rate": 1.0723286938596836e-05, "loss": 1.6079, "step": 3692 }, { "epoch": 0.49279423538831063, "grad_norm": 1.1849634487362217, "learning_rate": 1.0718976286022157e-05, "loss": 1.6274, "step": 3693 }, { "epoch": 0.4929276754737123, "grad_norm": 1.2585836751661978, "learning_rate": 1.0714665499150888e-05, "loss": 1.5792, "step": 3694 }, { "epoch": 0.49306111555911397, "grad_norm": 0.9682726552538469, "learning_rate": 1.0710354578788247e-05, "loss": 1.5719, "step": 3695 }, { "epoch": 0.4931945556445156, "grad_norm": 1.0111852850310221, "learning_rate": 1.0706043525739454e-05, "loss": 1.5881, "step": 3696 }, { "epoch": 0.49332799572991726, "grad_norm": 1.0158485435950086, "learning_rate": 1.070173234080977e-05, "loss": 1.557, "step": 3697 }, { "epoch": 0.4934614358153189, "grad_norm": 0.9601365629223316, "learning_rate": 1.0697421024804475e-05, "loss": 1.5992, "step": 3698 }, { "epoch": 0.4935948759007206, "grad_norm": 0.9711819226222418, "learning_rate": 1.0693109578528875e-05, "loss": 1.5762, "step": 3699 }, { "epoch": 0.4937283159861222, "grad_norm": 0.9747571113243313, "learning_rate": 1.0688798002788295e-05, "loss": 1.627, "step": 3700 }, { "epoch": 0.4938617560715239, "grad_norm": 0.9155209925749146, "learning_rate": 1.068448629838809e-05, "loss": 1.5424, "step": 3701 }, { "epoch": 0.49399519615692555, "grad_norm": 0.9680580273820439, "learning_rate": 1.0680174466133639e-05, "loss": 1.5782, "step": 3702 }, { "epoch": 0.49412863624232717, "grad_norm": 0.9685689662721528, "learning_rate": 1.067586250683034e-05, "loss": 1.5927, "step": 3703 }, { "epoch": 0.49426207632772884, "grad_norm": 0.9349199396735328, "learning_rate": 1.0671550421283618e-05, "loss": 1.6169, "step": 3704 }, { "epoch": 0.4943955164131305, "grad_norm": 0.9766752432473302, "learning_rate": 1.0667238210298927e-05, "loss": 1.6029, "step": 3705 }, { "epoch": 0.4945289564985322, "grad_norm": 1.1337519998185137, "learning_rate": 1.0662925874681733e-05, "loss": 1.5993, "step": 3706 }, { "epoch": 0.4946623965839338, "grad_norm": 0.9756943770828267, "learning_rate": 1.0658613415237535e-05, "loss": 1.6645, "step": 3707 }, { "epoch": 0.49479583666933546, "grad_norm": 0.9602283668761221, "learning_rate": 1.0654300832771847e-05, "loss": 1.6323, "step": 3708 }, { "epoch": 0.49492927675473714, "grad_norm": 1.2356580385421276, "learning_rate": 1.0649988128090216e-05, "loss": 1.5881, "step": 3709 }, { "epoch": 0.49506271684013875, "grad_norm": 0.9804354142593109, "learning_rate": 1.06456753019982e-05, "loss": 1.5797, "step": 3710 }, { "epoch": 0.4951961569255404, "grad_norm": 0.9413974484117791, "learning_rate": 1.0641362355301392e-05, "loss": 1.5959, "step": 3711 }, { "epoch": 0.4953295970109421, "grad_norm": 1.0173430778833028, "learning_rate": 1.0637049288805395e-05, "loss": 1.6835, "step": 3712 }, { "epoch": 0.49546303709634376, "grad_norm": 1.0230987750185572, "learning_rate": 1.0632736103315843e-05, "loss": 1.5686, "step": 3713 }, { "epoch": 0.4955964771817454, "grad_norm": 0.9776487226730937, "learning_rate": 1.0628422799638396e-05, "loss": 1.5768, "step": 3714 }, { "epoch": 0.49572991726714705, "grad_norm": 1.1209401946738562, "learning_rate": 1.0624109378578721e-05, "loss": 1.5674, "step": 3715 }, { "epoch": 0.4958633573525487, "grad_norm": 1.1825505150445272, "learning_rate": 1.0619795840942524e-05, "loss": 1.5712, "step": 3716 }, { "epoch": 0.4959967974379504, "grad_norm": 0.939437949764124, "learning_rate": 1.0615482187535515e-05, "loss": 1.5544, "step": 3717 }, { "epoch": 0.496130237523352, "grad_norm": 1.0394749330983024, "learning_rate": 1.0611168419163444e-05, "loss": 1.6918, "step": 3718 }, { "epoch": 0.4962636776087537, "grad_norm": 1.0963483530806055, "learning_rate": 1.060685453663207e-05, "loss": 1.5935, "step": 3719 }, { "epoch": 0.49639711769415534, "grad_norm": 0.9214028923141944, "learning_rate": 1.0602540540747179e-05, "loss": 1.605, "step": 3720 }, { "epoch": 0.49653055777955696, "grad_norm": 0.9811475135324195, "learning_rate": 1.0598226432314573e-05, "loss": 1.5914, "step": 3721 }, { "epoch": 0.49666399786495863, "grad_norm": 0.9866310168569355, "learning_rate": 1.0593912212140086e-05, "loss": 1.6091, "step": 3722 }, { "epoch": 0.4967974379503603, "grad_norm": 0.9398392471927407, "learning_rate": 1.0589597881029554e-05, "loss": 1.5977, "step": 3723 }, { "epoch": 0.49693087803576197, "grad_norm": 0.9946024082527848, "learning_rate": 1.0585283439788851e-05, "loss": 1.532, "step": 3724 }, { "epoch": 0.4970643181211636, "grad_norm": 0.9747426365894848, "learning_rate": 1.0580968889223868e-05, "loss": 1.5502, "step": 3725 }, { "epoch": 0.49719775820656525, "grad_norm": 1.0170841216880055, "learning_rate": 1.0576654230140508e-05, "loss": 1.5805, "step": 3726 }, { "epoch": 0.4973311982919669, "grad_norm": 0.9642570769540154, "learning_rate": 1.0572339463344707e-05, "loss": 1.5801, "step": 3727 }, { "epoch": 0.49746463837736854, "grad_norm": 0.9628913796090154, "learning_rate": 1.0568024589642408e-05, "loss": 1.605, "step": 3728 }, { "epoch": 0.4975980784627702, "grad_norm": 0.9946599596096027, "learning_rate": 1.0563709609839581e-05, "loss": 1.5589, "step": 3729 }, { "epoch": 0.4977315185481719, "grad_norm": 1.1095821154904126, "learning_rate": 1.055939452474222e-05, "loss": 1.5844, "step": 3730 }, { "epoch": 0.49786495863357355, "grad_norm": 1.1727773862931867, "learning_rate": 1.0555079335156328e-05, "loss": 1.5687, "step": 3731 }, { "epoch": 0.49799839871897517, "grad_norm": 1.0061224792693193, "learning_rate": 1.055076404188794e-05, "loss": 1.5969, "step": 3732 }, { "epoch": 0.49813183880437684, "grad_norm": 0.9895293534993739, "learning_rate": 1.0546448645743097e-05, "loss": 1.6145, "step": 3733 }, { "epoch": 0.4982652788897785, "grad_norm": 1.0574963661665566, "learning_rate": 1.054213314752787e-05, "loss": 1.6219, "step": 3734 }, { "epoch": 0.4983987189751801, "grad_norm": 1.0560776793903364, "learning_rate": 1.0537817548048341e-05, "loss": 1.577, "step": 3735 }, { "epoch": 0.4985321590605818, "grad_norm": 1.0083197939133182, "learning_rate": 1.0533501848110617e-05, "loss": 1.6201, "step": 3736 }, { "epoch": 0.49866559914598346, "grad_norm": 0.9934023783176211, "learning_rate": 1.0529186048520825e-05, "loss": 1.5933, "step": 3737 }, { "epoch": 0.49879903923138513, "grad_norm": 1.243448461460969, "learning_rate": 1.0524870150085103e-05, "loss": 1.6314, "step": 3738 }, { "epoch": 0.49893247931678675, "grad_norm": 1.1745919731057533, "learning_rate": 1.0520554153609613e-05, "loss": 1.6081, "step": 3739 }, { "epoch": 0.4990659194021884, "grad_norm": 0.9690359243672029, "learning_rate": 1.0516238059900532e-05, "loss": 1.6033, "step": 3740 }, { "epoch": 0.4991993594875901, "grad_norm": 0.9910492376976103, "learning_rate": 1.0511921869764062e-05, "loss": 1.6147, "step": 3741 }, { "epoch": 0.4993327995729917, "grad_norm": 0.9841002474852324, "learning_rate": 1.0507605584006413e-05, "loss": 1.5855, "step": 3742 }, { "epoch": 0.4994662396583934, "grad_norm": 0.9835960197152689, "learning_rate": 1.0503289203433822e-05, "loss": 1.5912, "step": 3743 }, { "epoch": 0.49959967974379504, "grad_norm": 0.9331205335074193, "learning_rate": 1.049897272885254e-05, "loss": 1.5957, "step": 3744 }, { "epoch": 0.4997331198291967, "grad_norm": 1.0670315368640506, "learning_rate": 1.0494656161068828e-05, "loss": 1.5724, "step": 3745 }, { "epoch": 0.49986655991459833, "grad_norm": 0.9964186881122445, "learning_rate": 1.049033950088898e-05, "loss": 1.6113, "step": 3746 }, { "epoch": 0.5, "grad_norm": 0.9622713227335437, "learning_rate": 1.0486022749119294e-05, "loss": 1.6103, "step": 3747 }, { "epoch": 0.5001334400854016, "grad_norm": 1.5585035104556884, "learning_rate": 1.0481705906566092e-05, "loss": 1.5407, "step": 3748 }, { "epoch": 0.5002668801708033, "grad_norm": 0.9525397308234141, "learning_rate": 1.0477388974035713e-05, "loss": 1.56, "step": 3749 }, { "epoch": 0.500400320256205, "grad_norm": 1.2183987988540275, "learning_rate": 1.0473071952334508e-05, "loss": 1.5559, "step": 3750 }, { "epoch": 0.5005337603416066, "grad_norm": 1.0009923680441615, "learning_rate": 1.0468754842268849e-05, "loss": 1.6096, "step": 3751 }, { "epoch": 0.5006672004270083, "grad_norm": 0.9445861127024189, "learning_rate": 1.046443764464512e-05, "loss": 1.6056, "step": 3752 }, { "epoch": 0.5008006405124099, "grad_norm": 0.9742127353900115, "learning_rate": 1.046012036026973e-05, "loss": 1.6246, "step": 3753 }, { "epoch": 0.5009340805978116, "grad_norm": 0.9520930850005337, "learning_rate": 1.0455802989949092e-05, "loss": 1.5292, "step": 3754 }, { "epoch": 0.5010675206832133, "grad_norm": 0.935444191000388, "learning_rate": 1.0451485534489649e-05, "loss": 1.5537, "step": 3755 }, { "epoch": 0.5012009607686149, "grad_norm": 0.9491673736063649, "learning_rate": 1.0447167994697846e-05, "loss": 1.5746, "step": 3756 }, { "epoch": 0.5013344008540166, "grad_norm": 0.9554087936976446, "learning_rate": 1.0442850371380155e-05, "loss": 1.6095, "step": 3757 }, { "epoch": 0.5014678409394182, "grad_norm": 1.2103781704529109, "learning_rate": 1.0438532665343053e-05, "loss": 1.5771, "step": 3758 }, { "epoch": 0.5016012810248198, "grad_norm": 1.2128718749710947, "learning_rate": 1.0434214877393045e-05, "loss": 1.5952, "step": 3759 }, { "epoch": 0.5017347211102215, "grad_norm": 0.9603242533460603, "learning_rate": 1.042989700833664e-05, "loss": 1.5571, "step": 3760 }, { "epoch": 0.5018681611956232, "grad_norm": 1.1013519805324967, "learning_rate": 1.042557905898037e-05, "loss": 1.5555, "step": 3761 }, { "epoch": 0.5020016012810248, "grad_norm": 1.113312267406771, "learning_rate": 1.0421261030130776e-05, "loss": 1.5986, "step": 3762 }, { "epoch": 0.5021350413664265, "grad_norm": 0.9630320933350655, "learning_rate": 1.041694292259442e-05, "loss": 1.5864, "step": 3763 }, { "epoch": 0.5022684814518281, "grad_norm": 0.9790488821683675, "learning_rate": 1.041262473717787e-05, "loss": 1.6064, "step": 3764 }, { "epoch": 0.5024019215372297, "grad_norm": 1.0221627458345286, "learning_rate": 1.0408306474687719e-05, "loss": 1.6228, "step": 3765 }, { "epoch": 0.5025353616226315, "grad_norm": 0.9477242589703042, "learning_rate": 1.0403988135930568e-05, "loss": 1.6004, "step": 3766 }, { "epoch": 0.5026688017080331, "grad_norm": 0.9934183349840355, "learning_rate": 1.039966972171303e-05, "loss": 1.554, "step": 3767 }, { "epoch": 0.5028022417934348, "grad_norm": 0.9582444414220985, "learning_rate": 1.0395351232841739e-05, "loss": 1.595, "step": 3768 }, { "epoch": 0.5029356818788364, "grad_norm": 1.0891630480558472, "learning_rate": 1.039103267012334e-05, "loss": 1.555, "step": 3769 }, { "epoch": 0.503069121964238, "grad_norm": 0.9957005065240585, "learning_rate": 1.038671403436449e-05, "loss": 1.6035, "step": 3770 }, { "epoch": 0.5032025620496398, "grad_norm": 1.2830993460776894, "learning_rate": 1.0382395326371861e-05, "loss": 1.6037, "step": 3771 }, { "epoch": 0.5033360021350414, "grad_norm": 0.9767721406060091, "learning_rate": 1.0378076546952138e-05, "loss": 1.5673, "step": 3772 }, { "epoch": 0.503469442220443, "grad_norm": 0.9734968119255416, "learning_rate": 1.0373757696912024e-05, "loss": 1.5924, "step": 3773 }, { "epoch": 0.5036028823058447, "grad_norm": 1.1161953113263952, "learning_rate": 1.0369438777058226e-05, "loss": 1.6249, "step": 3774 }, { "epoch": 0.5037363223912463, "grad_norm": 0.9572612259574967, "learning_rate": 1.0365119788197468e-05, "loss": 1.5998, "step": 3775 }, { "epoch": 0.5038697624766479, "grad_norm": 0.9432193177518614, "learning_rate": 1.0360800731136493e-05, "loss": 1.5321, "step": 3776 }, { "epoch": 0.5040032025620497, "grad_norm": 1.0074347003449116, "learning_rate": 1.0356481606682047e-05, "loss": 1.6185, "step": 3777 }, { "epoch": 0.5041366426474513, "grad_norm": 0.9454406764250083, "learning_rate": 1.0352162415640898e-05, "loss": 1.5624, "step": 3778 }, { "epoch": 0.5042700827328529, "grad_norm": 1.004743047178849, "learning_rate": 1.034784315881982e-05, "loss": 1.6052, "step": 3779 }, { "epoch": 0.5044035228182546, "grad_norm": 1.0081054951554982, "learning_rate": 1.0343523837025598e-05, "loss": 1.5967, "step": 3780 }, { "epoch": 0.5045369629036562, "grad_norm": 1.1995648160845978, "learning_rate": 1.0339204451065035e-05, "loss": 1.5653, "step": 3781 }, { "epoch": 0.504670402989058, "grad_norm": 1.0397558826608888, "learning_rate": 1.0334885001744943e-05, "loss": 1.6437, "step": 3782 }, { "epoch": 0.5048038430744596, "grad_norm": 0.9903660282060232, "learning_rate": 1.0330565489872144e-05, "loss": 1.5705, "step": 3783 }, { "epoch": 0.5049372831598612, "grad_norm": 0.9972288626686299, "learning_rate": 1.0326245916253478e-05, "loss": 1.5884, "step": 3784 }, { "epoch": 0.5050707232452629, "grad_norm": 0.9476343942885561, "learning_rate": 1.0321926281695787e-05, "loss": 1.6023, "step": 3785 }, { "epoch": 0.5052041633306645, "grad_norm": 0.9738449763592262, "learning_rate": 1.0317606587005936e-05, "loss": 1.5609, "step": 3786 }, { "epoch": 0.5053376034160662, "grad_norm": 1.039829392353301, "learning_rate": 1.0313286832990788e-05, "loss": 1.6218, "step": 3787 }, { "epoch": 0.5054710435014679, "grad_norm": 1.0381227642185753, "learning_rate": 1.0308967020457223e-05, "loss": 1.6251, "step": 3788 }, { "epoch": 0.5056044835868695, "grad_norm": 0.9867023264523529, "learning_rate": 1.0304647150212142e-05, "loss": 1.5399, "step": 3789 }, { "epoch": 0.5057379236722711, "grad_norm": 0.9867055454700514, "learning_rate": 1.0300327223062436e-05, "loss": 1.6344, "step": 3790 }, { "epoch": 0.5058713637576728, "grad_norm": 1.0315163736938113, "learning_rate": 1.029600723981503e-05, "loss": 1.6119, "step": 3791 }, { "epoch": 0.5060048038430744, "grad_norm": 0.9670113161953792, "learning_rate": 1.0291687201276837e-05, "loss": 1.5639, "step": 3792 }, { "epoch": 0.5061382439284761, "grad_norm": 0.9921317429814296, "learning_rate": 1.0287367108254796e-05, "loss": 1.5514, "step": 3793 }, { "epoch": 0.5062716840138778, "grad_norm": 1.0082597213286801, "learning_rate": 1.0283046961555854e-05, "loss": 1.5689, "step": 3794 }, { "epoch": 0.5064051240992794, "grad_norm": 1.394709462594669, "learning_rate": 1.027872676198696e-05, "loss": 1.6046, "step": 3795 }, { "epoch": 0.5065385641846811, "grad_norm": 1.0174333180253856, "learning_rate": 1.0274406510355082e-05, "loss": 1.5815, "step": 3796 }, { "epoch": 0.5066720042700827, "grad_norm": 1.0733270672727413, "learning_rate": 1.0270086207467188e-05, "loss": 1.5879, "step": 3797 }, { "epoch": 0.5068054443554844, "grad_norm": 1.0234017065005216, "learning_rate": 1.0265765854130272e-05, "loss": 1.6075, "step": 3798 }, { "epoch": 0.5069388844408861, "grad_norm": 1.08499646847553, "learning_rate": 1.0261445451151314e-05, "loss": 1.6305, "step": 3799 }, { "epoch": 0.5070723245262877, "grad_norm": 0.9704549685581058, "learning_rate": 1.0257124999337324e-05, "loss": 1.5948, "step": 3800 }, { "epoch": 0.5072057646116893, "grad_norm": 7.9206996649868, "learning_rate": 1.0252804499495314e-05, "loss": 1.6509, "step": 3801 }, { "epoch": 0.507339204697091, "grad_norm": 1.0387910709315784, "learning_rate": 1.02484839524323e-05, "loss": 1.5677, "step": 3802 }, { "epoch": 0.5074726447824927, "grad_norm": 1.050554110371522, "learning_rate": 1.0244163358955315e-05, "loss": 1.6384, "step": 3803 }, { "epoch": 0.5076060848678943, "grad_norm": 1.1559737239306573, "learning_rate": 1.023984271987139e-05, "loss": 1.6467, "step": 3804 }, { "epoch": 0.507739524953296, "grad_norm": 1.0907297235523432, "learning_rate": 1.0235522035987581e-05, "loss": 1.632, "step": 3805 }, { "epoch": 0.5078729650386976, "grad_norm": 1.0831891794610449, "learning_rate": 1.0231201308110936e-05, "loss": 1.6218, "step": 3806 }, { "epoch": 0.5080064051240992, "grad_norm": 0.9955232658741183, "learning_rate": 1.0226880537048518e-05, "loss": 1.6157, "step": 3807 }, { "epoch": 0.508139845209501, "grad_norm": 1.0436607505823476, "learning_rate": 1.02225597236074e-05, "loss": 1.5852, "step": 3808 }, { "epoch": 0.5082732852949026, "grad_norm": 1.0380692870921138, "learning_rate": 1.0218238868594656e-05, "loss": 1.4999, "step": 3809 }, { "epoch": 0.5084067253803043, "grad_norm": 0.95584778107239, "learning_rate": 1.021391797281738e-05, "loss": 1.5926, "step": 3810 }, { "epoch": 0.5085401654657059, "grad_norm": 1.13056350804676, "learning_rate": 1.0209597037082658e-05, "loss": 1.599, "step": 3811 }, { "epoch": 0.5086736055511075, "grad_norm": 1.0358679977491891, "learning_rate": 1.02052760621976e-05, "loss": 1.6291, "step": 3812 }, { "epoch": 0.5088070456365092, "grad_norm": 0.9752350063012711, "learning_rate": 1.0200955048969307e-05, "loss": 1.5868, "step": 3813 }, { "epoch": 0.5089404857219109, "grad_norm": 2.6417596103398284, "learning_rate": 1.0196633998204903e-05, "loss": 1.5943, "step": 3814 }, { "epoch": 0.5090739258073125, "grad_norm": 1.1501400580375445, "learning_rate": 1.0192312910711504e-05, "loss": 1.5856, "step": 3815 }, { "epoch": 0.5092073658927142, "grad_norm": 1.08515119526117, "learning_rate": 1.0187991787296243e-05, "loss": 1.6207, "step": 3816 }, { "epoch": 0.5093408059781158, "grad_norm": 0.9540437758466517, "learning_rate": 1.0183670628766258e-05, "loss": 1.643, "step": 3817 }, { "epoch": 0.5094742460635174, "grad_norm": 1.0299232805390495, "learning_rate": 1.017934943592869e-05, "loss": 1.6673, "step": 3818 }, { "epoch": 0.5096076861489192, "grad_norm": 1.0039257031904898, "learning_rate": 1.017502820959069e-05, "loss": 1.5761, "step": 3819 }, { "epoch": 0.5097411262343208, "grad_norm": 0.9346765769936345, "learning_rate": 1.017070695055941e-05, "loss": 1.5594, "step": 3820 }, { "epoch": 0.5098745663197225, "grad_norm": 0.9858372448777835, "learning_rate": 1.0166385659642017e-05, "loss": 1.5746, "step": 3821 }, { "epoch": 0.5100080064051241, "grad_norm": 1.0006528357635736, "learning_rate": 1.0162064337645678e-05, "loss": 1.6062, "step": 3822 }, { "epoch": 0.5101414464905257, "grad_norm": 0.9404463154241544, "learning_rate": 1.0157742985377567e-05, "loss": 1.6299, "step": 3823 }, { "epoch": 0.5102748865759275, "grad_norm": 1.0401934215904935, "learning_rate": 1.015342160364486e-05, "loss": 1.61, "step": 3824 }, { "epoch": 0.5104083266613291, "grad_norm": 0.9544524367700885, "learning_rate": 1.0149100193254744e-05, "loss": 1.5981, "step": 3825 }, { "epoch": 0.5105417667467307, "grad_norm": 1.1258447776031195, "learning_rate": 1.0144778755014411e-05, "loss": 1.5924, "step": 3826 }, { "epoch": 0.5106752068321324, "grad_norm": 1.048900303879943, "learning_rate": 1.0140457289731056e-05, "loss": 1.5822, "step": 3827 }, { "epoch": 0.510808646917534, "grad_norm": 0.9978329789351215, "learning_rate": 1.0136135798211874e-05, "loss": 1.5754, "step": 3828 }, { "epoch": 0.5109420870029356, "grad_norm": 1.0026431461629155, "learning_rate": 1.013181428126408e-05, "loss": 1.5149, "step": 3829 }, { "epoch": 0.5110755270883374, "grad_norm": 0.9910636307338526, "learning_rate": 1.012749273969488e-05, "loss": 1.6098, "step": 3830 }, { "epoch": 0.511208967173739, "grad_norm": 0.9607665572703091, "learning_rate": 1.0123171174311482e-05, "loss": 1.596, "step": 3831 }, { "epoch": 0.5113424072591406, "grad_norm": 0.9629876623056436, "learning_rate": 1.0118849585921114e-05, "loss": 1.5757, "step": 3832 }, { "epoch": 0.5114758473445423, "grad_norm": 0.9574400509477552, "learning_rate": 1.0114527975330997e-05, "loss": 1.5883, "step": 3833 }, { "epoch": 0.5116092874299439, "grad_norm": 1.0875584418211826, "learning_rate": 1.0110206343348354e-05, "loss": 1.5646, "step": 3834 }, { "epoch": 0.5117427275153457, "grad_norm": 1.169489430147777, "learning_rate": 1.0105884690780426e-05, "loss": 1.5489, "step": 3835 }, { "epoch": 0.5118761676007473, "grad_norm": 1.4633224716352873, "learning_rate": 1.0101563018434441e-05, "loss": 1.612, "step": 3836 }, { "epoch": 0.5120096076861489, "grad_norm": 0.9544832295384696, "learning_rate": 1.0097241327117642e-05, "loss": 1.5445, "step": 3837 }, { "epoch": 0.5121430477715506, "grad_norm": 1.0099497190113178, "learning_rate": 1.0092919617637267e-05, "loss": 1.6371, "step": 3838 }, { "epoch": 0.5122764878569522, "grad_norm": 0.9869374341847159, "learning_rate": 1.0088597890800568e-05, "loss": 1.5956, "step": 3839 }, { "epoch": 0.5124099279423538, "grad_norm": 0.9740701564399505, "learning_rate": 1.008427614741479e-05, "loss": 1.6369, "step": 3840 }, { "epoch": 0.5125433680277556, "grad_norm": 1.0435801092284192, "learning_rate": 1.0079954388287187e-05, "loss": 1.6168, "step": 3841 }, { "epoch": 0.5126768081131572, "grad_norm": 1.0717965001586232, "learning_rate": 1.007563261422502e-05, "loss": 1.5815, "step": 3842 }, { "epoch": 0.5128102481985588, "grad_norm": 0.951753888240989, "learning_rate": 1.0071310826035536e-05, "loss": 1.5753, "step": 3843 }, { "epoch": 0.5129436882839605, "grad_norm": 1.0427073825832405, "learning_rate": 1.0066989024526004e-05, "loss": 1.571, "step": 3844 }, { "epoch": 0.5130771283693621, "grad_norm": 0.9665144556917987, "learning_rate": 1.0062667210503682e-05, "loss": 1.5675, "step": 3845 }, { "epoch": 0.5132105684547638, "grad_norm": 0.9841126172568884, "learning_rate": 1.0058345384775843e-05, "loss": 1.566, "step": 3846 }, { "epoch": 0.5133440085401655, "grad_norm": 1.0231266781644845, "learning_rate": 1.0054023548149747e-05, "loss": 1.6019, "step": 3847 }, { "epoch": 0.5134774486255671, "grad_norm": 1.013759415570729, "learning_rate": 1.004970170143267e-05, "loss": 1.5977, "step": 3848 }, { "epoch": 0.5136108887109688, "grad_norm": 0.9596994251252524, "learning_rate": 1.0045379845431877e-05, "loss": 1.6183, "step": 3849 }, { "epoch": 0.5137443287963704, "grad_norm": 0.9562275485239146, "learning_rate": 1.004105798095465e-05, "loss": 1.5953, "step": 3850 }, { "epoch": 0.5138777688817721, "grad_norm": 1.0389139499008198, "learning_rate": 1.0036736108808258e-05, "loss": 1.6092, "step": 3851 }, { "epoch": 0.5140112089671738, "grad_norm": 1.1170799172414176, "learning_rate": 1.0032414229799978e-05, "loss": 1.6152, "step": 3852 }, { "epoch": 0.5141446490525754, "grad_norm": 1.05178593292583, "learning_rate": 1.0028092344737093e-05, "loss": 1.5869, "step": 3853 }, { "epoch": 0.514278089137977, "grad_norm": 0.9406322853227639, "learning_rate": 1.0023770454426873e-05, "loss": 1.5934, "step": 3854 }, { "epoch": 0.5144115292233787, "grad_norm": 1.0236251156656235, "learning_rate": 1.0019448559676605e-05, "loss": 1.5402, "step": 3855 }, { "epoch": 0.5145449693087804, "grad_norm": 0.9301092670030722, "learning_rate": 1.0015126661293566e-05, "loss": 1.5849, "step": 3856 }, { "epoch": 0.514678409394182, "grad_norm": 0.9761352830656781, "learning_rate": 1.0010804760085037e-05, "loss": 1.5598, "step": 3857 }, { "epoch": 0.5148118494795837, "grad_norm": 0.9784181556059924, "learning_rate": 1.0006482856858306e-05, "loss": 1.6337, "step": 3858 }, { "epoch": 0.5149452895649853, "grad_norm": 1.0310199190875282, "learning_rate": 1.000216095242065e-05, "loss": 1.5553, "step": 3859 }, { "epoch": 0.5150787296503869, "grad_norm": 0.9413946989014104, "learning_rate": 9.997839047579351e-06, "loss": 1.5955, "step": 3860 }, { "epoch": 0.5152121697357886, "grad_norm": 1.0310793127717695, "learning_rate": 9.993517143141695e-06, "loss": 1.5891, "step": 3861 }, { "epoch": 0.5153456098211903, "grad_norm": 1.1466677120724456, "learning_rate": 9.989195239914964e-06, "loss": 1.5896, "step": 3862 }, { "epoch": 0.515479049906592, "grad_norm": 2.0571376714105436, "learning_rate": 9.984873338706439e-06, "loss": 1.6166, "step": 3863 }, { "epoch": 0.5156124899919936, "grad_norm": 0.9772929955397268, "learning_rate": 9.980551440323398e-06, "loss": 1.6124, "step": 3864 }, { "epoch": 0.5157459300773952, "grad_norm": 0.995449303499809, "learning_rate": 9.97622954557313e-06, "loss": 1.55, "step": 3865 }, { "epoch": 0.515879370162797, "grad_norm": 0.9947089431138232, "learning_rate": 9.971907655262914e-06, "loss": 1.6228, "step": 3866 }, { "epoch": 0.5160128102481986, "grad_norm": 1.0584016266307699, "learning_rate": 9.967585770200023e-06, "loss": 1.5816, "step": 3867 }, { "epoch": 0.5161462503336002, "grad_norm": 3.1959955421300643, "learning_rate": 9.963263891191743e-06, "loss": 1.584, "step": 3868 }, { "epoch": 0.5162796904190019, "grad_norm": 1.0146744361794484, "learning_rate": 9.958942019045352e-06, "loss": 1.5777, "step": 3869 }, { "epoch": 0.5164131305044035, "grad_norm": 1.0815439584288364, "learning_rate": 9.954620154568125e-06, "loss": 1.5771, "step": 3870 }, { "epoch": 0.5165465705898051, "grad_norm": 0.9947735001779133, "learning_rate": 9.950298298567333e-06, "loss": 1.6323, "step": 3871 }, { "epoch": 0.5166800106752069, "grad_norm": 1.025350666289356, "learning_rate": 9.945976451850256e-06, "loss": 1.5726, "step": 3872 }, { "epoch": 0.5168134507606085, "grad_norm": 1.0309153481695548, "learning_rate": 9.941654615224162e-06, "loss": 1.6411, "step": 3873 }, { "epoch": 0.5169468908460101, "grad_norm": 0.9706291026832317, "learning_rate": 9.937332789496318e-06, "loss": 1.5344, "step": 3874 }, { "epoch": 0.5170803309314118, "grad_norm": 1.0442149306572364, "learning_rate": 9.933010975474e-06, "loss": 1.5868, "step": 3875 }, { "epoch": 0.5172137710168134, "grad_norm": 0.9801227554963917, "learning_rate": 9.928689173964467e-06, "loss": 1.584, "step": 3876 }, { "epoch": 0.5173472111022152, "grad_norm": 0.9612770789559671, "learning_rate": 9.924367385774987e-06, "loss": 1.5624, "step": 3877 }, { "epoch": 0.5174806511876168, "grad_norm": 12.372251835234461, "learning_rate": 9.920045611712812e-06, "loss": 1.6331, "step": 3878 }, { "epoch": 0.5176140912730184, "grad_norm": 1.0323071031650972, "learning_rate": 9.915723852585211e-06, "loss": 1.5644, "step": 3879 }, { "epoch": 0.5177475313584201, "grad_norm": 1.0939706773461586, "learning_rate": 9.911402109199437e-06, "loss": 1.5847, "step": 3880 }, { "epoch": 0.5178809714438217, "grad_norm": 1.1539521887238684, "learning_rate": 9.907080382362733e-06, "loss": 1.6092, "step": 3881 }, { "epoch": 0.5180144115292233, "grad_norm": 1.4844965472462381, "learning_rate": 9.902758672882361e-06, "loss": 1.6156, "step": 3882 }, { "epoch": 0.5181478516146251, "grad_norm": 1.0477571393778307, "learning_rate": 9.898436981565562e-06, "loss": 1.5609, "step": 3883 }, { "epoch": 0.5182812917000267, "grad_norm": 0.9919178224095909, "learning_rate": 9.894115309219579e-06, "loss": 1.6325, "step": 3884 }, { "epoch": 0.5184147317854283, "grad_norm": 1.039487001315658, "learning_rate": 9.889793656651646e-06, "loss": 1.6016, "step": 3885 }, { "epoch": 0.51854817187083, "grad_norm": 0.9999173249401138, "learning_rate": 9.885472024669007e-06, "loss": 1.5301, "step": 3886 }, { "epoch": 0.5186816119562316, "grad_norm": 1.0016577339792758, "learning_rate": 9.881150414078891e-06, "loss": 1.6022, "step": 3887 }, { "epoch": 0.5188150520416333, "grad_norm": 1.022731064253213, "learning_rate": 9.87682882568852e-06, "loss": 1.6181, "step": 3888 }, { "epoch": 0.518948492127035, "grad_norm": 1.053953730841017, "learning_rate": 9.872507260305124e-06, "loss": 1.5722, "step": 3889 }, { "epoch": 0.5190819322124366, "grad_norm": 0.9483103314838104, "learning_rate": 9.868185718735922e-06, "loss": 1.5721, "step": 3890 }, { "epoch": 0.5192153722978383, "grad_norm": 0.9716784609015309, "learning_rate": 9.863864201788127e-06, "loss": 1.5958, "step": 3891 }, { "epoch": 0.5193488123832399, "grad_norm": 7.41992607929927, "learning_rate": 9.859542710268947e-06, "loss": 1.601, "step": 3892 }, { "epoch": 0.5194822524686415, "grad_norm": 1.0842848553275373, "learning_rate": 9.85522124498559e-06, "loss": 1.6163, "step": 3893 }, { "epoch": 0.5196156925540433, "grad_norm": 1.1825348756660392, "learning_rate": 9.850899806745258e-06, "loss": 1.5681, "step": 3894 }, { "epoch": 0.5197491326394449, "grad_norm": 1.219221323073541, "learning_rate": 9.846578396355144e-06, "loss": 1.6045, "step": 3895 }, { "epoch": 0.5198825727248465, "grad_norm": 0.9727027071204921, "learning_rate": 9.842257014622436e-06, "loss": 1.6032, "step": 3896 }, { "epoch": 0.5200160128102482, "grad_norm": 0.9822928747027722, "learning_rate": 9.837935662354325e-06, "loss": 1.5696, "step": 3897 }, { "epoch": 0.5201494528956498, "grad_norm": 0.9616838656896538, "learning_rate": 9.833614340357986e-06, "loss": 1.5782, "step": 3898 }, { "epoch": 0.5202828929810515, "grad_norm": 1.0479795740977809, "learning_rate": 9.829293049440592e-06, "loss": 1.5405, "step": 3899 }, { "epoch": 0.5204163330664532, "grad_norm": 1.02813031490662, "learning_rate": 9.824971790409314e-06, "loss": 1.5542, "step": 3900 }, { "epoch": 0.5205497731518548, "grad_norm": 1.1751181903794317, "learning_rate": 9.820650564071314e-06, "loss": 1.6051, "step": 3901 }, { "epoch": 0.5206832132372565, "grad_norm": 1.01933272576621, "learning_rate": 9.816329371233747e-06, "loss": 1.6075, "step": 3902 }, { "epoch": 0.5208166533226581, "grad_norm": 1.065928748207789, "learning_rate": 9.812008212703758e-06, "loss": 1.6226, "step": 3903 }, { "epoch": 0.5209500934080598, "grad_norm": 1.0491540793462275, "learning_rate": 9.8076870892885e-06, "loss": 1.62, "step": 3904 }, { "epoch": 0.5210835334934615, "grad_norm": 0.9904597628301255, "learning_rate": 9.803366001795102e-06, "loss": 1.5997, "step": 3905 }, { "epoch": 0.5212169735788631, "grad_norm": 0.9273698585055911, "learning_rate": 9.799044951030693e-06, "loss": 1.5687, "step": 3906 }, { "epoch": 0.5213504136642647, "grad_norm": 1.051717478475509, "learning_rate": 9.794723937802403e-06, "loss": 1.5735, "step": 3907 }, { "epoch": 0.5214838537496664, "grad_norm": 0.9428209123723238, "learning_rate": 9.790402962917343e-06, "loss": 1.5791, "step": 3908 }, { "epoch": 0.521617293835068, "grad_norm": 0.9740214957204884, "learning_rate": 9.786082027182626e-06, "loss": 1.6194, "step": 3909 }, { "epoch": 0.5217507339204697, "grad_norm": 0.9865488024149134, "learning_rate": 9.781761131405346e-06, "loss": 1.5704, "step": 3910 }, { "epoch": 0.5218841740058714, "grad_norm": 0.9376345788702144, "learning_rate": 9.777440276392604e-06, "loss": 1.5978, "step": 3911 }, { "epoch": 0.522017614091273, "grad_norm": 0.9550562216569386, "learning_rate": 9.773119462951487e-06, "loss": 1.6584, "step": 3912 }, { "epoch": 0.5221510541766746, "grad_norm": 0.9603118976190848, "learning_rate": 9.768798691889066e-06, "loss": 1.5987, "step": 3913 }, { "epoch": 0.5222844942620763, "grad_norm": 1.1573687573433378, "learning_rate": 9.76447796401242e-06, "loss": 1.5802, "step": 3914 }, { "epoch": 0.522417934347478, "grad_norm": 1.0874659242828772, "learning_rate": 9.760157280128611e-06, "loss": 1.629, "step": 3915 }, { "epoch": 0.5225513744328797, "grad_norm": 1.2673180853706751, "learning_rate": 9.755836641044686e-06, "loss": 1.5725, "step": 3916 }, { "epoch": 0.5226848145182813, "grad_norm": 0.933164844674267, "learning_rate": 9.7515160475677e-06, "loss": 1.6217, "step": 3917 }, { "epoch": 0.5228182546036829, "grad_norm": 0.951797289455154, "learning_rate": 9.747195500504687e-06, "loss": 1.5448, "step": 3918 }, { "epoch": 0.5229516946890846, "grad_norm": 0.9440107265148531, "learning_rate": 9.742875000662679e-06, "loss": 1.5796, "step": 3919 }, { "epoch": 0.5230851347744863, "grad_norm": 0.9563610291259368, "learning_rate": 9.738554548848686e-06, "loss": 1.5944, "step": 3920 }, { "epoch": 0.5232185748598879, "grad_norm": 0.9662588914444328, "learning_rate": 9.734234145869731e-06, "loss": 1.5565, "step": 3921 }, { "epoch": 0.5233520149452896, "grad_norm": 0.9669810427348244, "learning_rate": 9.729913792532813e-06, "loss": 1.584, "step": 3922 }, { "epoch": 0.5234854550306912, "grad_norm": 0.9499418557964407, "learning_rate": 9.725593489644925e-06, "loss": 1.5611, "step": 3923 }, { "epoch": 0.5236188951160928, "grad_norm": 0.9825228300383785, "learning_rate": 9.721273238013042e-06, "loss": 1.5699, "step": 3924 }, { "epoch": 0.5237523352014946, "grad_norm": 1.0180904933312087, "learning_rate": 9.716953038444149e-06, "loss": 1.6399, "step": 3925 }, { "epoch": 0.5238857752868962, "grad_norm": 0.9427216935978336, "learning_rate": 9.712632891745208e-06, "loss": 1.6125, "step": 3926 }, { "epoch": 0.5240192153722978, "grad_norm": 0.9985494748443655, "learning_rate": 9.708312798723168e-06, "loss": 1.607, "step": 3927 }, { "epoch": 0.5241526554576995, "grad_norm": 0.9775720421880318, "learning_rate": 9.703992760184976e-06, "loss": 1.6487, "step": 3928 }, { "epoch": 0.5242860955431011, "grad_norm": 1.0440323447007265, "learning_rate": 9.699672776937567e-06, "loss": 1.6203, "step": 3929 }, { "epoch": 0.5244195356285029, "grad_norm": 1.073896330976579, "learning_rate": 9.695352849787865e-06, "loss": 1.6193, "step": 3930 }, { "epoch": 0.5245529757139045, "grad_norm": 0.9666654017851334, "learning_rate": 9.69103297954278e-06, "loss": 1.6498, "step": 3931 }, { "epoch": 0.5246864157993061, "grad_norm": 1.1178863635267453, "learning_rate": 9.686713167009216e-06, "loss": 1.6103, "step": 3932 }, { "epoch": 0.5248198558847078, "grad_norm": 1.0066761509572055, "learning_rate": 9.68239341299407e-06, "loss": 1.6333, "step": 3933 }, { "epoch": 0.5249532959701094, "grad_norm": 0.9297439952534905, "learning_rate": 9.678073718304215e-06, "loss": 1.5461, "step": 3934 }, { "epoch": 0.525086736055511, "grad_norm": 0.9488135408582773, "learning_rate": 9.673754083746523e-06, "loss": 1.6361, "step": 3935 }, { "epoch": 0.5252201761409128, "grad_norm": 0.9845229518536815, "learning_rate": 9.669434510127858e-06, "loss": 1.6412, "step": 3936 }, { "epoch": 0.5253536162263144, "grad_norm": 0.9802847942341244, "learning_rate": 9.66511499825506e-06, "loss": 1.5989, "step": 3937 }, { "epoch": 0.525487056311716, "grad_norm": 0.9632382904131538, "learning_rate": 9.660795548934965e-06, "loss": 1.6071, "step": 3938 }, { "epoch": 0.5256204963971177, "grad_norm": 1.0699178531684905, "learning_rate": 9.656476162974403e-06, "loss": 1.6323, "step": 3939 }, { "epoch": 0.5257539364825193, "grad_norm": 0.9511806151181409, "learning_rate": 9.652156841180185e-06, "loss": 1.6131, "step": 3940 }, { "epoch": 0.525887376567921, "grad_norm": 0.9838327792979288, "learning_rate": 9.647837584359107e-06, "loss": 1.5408, "step": 3941 }, { "epoch": 0.5260208166533227, "grad_norm": 1.1196962403177173, "learning_rate": 9.643518393317953e-06, "loss": 1.583, "step": 3942 }, { "epoch": 0.5261542567387243, "grad_norm": 0.9499526650383459, "learning_rate": 9.63919926886351e-06, "loss": 1.567, "step": 3943 }, { "epoch": 0.526287696824126, "grad_norm": 0.9520997264212869, "learning_rate": 9.634880211802535e-06, "loss": 1.6393, "step": 3944 }, { "epoch": 0.5264211369095276, "grad_norm": 1.2483925528165147, "learning_rate": 9.630561222941776e-06, "loss": 1.6007, "step": 3945 }, { "epoch": 0.5265545769949292, "grad_norm": 0.9647842321723614, "learning_rate": 9.62624230308798e-06, "loss": 1.6036, "step": 3946 }, { "epoch": 0.526688017080331, "grad_norm": 1.051538449954908, "learning_rate": 9.621923453047864e-06, "loss": 1.6024, "step": 3947 }, { "epoch": 0.5268214571657326, "grad_norm": 1.0353802905237608, "learning_rate": 9.617604673628142e-06, "loss": 1.5987, "step": 3948 }, { "epoch": 0.5269548972511342, "grad_norm": 0.9689586440798221, "learning_rate": 9.61328596563551e-06, "loss": 1.5775, "step": 3949 }, { "epoch": 0.5270883373365359, "grad_norm": 1.110258725459353, "learning_rate": 9.608967329876662e-06, "loss": 1.5446, "step": 3950 }, { "epoch": 0.5272217774219375, "grad_norm": 0.9779790351795254, "learning_rate": 9.604648767158263e-06, "loss": 1.5587, "step": 3951 }, { "epoch": 0.5273552175073392, "grad_norm": 0.9136985660924678, "learning_rate": 9.600330278286972e-06, "loss": 1.5626, "step": 3952 }, { "epoch": 0.5274886575927409, "grad_norm": 0.9427079281067329, "learning_rate": 9.596011864069434e-06, "loss": 1.597, "step": 3953 }, { "epoch": 0.5276220976781425, "grad_norm": 0.9576255235969523, "learning_rate": 9.591693525312283e-06, "loss": 1.5782, "step": 3954 }, { "epoch": 0.5277555377635441, "grad_norm": 0.9802126130169267, "learning_rate": 9.587375262822132e-06, "loss": 1.572, "step": 3955 }, { "epoch": 0.5278889778489458, "grad_norm": 0.967258469940652, "learning_rate": 9.583057077405583e-06, "loss": 1.5762, "step": 3956 }, { "epoch": 0.5280224179343475, "grad_norm": 1.034695214577326, "learning_rate": 9.578738969869227e-06, "loss": 1.5677, "step": 3957 }, { "epoch": 0.5281558580197492, "grad_norm": 0.9403505616977419, "learning_rate": 9.574420941019634e-06, "loss": 1.5619, "step": 3958 }, { "epoch": 0.5282892981051508, "grad_norm": 0.9592411207184994, "learning_rate": 9.570102991663361e-06, "loss": 1.6072, "step": 3959 }, { "epoch": 0.5284227381905524, "grad_norm": 0.9743736463148753, "learning_rate": 9.565785122606957e-06, "loss": 1.5505, "step": 3960 }, { "epoch": 0.5285561782759541, "grad_norm": 1.0090249364331059, "learning_rate": 9.561467334656949e-06, "loss": 1.5881, "step": 3961 }, { "epoch": 0.5286896183613558, "grad_norm": 0.9420075410877025, "learning_rate": 9.55714962861985e-06, "loss": 1.618, "step": 3962 }, { "epoch": 0.5288230584467574, "grad_norm": 0.9692223836450211, "learning_rate": 9.552832005302154e-06, "loss": 1.5772, "step": 3963 }, { "epoch": 0.5289564985321591, "grad_norm": 0.9489004175073099, "learning_rate": 9.548514465510353e-06, "loss": 1.6164, "step": 3964 }, { "epoch": 0.5290899386175607, "grad_norm": 0.9421154885492968, "learning_rate": 9.54419701005091e-06, "loss": 1.5717, "step": 3965 }, { "epoch": 0.5292233787029623, "grad_norm": 0.9424115466890982, "learning_rate": 9.539879639730276e-06, "loss": 1.5631, "step": 3966 }, { "epoch": 0.529356818788364, "grad_norm": 2.7276724914362083, "learning_rate": 9.53556235535488e-06, "loss": 1.5696, "step": 3967 }, { "epoch": 0.5294902588737657, "grad_norm": 0.9386033684279529, "learning_rate": 9.531245157731154e-06, "loss": 1.5285, "step": 3968 }, { "epoch": 0.5296236989591674, "grad_norm": 0.8899585596068399, "learning_rate": 9.526928047665495e-06, "loss": 1.5118, "step": 3969 }, { "epoch": 0.529757139044569, "grad_norm": 0.9826968773740292, "learning_rate": 9.522611025964288e-06, "loss": 1.5888, "step": 3970 }, { "epoch": 0.5298905791299706, "grad_norm": 1.0400188151367677, "learning_rate": 9.51829409343391e-06, "loss": 1.6159, "step": 3971 }, { "epoch": 0.5300240192153723, "grad_norm": 0.9561987813115096, "learning_rate": 9.51397725088071e-06, "loss": 1.5299, "step": 3972 }, { "epoch": 0.530157459300774, "grad_norm": 0.9612234452056033, "learning_rate": 9.509660499111027e-06, "loss": 1.5819, "step": 3973 }, { "epoch": 0.5302908993861756, "grad_norm": 0.9139976394076528, "learning_rate": 9.505343838931177e-06, "loss": 1.5485, "step": 3974 }, { "epoch": 0.5304243394715773, "grad_norm": 1.0692106111452742, "learning_rate": 9.501027271147466e-06, "loss": 1.581, "step": 3975 }, { "epoch": 0.5305577795569789, "grad_norm": 1.0131134517933047, "learning_rate": 9.496710796566181e-06, "loss": 1.6409, "step": 3976 }, { "epoch": 0.5306912196423805, "grad_norm": 0.9508397877941868, "learning_rate": 9.492394415993587e-06, "loss": 1.5288, "step": 3977 }, { "epoch": 0.5308246597277823, "grad_norm": 1.0043689220791059, "learning_rate": 9.48807813023594e-06, "loss": 1.5687, "step": 3978 }, { "epoch": 0.5309580998131839, "grad_norm": 1.0338708219457289, "learning_rate": 9.483761940099471e-06, "loss": 1.6097, "step": 3979 }, { "epoch": 0.5310915398985855, "grad_norm": 0.9661798704066481, "learning_rate": 9.479445846390389e-06, "loss": 1.6003, "step": 3980 }, { "epoch": 0.5312249799839872, "grad_norm": 1.1436314427778058, "learning_rate": 9.475129849914898e-06, "loss": 1.6077, "step": 3981 }, { "epoch": 0.5313584200693888, "grad_norm": 1.1001896581114239, "learning_rate": 9.470813951479177e-06, "loss": 1.5785, "step": 3982 }, { "epoch": 0.5314918601547906, "grad_norm": 0.9585361064875515, "learning_rate": 9.466498151889385e-06, "loss": 1.5498, "step": 3983 }, { "epoch": 0.5316253002401922, "grad_norm": 1.0620960267902282, "learning_rate": 9.46218245195166e-06, "loss": 1.5932, "step": 3984 }, { "epoch": 0.5317587403255938, "grad_norm": 1.0499763780853832, "learning_rate": 9.457866852472132e-06, "loss": 1.6056, "step": 3985 }, { "epoch": 0.5318921804109955, "grad_norm": 0.973771229695574, "learning_rate": 9.453551354256907e-06, "loss": 1.5651, "step": 3986 }, { "epoch": 0.5320256204963971, "grad_norm": 0.9807116282877302, "learning_rate": 9.449235958112065e-06, "loss": 1.5977, "step": 3987 }, { "epoch": 0.5321590605817987, "grad_norm": 0.9631392166085614, "learning_rate": 9.444920664843671e-06, "loss": 1.6167, "step": 3988 }, { "epoch": 0.5322925006672005, "grad_norm": 0.993702665081086, "learning_rate": 9.440605475257782e-06, "loss": 1.5629, "step": 3989 }, { "epoch": 0.5324259407526021, "grad_norm": 1.107669942509192, "learning_rate": 9.436290390160422e-06, "loss": 1.5701, "step": 3990 }, { "epoch": 0.5325593808380037, "grad_norm": 1.0557051966228765, "learning_rate": 9.431975410357597e-06, "loss": 1.5419, "step": 3991 }, { "epoch": 0.5326928209234054, "grad_norm": 1.0739759302965814, "learning_rate": 9.427660536655296e-06, "loss": 1.6363, "step": 3992 }, { "epoch": 0.532826261008807, "grad_norm": 0.9274766595100078, "learning_rate": 9.423345769859494e-06, "loss": 1.5837, "step": 3993 }, { "epoch": 0.5329597010942086, "grad_norm": 0.9774667853108802, "learning_rate": 9.419031110776137e-06, "loss": 1.6363, "step": 3994 }, { "epoch": 0.5330931411796104, "grad_norm": 0.9665074812404877, "learning_rate": 9.41471656021115e-06, "loss": 1.5858, "step": 3995 }, { "epoch": 0.533226581265012, "grad_norm": 0.9516009165945285, "learning_rate": 9.410402118970447e-06, "loss": 1.6294, "step": 3996 }, { "epoch": 0.5333600213504137, "grad_norm": 0.9668981552357112, "learning_rate": 9.40608778785992e-06, "loss": 1.5588, "step": 3997 }, { "epoch": 0.5334934614358153, "grad_norm": 0.9784909518756532, "learning_rate": 9.401773567685428e-06, "loss": 1.5723, "step": 3998 }, { "epoch": 0.533626901521217, "grad_norm": 0.9517428419308205, "learning_rate": 9.397459459252823e-06, "loss": 1.6369, "step": 3999 }, { "epoch": 0.5337603416066187, "grad_norm": 0.9506059122541574, "learning_rate": 9.393145463367932e-06, "loss": 1.6134, "step": 4000 }, { "epoch": 0.5338937816920203, "grad_norm": 0.9352905785580472, "learning_rate": 9.388831580836559e-06, "loss": 1.596, "step": 4001 }, { "epoch": 0.5340272217774219, "grad_norm": 0.9230020903319824, "learning_rate": 9.384517812464485e-06, "loss": 1.542, "step": 4002 }, { "epoch": 0.5341606618628236, "grad_norm": 1.0665813163596254, "learning_rate": 9.38020415905748e-06, "loss": 1.5823, "step": 4003 }, { "epoch": 0.5342941019482252, "grad_norm": 0.9983547945807545, "learning_rate": 9.37589062142128e-06, "loss": 1.6159, "step": 4004 }, { "epoch": 0.5344275420336269, "grad_norm": 1.0317932796668363, "learning_rate": 9.371577200361609e-06, "loss": 1.5753, "step": 4005 }, { "epoch": 0.5345609821190286, "grad_norm": 0.9463210756193553, "learning_rate": 9.367263896684155e-06, "loss": 1.5713, "step": 4006 }, { "epoch": 0.5346944222044302, "grad_norm": 0.9578078666890666, "learning_rate": 9.362950711194607e-06, "loss": 1.5438, "step": 4007 }, { "epoch": 0.5348278622898318, "grad_norm": 0.9984488850157227, "learning_rate": 9.358637644698614e-06, "loss": 1.5737, "step": 4008 }, { "epoch": 0.5349613023752335, "grad_norm": 0.9572054111804951, "learning_rate": 9.354324698001802e-06, "loss": 1.5819, "step": 4009 }, { "epoch": 0.5350947424606352, "grad_norm": 0.9538875645675433, "learning_rate": 9.350011871909787e-06, "loss": 1.5913, "step": 4010 }, { "epoch": 0.5352281825460369, "grad_norm": 1.0188818733645557, "learning_rate": 9.345699167228156e-06, "loss": 1.5497, "step": 4011 }, { "epoch": 0.5353616226314385, "grad_norm": 0.9520922440061501, "learning_rate": 9.34138658476247e-06, "loss": 1.6327, "step": 4012 }, { "epoch": 0.5354950627168401, "grad_norm": 0.9539885539724596, "learning_rate": 9.337074125318268e-06, "loss": 1.579, "step": 4013 }, { "epoch": 0.5356285028022418, "grad_norm": 1.2835828749577056, "learning_rate": 9.332761789701076e-06, "loss": 1.5306, "step": 4014 }, { "epoch": 0.5357619428876434, "grad_norm": 1.023893323462547, "learning_rate": 9.328449578716384e-06, "loss": 1.5653, "step": 4015 }, { "epoch": 0.5358953829730451, "grad_norm": 1.2706402832851262, "learning_rate": 9.324137493169664e-06, "loss": 1.561, "step": 4016 }, { "epoch": 0.5360288230584468, "grad_norm": 0.9500733970079291, "learning_rate": 9.319825533866364e-06, "loss": 1.5684, "step": 4017 }, { "epoch": 0.5361622631438484, "grad_norm": 0.9654037579664738, "learning_rate": 9.315513701611913e-06, "loss": 1.5545, "step": 4018 }, { "epoch": 0.53629570322925, "grad_norm": 0.9637541303641998, "learning_rate": 9.31120199721171e-06, "loss": 1.6, "step": 4019 }, { "epoch": 0.5364291433146517, "grad_norm": 0.9449747779438818, "learning_rate": 9.306890421471128e-06, "loss": 1.5729, "step": 4020 }, { "epoch": 0.5365625834000534, "grad_norm": 1.015400427968874, "learning_rate": 9.302578975195527e-06, "loss": 1.5912, "step": 4021 }, { "epoch": 0.536696023485455, "grad_norm": 1.2000776620053406, "learning_rate": 9.298267659190234e-06, "loss": 1.6051, "step": 4022 }, { "epoch": 0.5368294635708567, "grad_norm": 0.9947410519922049, "learning_rate": 9.293956474260548e-06, "loss": 1.584, "step": 4023 }, { "epoch": 0.5369629036562583, "grad_norm": 0.9569770795832728, "learning_rate": 9.289645421211756e-06, "loss": 1.5406, "step": 4024 }, { "epoch": 0.53709634374166, "grad_norm": 1.0002749981588506, "learning_rate": 9.285334500849113e-06, "loss": 1.6485, "step": 4025 }, { "epoch": 0.5372297838270617, "grad_norm": 0.9254684190371253, "learning_rate": 9.281023713977848e-06, "loss": 1.6221, "step": 4026 }, { "epoch": 0.5373632239124633, "grad_norm": 1.0069500001650689, "learning_rate": 9.276713061403164e-06, "loss": 1.6204, "step": 4027 }, { "epoch": 0.537496663997865, "grad_norm": 1.0859626601584031, "learning_rate": 9.27240254393025e-06, "loss": 1.5635, "step": 4028 }, { "epoch": 0.5376301040832666, "grad_norm": 0.9601269217825726, "learning_rate": 9.268092162364256e-06, "loss": 1.6201, "step": 4029 }, { "epoch": 0.5377635441686682, "grad_norm": 1.005400728893139, "learning_rate": 9.263781917510312e-06, "loss": 1.559, "step": 4030 }, { "epoch": 0.53789698425407, "grad_norm": 0.9879107408018795, "learning_rate": 9.259471810173522e-06, "loss": 1.5615, "step": 4031 }, { "epoch": 0.5380304243394716, "grad_norm": 1.035330873394029, "learning_rate": 9.255161841158968e-06, "loss": 1.6013, "step": 4032 }, { "epoch": 0.5381638644248732, "grad_norm": 1.092318326623765, "learning_rate": 9.250852011271703e-06, "loss": 1.6165, "step": 4033 }, { "epoch": 0.5382973045102749, "grad_norm": 1.018361237141597, "learning_rate": 9.24654232131675e-06, "loss": 1.5758, "step": 4034 }, { "epoch": 0.5384307445956765, "grad_norm": 0.9345797395155986, "learning_rate": 9.242232772099116e-06, "loss": 1.5766, "step": 4035 }, { "epoch": 0.5385641846810783, "grad_norm": 0.9679231135179047, "learning_rate": 9.237923364423772e-06, "loss": 1.5835, "step": 4036 }, { "epoch": 0.5386976247664799, "grad_norm": 1.0031021300962772, "learning_rate": 9.233614099095668e-06, "loss": 1.6144, "step": 4037 }, { "epoch": 0.5388310648518815, "grad_norm": 0.9915425586784125, "learning_rate": 9.229304976919721e-06, "loss": 1.6167, "step": 4038 }, { "epoch": 0.5389645049372832, "grad_norm": 0.9956070431123422, "learning_rate": 9.224995998700832e-06, "loss": 1.6112, "step": 4039 }, { "epoch": 0.5390979450226848, "grad_norm": 0.9175317898321539, "learning_rate": 9.220687165243867e-06, "loss": 1.5324, "step": 4040 }, { "epoch": 0.5392313851080864, "grad_norm": 0.9964567601709668, "learning_rate": 9.216378477353664e-06, "loss": 1.5438, "step": 4041 }, { "epoch": 0.5393648251934882, "grad_norm": 1.2134149238548315, "learning_rate": 9.212069935835044e-06, "loss": 1.5954, "step": 4042 }, { "epoch": 0.5394982652788898, "grad_norm": 0.9893376951501015, "learning_rate": 9.207761541492788e-06, "loss": 1.635, "step": 4043 }, { "epoch": 0.5396317053642914, "grad_norm": 0.9752793339516473, "learning_rate": 9.203453295131657e-06, "loss": 1.5758, "step": 4044 }, { "epoch": 0.5397651454496931, "grad_norm": 0.9849050003080004, "learning_rate": 9.199145197556375e-06, "loss": 1.59, "step": 4045 }, { "epoch": 0.5398985855350947, "grad_norm": 1.3161223332489436, "learning_rate": 9.194837249571658e-06, "loss": 1.6444, "step": 4046 }, { "epoch": 0.5400320256204963, "grad_norm": 1.0146112609602544, "learning_rate": 9.190529451982173e-06, "loss": 1.615, "step": 4047 }, { "epoch": 0.5401654657058981, "grad_norm": 0.9544932036464441, "learning_rate": 9.18622180559257e-06, "loss": 1.6292, "step": 4048 }, { "epoch": 0.5402989057912997, "grad_norm": 0.9541877435381779, "learning_rate": 9.18191431120747e-06, "loss": 1.5881, "step": 4049 }, { "epoch": 0.5404323458767014, "grad_norm": 0.9729000021252879, "learning_rate": 9.177606969631464e-06, "loss": 1.577, "step": 4050 }, { "epoch": 0.540565785962103, "grad_norm": 0.9802692030861584, "learning_rate": 9.173299781669112e-06, "loss": 1.5656, "step": 4051 }, { "epoch": 0.5406992260475046, "grad_norm": 0.9490128443326544, "learning_rate": 9.168992748124943e-06, "loss": 1.5705, "step": 4052 }, { "epoch": 0.5408326661329064, "grad_norm": 0.9644978819486809, "learning_rate": 9.164685869803474e-06, "loss": 1.5745, "step": 4053 }, { "epoch": 0.540966106218308, "grad_norm": 0.9346805372231108, "learning_rate": 9.160379147509172e-06, "loss": 1.5811, "step": 4054 }, { "epoch": 0.5410995463037096, "grad_norm": 1.0592259248945302, "learning_rate": 9.156072582046483e-06, "loss": 1.5821, "step": 4055 }, { "epoch": 0.5412329863891113, "grad_norm": 0.9912214570473821, "learning_rate": 9.15176617421983e-06, "loss": 1.61, "step": 4056 }, { "epoch": 0.5413664264745129, "grad_norm": 0.9898284421098535, "learning_rate": 9.147459924833599e-06, "loss": 1.574, "step": 4057 }, { "epoch": 0.5414998665599146, "grad_norm": 0.9503201276694209, "learning_rate": 9.143153834692147e-06, "loss": 1.5615, "step": 4058 }, { "epoch": 0.5416333066453163, "grad_norm": 0.933992350962357, "learning_rate": 9.138847904599802e-06, "loss": 1.5795, "step": 4059 }, { "epoch": 0.5417667467307179, "grad_norm": 1.188754739203571, "learning_rate": 9.134542135360863e-06, "loss": 1.5382, "step": 4060 }, { "epoch": 0.5419001868161195, "grad_norm": 0.9697813847683896, "learning_rate": 9.130236527779602e-06, "loss": 1.5861, "step": 4061 }, { "epoch": 0.5420336269015212, "grad_norm": 0.9800137082741205, "learning_rate": 9.125931082660255e-06, "loss": 1.5988, "step": 4062 }, { "epoch": 0.5421670669869229, "grad_norm": 1.0320301308567623, "learning_rate": 9.121625800807028e-06, "loss": 1.5667, "step": 4063 }, { "epoch": 0.5423005070723246, "grad_norm": 0.9333833148734103, "learning_rate": 9.117320683024104e-06, "loss": 1.5643, "step": 4064 }, { "epoch": 0.5424339471577262, "grad_norm": 0.9278004209324023, "learning_rate": 9.113015730115626e-06, "loss": 1.5886, "step": 4065 }, { "epoch": 0.5425673872431278, "grad_norm": 0.9639727825463563, "learning_rate": 9.108710942885705e-06, "loss": 1.5911, "step": 4066 }, { "epoch": 0.5427008273285295, "grad_norm": 0.9307792098772837, "learning_rate": 9.104406322138438e-06, "loss": 1.5635, "step": 4067 }, { "epoch": 0.5428342674139311, "grad_norm": 1.0667447561969865, "learning_rate": 9.100101868677872e-06, "loss": 1.5563, "step": 4068 }, { "epoch": 0.5429677074993328, "grad_norm": 0.912096392390571, "learning_rate": 9.095797583308033e-06, "loss": 1.566, "step": 4069 }, { "epoch": 0.5431011475847345, "grad_norm": 1.3022530608595129, "learning_rate": 9.091493466832905e-06, "loss": 1.5883, "step": 4070 }, { "epoch": 0.5432345876701361, "grad_norm": 0.9697016564423682, "learning_rate": 9.087189520056459e-06, "loss": 1.6272, "step": 4071 }, { "epoch": 0.5433680277555377, "grad_norm": 1.1713917719657274, "learning_rate": 9.082885743782617e-06, "loss": 1.539, "step": 4072 }, { "epoch": 0.5435014678409394, "grad_norm": 0.9945992018009926, "learning_rate": 9.078582138815272e-06, "loss": 1.5641, "step": 4073 }, { "epoch": 0.5436349079263411, "grad_norm": 1.09987126567883, "learning_rate": 9.074278705958297e-06, "loss": 1.5987, "step": 4074 }, { "epoch": 0.5437683480117427, "grad_norm": 0.9528284537379281, "learning_rate": 9.069975446015522e-06, "loss": 1.5384, "step": 4075 }, { "epoch": 0.5439017880971444, "grad_norm": 5.663201568713656, "learning_rate": 9.065672359790743e-06, "loss": 1.4918, "step": 4076 }, { "epoch": 0.544035228182546, "grad_norm": 0.9679094263779862, "learning_rate": 9.061369448087725e-06, "loss": 1.5623, "step": 4077 }, { "epoch": 0.5441686682679477, "grad_norm": 2.567953353801192, "learning_rate": 9.057066711710212e-06, "loss": 1.5437, "step": 4078 }, { "epoch": 0.5443021083533494, "grad_norm": 1.1402343926159393, "learning_rate": 9.052764151461902e-06, "loss": 1.6053, "step": 4079 }, { "epoch": 0.544435548438751, "grad_norm": 1.0524535435440139, "learning_rate": 9.048461768146461e-06, "loss": 1.678, "step": 4080 }, { "epoch": 0.5445689885241527, "grad_norm": 0.9636960807728361, "learning_rate": 9.044159562567532e-06, "loss": 1.6102, "step": 4081 }, { "epoch": 0.5447024286095543, "grad_norm": 1.0002275536656045, "learning_rate": 9.03985753552871e-06, "loss": 1.6551, "step": 4082 }, { "epoch": 0.5448358686949559, "grad_norm": 0.993667451230517, "learning_rate": 9.035555687833571e-06, "loss": 1.5973, "step": 4083 }, { "epoch": 0.5449693087803577, "grad_norm": 1.4929672107006777, "learning_rate": 9.031254020285648e-06, "loss": 1.5943, "step": 4084 }, { "epoch": 0.5451027488657593, "grad_norm": 0.9664973212133721, "learning_rate": 9.026952533688445e-06, "loss": 1.5989, "step": 4085 }, { "epoch": 0.5452361889511609, "grad_norm": 0.9850092658759886, "learning_rate": 9.022651228845431e-06, "loss": 1.6171, "step": 4086 }, { "epoch": 0.5453696290365626, "grad_norm": 0.971030931040022, "learning_rate": 9.018350106560036e-06, "loss": 1.5712, "step": 4087 }, { "epoch": 0.5455030691219642, "grad_norm": 1.2898471611348323, "learning_rate": 9.014049167635668e-06, "loss": 1.5786, "step": 4088 }, { "epoch": 0.5456365092073658, "grad_norm": 0.9722886044790409, "learning_rate": 9.00974841287569e-06, "loss": 1.5522, "step": 4089 }, { "epoch": 0.5457699492927676, "grad_norm": 1.274819296685493, "learning_rate": 9.005447843083436e-06, "loss": 1.6179, "step": 4090 }, { "epoch": 0.5459033893781692, "grad_norm": 0.9802333166916215, "learning_rate": 9.001147459062196e-06, "loss": 1.5479, "step": 4091 }, { "epoch": 0.5460368294635709, "grad_norm": 0.9612343249635548, "learning_rate": 8.996847261615242e-06, "loss": 1.5777, "step": 4092 }, { "epoch": 0.5461702695489725, "grad_norm": 1.0934912868937452, "learning_rate": 8.992547251545798e-06, "loss": 1.5733, "step": 4093 }, { "epoch": 0.5463037096343741, "grad_norm": 1.051460553342782, "learning_rate": 8.988247429657058e-06, "loss": 1.5806, "step": 4094 }, { "epoch": 0.5464371497197759, "grad_norm": 1.0365839786798572, "learning_rate": 8.983947796752174e-06, "loss": 1.5982, "step": 4095 }, { "epoch": 0.5465705898051775, "grad_norm": 1.1028438221141459, "learning_rate": 8.979648353634278e-06, "loss": 1.6002, "step": 4096 }, { "epoch": 0.5467040298905791, "grad_norm": 0.9267409999435865, "learning_rate": 8.975349101106451e-06, "loss": 1.5865, "step": 4097 }, { "epoch": 0.5468374699759808, "grad_norm": 0.9232565563556838, "learning_rate": 8.971050039971743e-06, "loss": 1.5637, "step": 4098 }, { "epoch": 0.5469709100613824, "grad_norm": 1.0548309109185285, "learning_rate": 8.966751171033177e-06, "loss": 1.5598, "step": 4099 }, { "epoch": 0.547104350146784, "grad_norm": 0.9446200469309572, "learning_rate": 8.962452495093729e-06, "loss": 1.5853, "step": 4100 }, { "epoch": 0.5472377902321858, "grad_norm": 0.9503336907790122, "learning_rate": 8.95815401295634e-06, "loss": 1.5885, "step": 4101 }, { "epoch": 0.5473712303175874, "grad_norm": 1.1631702714115804, "learning_rate": 8.953855725423918e-06, "loss": 1.5864, "step": 4102 }, { "epoch": 0.5475046704029891, "grad_norm": 0.928539952699467, "learning_rate": 8.949557633299335e-06, "loss": 1.5443, "step": 4103 }, { "epoch": 0.5476381104883907, "grad_norm": 0.9789123315297109, "learning_rate": 8.945259737385432e-06, "loss": 1.6024, "step": 4104 }, { "epoch": 0.5477715505737923, "grad_norm": 0.9695913713026796, "learning_rate": 8.940962038484997e-06, "loss": 1.5721, "step": 4105 }, { "epoch": 0.5479049906591941, "grad_norm": 1.2560495210786051, "learning_rate": 8.936664537400797e-06, "loss": 1.5756, "step": 4106 }, { "epoch": 0.5480384307445957, "grad_norm": 1.0698213851452432, "learning_rate": 8.932367234935554e-06, "loss": 1.5682, "step": 4107 }, { "epoch": 0.5481718708299973, "grad_norm": 1.0468767253083986, "learning_rate": 8.928070131891959e-06, "loss": 1.5945, "step": 4108 }, { "epoch": 0.548305310915399, "grad_norm": 0.9589490681881839, "learning_rate": 8.923773229072653e-06, "loss": 1.6103, "step": 4109 }, { "epoch": 0.5484387510008006, "grad_norm": 0.9736679031008811, "learning_rate": 8.919476527280257e-06, "loss": 1.543, "step": 4110 }, { "epoch": 0.5485721910862023, "grad_norm": 0.9528406682287175, "learning_rate": 8.915180027317345e-06, "loss": 1.58, "step": 4111 }, { "epoch": 0.548705631171604, "grad_norm": 1.0097173511452968, "learning_rate": 8.91088372998645e-06, "loss": 1.616, "step": 4112 }, { "epoch": 0.5488390712570056, "grad_norm": 0.9881054764092163, "learning_rate": 8.906587636090078e-06, "loss": 1.5138, "step": 4113 }, { "epoch": 0.5489725113424072, "grad_norm": 0.9333735783197412, "learning_rate": 8.902291746430686e-06, "loss": 1.5852, "step": 4114 }, { "epoch": 0.5491059514278089, "grad_norm": 0.9489550127540991, "learning_rate": 8.897996061810697e-06, "loss": 1.563, "step": 4115 }, { "epoch": 0.5492393915132106, "grad_norm": 0.962585687561712, "learning_rate": 8.893700583032494e-06, "loss": 1.5634, "step": 4116 }, { "epoch": 0.5493728315986123, "grad_norm": 1.0345096599792367, "learning_rate": 8.88940531089843e-06, "loss": 1.5799, "step": 4117 }, { "epoch": 0.5495062716840139, "grad_norm": 0.9472708442632688, "learning_rate": 8.885110246210809e-06, "loss": 1.5588, "step": 4118 }, { "epoch": 0.5496397117694155, "grad_norm": 0.9677807823462035, "learning_rate": 8.880815389771896e-06, "loss": 1.5661, "step": 4119 }, { "epoch": 0.5497731518548172, "grad_norm": 0.9496679955804133, "learning_rate": 8.876520742383931e-06, "loss": 1.5943, "step": 4120 }, { "epoch": 0.5499065919402188, "grad_norm": 0.9783924610253193, "learning_rate": 8.8722263048491e-06, "loss": 1.566, "step": 4121 }, { "epoch": 0.5500400320256205, "grad_norm": 1.1878684772471404, "learning_rate": 8.867932077969555e-06, "loss": 1.6396, "step": 4122 }, { "epoch": 0.5501734721110222, "grad_norm": 0.9611842305243232, "learning_rate": 8.863638062547406e-06, "loss": 1.5659, "step": 4123 }, { "epoch": 0.5503069121964238, "grad_norm": 0.9306514165663081, "learning_rate": 8.85934425938473e-06, "loss": 1.606, "step": 4124 }, { "epoch": 0.5504403522818254, "grad_norm": 0.9465699027935999, "learning_rate": 8.85505066928356e-06, "loss": 1.6102, "step": 4125 }, { "epoch": 0.5505737923672271, "grad_norm": 0.9749768439700673, "learning_rate": 8.85075729304589e-06, "loss": 1.5877, "step": 4126 }, { "epoch": 0.5507072324526288, "grad_norm": 0.9815904995653956, "learning_rate": 8.846464131473671e-06, "loss": 1.538, "step": 4127 }, { "epoch": 0.5508406725380304, "grad_norm": 0.9804198563493697, "learning_rate": 8.842171185368822e-06, "loss": 1.5889, "step": 4128 }, { "epoch": 0.5509741126234321, "grad_norm": 1.0046191555459012, "learning_rate": 8.837878455533213e-06, "loss": 1.563, "step": 4129 }, { "epoch": 0.5511075527088337, "grad_norm": 0.9815357324359852, "learning_rate": 8.833585942768672e-06, "loss": 1.605, "step": 4130 }, { "epoch": 0.5512409927942354, "grad_norm": 1.0030901091320008, "learning_rate": 8.829293647877002e-06, "loss": 1.5989, "step": 4131 }, { "epoch": 0.551374432879637, "grad_norm": 1.0070184181295139, "learning_rate": 8.825001571659948e-06, "loss": 1.6026, "step": 4132 }, { "epoch": 0.5515078729650387, "grad_norm": 0.9393916243147451, "learning_rate": 8.820709714919223e-06, "loss": 1.5979, "step": 4133 }, { "epoch": 0.5516413130504404, "grad_norm": 0.9663628069756637, "learning_rate": 8.816418078456493e-06, "loss": 1.6353, "step": 4134 }, { "epoch": 0.551774753135842, "grad_norm": 1.0337970146828375, "learning_rate": 8.812126663073394e-06, "loss": 1.591, "step": 4135 }, { "epoch": 0.5519081932212436, "grad_norm": 0.944300804007319, "learning_rate": 8.807835469571511e-06, "loss": 1.5938, "step": 4136 }, { "epoch": 0.5520416333066454, "grad_norm": 0.9431341497957093, "learning_rate": 8.803544498752383e-06, "loss": 1.5962, "step": 4137 }, { "epoch": 0.552175073392047, "grad_norm": 0.932632959879075, "learning_rate": 8.799253751417526e-06, "loss": 1.5937, "step": 4138 }, { "epoch": 0.5523085134774486, "grad_norm": 0.9584857084573524, "learning_rate": 8.794963228368397e-06, "loss": 1.5368, "step": 4139 }, { "epoch": 0.5524419535628503, "grad_norm": 0.959115772237164, "learning_rate": 8.790672930406416e-06, "loss": 1.5858, "step": 4140 }, { "epoch": 0.5525753936482519, "grad_norm": 1.1797548555444335, "learning_rate": 8.78638285833296e-06, "loss": 1.5726, "step": 4141 }, { "epoch": 0.5527088337336535, "grad_norm": 0.9424673897338993, "learning_rate": 8.782093012949373e-06, "loss": 1.5949, "step": 4142 }, { "epoch": 0.5528422738190553, "grad_norm": 0.9713375108175255, "learning_rate": 8.777803395056945e-06, "loss": 1.6092, "step": 4143 }, { "epoch": 0.5529757139044569, "grad_norm": 0.9774551665234783, "learning_rate": 8.773514005456923e-06, "loss": 1.5954, "step": 4144 }, { "epoch": 0.5531091539898586, "grad_norm": 1.0793312067897565, "learning_rate": 8.769224844950525e-06, "loss": 1.5821, "step": 4145 }, { "epoch": 0.5532425940752602, "grad_norm": 1.8746469240528907, "learning_rate": 8.76493591433891e-06, "loss": 1.5594, "step": 4146 }, { "epoch": 0.5533760341606618, "grad_norm": 0.990387748915661, "learning_rate": 8.760647214423206e-06, "loss": 1.5758, "step": 4147 }, { "epoch": 0.5535094742460636, "grad_norm": 0.9682888207800222, "learning_rate": 8.756358746004492e-06, "loss": 1.574, "step": 4148 }, { "epoch": 0.5536429143314652, "grad_norm": 1.10384040970137, "learning_rate": 8.752070509883805e-06, "loss": 1.5835, "step": 4149 }, { "epoch": 0.5537763544168668, "grad_norm": 0.9794947979779128, "learning_rate": 8.747782506862139e-06, "loss": 1.5744, "step": 4150 }, { "epoch": 0.5539097945022685, "grad_norm": 1.0987054858543068, "learning_rate": 8.74349473774044e-06, "loss": 1.6253, "step": 4151 }, { "epoch": 0.5540432345876701, "grad_norm": 0.9490644187289331, "learning_rate": 8.739207203319622e-06, "loss": 1.5796, "step": 4152 }, { "epoch": 0.5541766746730717, "grad_norm": 1.0378405608665844, "learning_rate": 8.734919904400544e-06, "loss": 1.5961, "step": 4153 }, { "epoch": 0.5543101147584735, "grad_norm": 0.9322492359210561, "learning_rate": 8.730632841784023e-06, "loss": 1.5715, "step": 4154 }, { "epoch": 0.5544435548438751, "grad_norm": 0.9781596956125678, "learning_rate": 8.726346016270833e-06, "loss": 1.5597, "step": 4155 }, { "epoch": 0.5545769949292767, "grad_norm": 0.9527266527507525, "learning_rate": 8.72205942866171e-06, "loss": 1.6075, "step": 4156 }, { "epoch": 0.5547104350146784, "grad_norm": 0.9106864869517425, "learning_rate": 8.717773079757333e-06, "loss": 1.5677, "step": 4157 }, { "epoch": 0.55484387510008, "grad_norm": 0.9851888382485386, "learning_rate": 8.713486970358348e-06, "loss": 1.5512, "step": 4158 }, { "epoch": 0.5549773151854818, "grad_norm": 0.986066626003795, "learning_rate": 8.709201101265346e-06, "loss": 1.6065, "step": 4159 }, { "epoch": 0.5551107552708834, "grad_norm": 0.9632922780457756, "learning_rate": 8.704915473278885e-06, "loss": 1.5635, "step": 4160 }, { "epoch": 0.555244195356285, "grad_norm": 0.9616053211021358, "learning_rate": 8.700630087199468e-06, "loss": 1.5781, "step": 4161 }, { "epoch": 0.5553776354416867, "grad_norm": 0.9536548628997336, "learning_rate": 8.696344943827553e-06, "loss": 1.5276, "step": 4162 }, { "epoch": 0.5555110755270883, "grad_norm": 1.0768239441946357, "learning_rate": 8.692060043963565e-06, "loss": 1.6121, "step": 4163 }, { "epoch": 0.55564451561249, "grad_norm": 1.214532797811325, "learning_rate": 8.68777538840787e-06, "loss": 1.6292, "step": 4164 }, { "epoch": 0.5557779556978917, "grad_norm": 0.9333872970846957, "learning_rate": 8.68349097796079e-06, "loss": 1.5566, "step": 4165 }, { "epoch": 0.5559113957832933, "grad_norm": 0.9800519597605887, "learning_rate": 8.679206813422605e-06, "loss": 1.5816, "step": 4166 }, { "epoch": 0.5560448358686949, "grad_norm": 1.0132442396753862, "learning_rate": 8.67492289559355e-06, "loss": 1.5314, "step": 4167 }, { "epoch": 0.5561782759540966, "grad_norm": 0.961576475954957, "learning_rate": 8.670639225273813e-06, "loss": 1.6011, "step": 4168 }, { "epoch": 0.5563117160394982, "grad_norm": 0.9788757658928525, "learning_rate": 8.66635580326353e-06, "loss": 1.5529, "step": 4169 }, { "epoch": 0.5564451561248999, "grad_norm": 1.0085557507010017, "learning_rate": 8.662072630362803e-06, "loss": 1.5839, "step": 4170 }, { "epoch": 0.5565785962103016, "grad_norm": 1.0390389689513875, "learning_rate": 8.657789707371675e-06, "loss": 1.6039, "step": 4171 }, { "epoch": 0.5567120362957032, "grad_norm": 8.058031207442108, "learning_rate": 8.65350703509015e-06, "loss": 1.6436, "step": 4172 }, { "epoch": 0.5568454763811049, "grad_norm": 0.9705636062848059, "learning_rate": 8.649224614318174e-06, "loss": 1.5705, "step": 4173 }, { "epoch": 0.5569789164665065, "grad_norm": 0.9395606140307743, "learning_rate": 8.644942445855666e-06, "loss": 1.619, "step": 4174 }, { "epoch": 0.5571123565519082, "grad_norm": 0.9746333291733118, "learning_rate": 8.64066053050248e-06, "loss": 1.5455, "step": 4175 }, { "epoch": 0.5572457966373099, "grad_norm": 0.9673212477085391, "learning_rate": 8.636378869058427e-06, "loss": 1.5958, "step": 4176 }, { "epoch": 0.5573792367227115, "grad_norm": 0.9991053615117632, "learning_rate": 8.63209746232328e-06, "loss": 1.6009, "step": 4177 }, { "epoch": 0.5575126768081131, "grad_norm": 0.9645490800883675, "learning_rate": 8.627816311096753e-06, "loss": 1.5588, "step": 4178 }, { "epoch": 0.5576461168935148, "grad_norm": 1.0862567601365494, "learning_rate": 8.623535416178516e-06, "loss": 1.6125, "step": 4179 }, { "epoch": 0.5577795569789165, "grad_norm": 0.9565579659399656, "learning_rate": 8.619254778368187e-06, "loss": 1.5513, "step": 4180 }, { "epoch": 0.5579129970643181, "grad_norm": 1.0678592227935304, "learning_rate": 8.61497439846535e-06, "loss": 1.6321, "step": 4181 }, { "epoch": 0.5580464371497198, "grad_norm": 0.96056585056449, "learning_rate": 8.610694277269526e-06, "loss": 1.6026, "step": 4182 }, { "epoch": 0.5581798772351214, "grad_norm": 1.011056257284224, "learning_rate": 8.60641441558019e-06, "loss": 1.5451, "step": 4183 }, { "epoch": 0.5583133173205231, "grad_norm": 0.9413247204545455, "learning_rate": 8.602134814196776e-06, "loss": 1.5566, "step": 4184 }, { "epoch": 0.5584467574059248, "grad_norm": 1.0445816035815227, "learning_rate": 8.597855473918664e-06, "loss": 1.5626, "step": 4185 }, { "epoch": 0.5585801974913264, "grad_norm": 0.9575339480339196, "learning_rate": 8.593576395545187e-06, "loss": 1.6168, "step": 4186 }, { "epoch": 0.5587136375767281, "grad_norm": 0.9282496732846511, "learning_rate": 8.589297579875624e-06, "loss": 1.5874, "step": 4187 }, { "epoch": 0.5588470776621297, "grad_norm": 0.9840758991935276, "learning_rate": 8.585019027709211e-06, "loss": 1.6467, "step": 4188 }, { "epoch": 0.5589805177475313, "grad_norm": 1.0328945704121253, "learning_rate": 8.580740739845138e-06, "loss": 1.5509, "step": 4189 }, { "epoch": 0.559113957832933, "grad_norm": 1.0478866163329408, "learning_rate": 8.576462717082532e-06, "loss": 1.5587, "step": 4190 }, { "epoch": 0.5592473979183347, "grad_norm": 1.1273918515526908, "learning_rate": 8.572184960220485e-06, "loss": 1.6093, "step": 4191 }, { "epoch": 0.5593808380037363, "grad_norm": 0.9626145010766713, "learning_rate": 8.56790747005803e-06, "loss": 1.5516, "step": 4192 }, { "epoch": 0.559514278089138, "grad_norm": 2.3623694177009043, "learning_rate": 8.563630247394157e-06, "loss": 1.6187, "step": 4193 }, { "epoch": 0.5596477181745396, "grad_norm": 1.056578864134312, "learning_rate": 8.559353293027793e-06, "loss": 1.5665, "step": 4194 }, { "epoch": 0.5597811582599412, "grad_norm": 0.9458997799992117, "learning_rate": 8.555076607757837e-06, "loss": 1.5715, "step": 4195 }, { "epoch": 0.559914598345343, "grad_norm": 0.9618299176959383, "learning_rate": 8.55080019238312e-06, "loss": 1.5743, "step": 4196 }, { "epoch": 0.5600480384307446, "grad_norm": 0.9743576637342902, "learning_rate": 8.546524047702428e-06, "loss": 1.6132, "step": 4197 }, { "epoch": 0.5601814785161463, "grad_norm": 0.954261768012623, "learning_rate": 8.54224817451449e-06, "loss": 1.5947, "step": 4198 }, { "epoch": 0.5603149186015479, "grad_norm": 0.967928239432757, "learning_rate": 8.537972573618e-06, "loss": 1.5806, "step": 4199 }, { "epoch": 0.5604483586869495, "grad_norm": 0.9509794485601105, "learning_rate": 8.53369724581159e-06, "loss": 1.5455, "step": 4200 }, { "epoch": 0.5605817987723513, "grad_norm": 0.948976660250996, "learning_rate": 8.529422191893832e-06, "loss": 1.6024, "step": 4201 }, { "epoch": 0.5607152388577529, "grad_norm": 0.9018937165977122, "learning_rate": 8.525147412663272e-06, "loss": 1.5771, "step": 4202 }, { "epoch": 0.5608486789431545, "grad_norm": 0.9881572931305829, "learning_rate": 8.520872908918382e-06, "loss": 1.5898, "step": 4203 }, { "epoch": 0.5609821190285562, "grad_norm": 1.124313244736089, "learning_rate": 8.516598681457595e-06, "loss": 1.6095, "step": 4204 }, { "epoch": 0.5611155591139578, "grad_norm": 1.1089045566415832, "learning_rate": 8.512324731079277e-06, "loss": 1.5805, "step": 4205 }, { "epoch": 0.5612489991993594, "grad_norm": 0.9797580686965339, "learning_rate": 8.508051058581768e-06, "loss": 1.5679, "step": 4206 }, { "epoch": 0.5613824392847612, "grad_norm": 0.9722387996589369, "learning_rate": 8.503777664763336e-06, "loss": 1.5833, "step": 4207 }, { "epoch": 0.5615158793701628, "grad_norm": 1.07631421997575, "learning_rate": 8.499504550422195e-06, "loss": 1.562, "step": 4208 }, { "epoch": 0.5616493194555644, "grad_norm": 1.0147686908909404, "learning_rate": 8.495231716356525e-06, "loss": 1.5465, "step": 4209 }, { "epoch": 0.5617827595409661, "grad_norm": 0.9427076649487832, "learning_rate": 8.490959163364436e-06, "loss": 1.5497, "step": 4210 }, { "epoch": 0.5619161996263677, "grad_norm": 1.1230893036871608, "learning_rate": 8.486686892243996e-06, "loss": 1.5773, "step": 4211 }, { "epoch": 0.5620496397117695, "grad_norm": 0.9289630374962218, "learning_rate": 8.482414903793213e-06, "loss": 1.6441, "step": 4212 }, { "epoch": 0.5621830797971711, "grad_norm": 0.9520481957952125, "learning_rate": 8.478143198810048e-06, "loss": 1.6163, "step": 4213 }, { "epoch": 0.5623165198825727, "grad_norm": 1.0545873114232873, "learning_rate": 8.473871778092408e-06, "loss": 1.5663, "step": 4214 }, { "epoch": 0.5624499599679744, "grad_norm": 0.9532253546176354, "learning_rate": 8.469600642438139e-06, "loss": 1.5472, "step": 4215 }, { "epoch": 0.562583400053376, "grad_norm": 0.9979955697123374, "learning_rate": 8.465329792645051e-06, "loss": 1.5256, "step": 4216 }, { "epoch": 0.5627168401387777, "grad_norm": 0.9702416076361973, "learning_rate": 8.461059229510886e-06, "loss": 1.541, "step": 4217 }, { "epoch": 0.5628502802241794, "grad_norm": 0.998163150298418, "learning_rate": 8.456788953833333e-06, "loss": 1.586, "step": 4218 }, { "epoch": 0.562983720309581, "grad_norm": 0.9618357742711305, "learning_rate": 8.45251896641003e-06, "loss": 1.5574, "step": 4219 }, { "epoch": 0.5631171603949826, "grad_norm": 0.9617959238619088, "learning_rate": 8.448249268038569e-06, "loss": 1.5801, "step": 4220 }, { "epoch": 0.5632506004803843, "grad_norm": 0.9388770256513779, "learning_rate": 8.443979859516478e-06, "loss": 1.5987, "step": 4221 }, { "epoch": 0.563384040565786, "grad_norm": 1.0984016692407286, "learning_rate": 8.439710741641234e-06, "loss": 1.6375, "step": 4222 }, { "epoch": 0.5635174806511876, "grad_norm": 0.9883405451834961, "learning_rate": 8.435441915210254e-06, "loss": 1.5521, "step": 4223 }, { "epoch": 0.5636509207365893, "grad_norm": 1.216557539530969, "learning_rate": 8.431173381020915e-06, "loss": 1.5352, "step": 4224 }, { "epoch": 0.5637843608219909, "grad_norm": 0.9774137933177777, "learning_rate": 8.426905139870528e-06, "loss": 1.555, "step": 4225 }, { "epoch": 0.5639178009073926, "grad_norm": 1.2860621275048014, "learning_rate": 8.422637192556345e-06, "loss": 1.5603, "step": 4226 }, { "epoch": 0.5640512409927942, "grad_norm": 1.0444121247666944, "learning_rate": 8.418369539875579e-06, "loss": 1.6277, "step": 4227 }, { "epoch": 0.5641846810781959, "grad_norm": 0.9276944621654523, "learning_rate": 8.414102182625377e-06, "loss": 1.6169, "step": 4228 }, { "epoch": 0.5643181211635976, "grad_norm": 0.9785924417239961, "learning_rate": 8.40983512160283e-06, "loss": 1.5704, "step": 4229 }, { "epoch": 0.5644515612489992, "grad_norm": 0.9784553941398924, "learning_rate": 8.405568357604975e-06, "loss": 1.5979, "step": 4230 }, { "epoch": 0.5645850013344008, "grad_norm": 1.0150051857133884, "learning_rate": 8.4013018914288e-06, "loss": 1.5686, "step": 4231 }, { "epoch": 0.5647184414198025, "grad_norm": 1.0133584620768177, "learning_rate": 8.39703572387123e-06, "loss": 1.5938, "step": 4232 }, { "epoch": 0.5648518815052042, "grad_norm": 0.9864373077024665, "learning_rate": 8.392769855729134e-06, "loss": 1.6306, "step": 4233 }, { "epoch": 0.5649853215906058, "grad_norm": 0.9740231720507322, "learning_rate": 8.388504287799333e-06, "loss": 1.6303, "step": 4234 }, { "epoch": 0.5651187616760075, "grad_norm": 1.0375657601783386, "learning_rate": 8.384239020878583e-06, "loss": 1.6057, "step": 4235 }, { "epoch": 0.5652522017614091, "grad_norm": 0.9612076808860169, "learning_rate": 8.379974055763587e-06, "loss": 1.5814, "step": 4236 }, { "epoch": 0.5653856418468107, "grad_norm": 0.9641347727662392, "learning_rate": 8.37570939325099e-06, "loss": 1.5954, "step": 4237 }, { "epoch": 0.5655190819322125, "grad_norm": 1.1246728358254432, "learning_rate": 8.371445034137388e-06, "loss": 1.5883, "step": 4238 }, { "epoch": 0.5656525220176141, "grad_norm": 0.9658697763988295, "learning_rate": 8.367180979219314e-06, "loss": 1.5931, "step": 4239 }, { "epoch": 0.5657859621030158, "grad_norm": 1.1394715701292135, "learning_rate": 8.362917229293236e-06, "loss": 1.5455, "step": 4240 }, { "epoch": 0.5659194021884174, "grad_norm": 1.0026059848550364, "learning_rate": 8.358653785155586e-06, "loss": 1.5607, "step": 4241 }, { "epoch": 0.566052842273819, "grad_norm": 0.9721001045323454, "learning_rate": 8.354390647602721e-06, "loss": 1.6145, "step": 4242 }, { "epoch": 0.5661862823592207, "grad_norm": 0.9081483328670256, "learning_rate": 8.350127817430949e-06, "loss": 1.5278, "step": 4243 }, { "epoch": 0.5663197224446224, "grad_norm": 1.0420600474407877, "learning_rate": 8.34586529543651e-06, "loss": 1.5287, "step": 4244 }, { "epoch": 0.566453162530024, "grad_norm": 1.0148442225077963, "learning_rate": 8.341603082415604e-06, "loss": 1.5427, "step": 4245 }, { "epoch": 0.5665866026154257, "grad_norm": 0.9893277215121336, "learning_rate": 8.337341179164363e-06, "loss": 1.6118, "step": 4246 }, { "epoch": 0.5667200427008273, "grad_norm": 1.2148103117653153, "learning_rate": 8.333079586478854e-06, "loss": 1.582, "step": 4247 }, { "epoch": 0.5668534827862289, "grad_norm": 0.9457405540546562, "learning_rate": 8.328818305155105e-06, "loss": 1.6044, "step": 4248 }, { "epoch": 0.5669869228716307, "grad_norm": 0.990303038169507, "learning_rate": 8.324557335989068e-06, "loss": 1.5833, "step": 4249 }, { "epoch": 0.5671203629570323, "grad_norm": 0.9371628277905556, "learning_rate": 8.320296679776647e-06, "loss": 1.5408, "step": 4250 }, { "epoch": 0.567253803042434, "grad_norm": 0.989229363405912, "learning_rate": 8.316036337313678e-06, "loss": 1.5705, "step": 4251 }, { "epoch": 0.5673872431278356, "grad_norm": 0.9163649237832772, "learning_rate": 8.31177630939595e-06, "loss": 1.577, "step": 4252 }, { "epoch": 0.5675206832132372, "grad_norm": 0.9865296079255822, "learning_rate": 8.307516596819188e-06, "loss": 1.5335, "step": 4253 }, { "epoch": 0.567654123298639, "grad_norm": 0.9327295789695722, "learning_rate": 8.303257200379055e-06, "loss": 1.6193, "step": 4254 }, { "epoch": 0.5677875633840406, "grad_norm": 0.996608302154684, "learning_rate": 8.298998120871159e-06, "loss": 1.5586, "step": 4255 }, { "epoch": 0.5679210034694422, "grad_norm": 1.0163058132224498, "learning_rate": 8.294739359091048e-06, "loss": 1.5789, "step": 4256 }, { "epoch": 0.5680544435548439, "grad_norm": 0.9545226447240991, "learning_rate": 8.29048091583421e-06, "loss": 1.5417, "step": 4257 }, { "epoch": 0.5681878836402455, "grad_norm": 0.9899206964927434, "learning_rate": 8.286222791896068e-06, "loss": 1.6199, "step": 4258 }, { "epoch": 0.5683213237256471, "grad_norm": 1.0349824476929077, "learning_rate": 8.281964988072001e-06, "loss": 1.5948, "step": 4259 }, { "epoch": 0.5684547638110489, "grad_norm": 0.9700116138822578, "learning_rate": 8.277707505157313e-06, "loss": 1.5548, "step": 4260 }, { "epoch": 0.5685882038964505, "grad_norm": 0.9509425096012817, "learning_rate": 8.273450343947255e-06, "loss": 1.6276, "step": 4261 }, { "epoch": 0.5687216439818521, "grad_norm": 1.2640017963359513, "learning_rate": 8.269193505237013e-06, "loss": 1.5947, "step": 4262 }, { "epoch": 0.5688550840672538, "grad_norm": 0.9467480150131761, "learning_rate": 8.26493698982172e-06, "loss": 1.6069, "step": 4263 }, { "epoch": 0.5689885241526554, "grad_norm": 0.9907646708465446, "learning_rate": 8.260680798496444e-06, "loss": 1.578, "step": 4264 }, { "epoch": 0.5691219642380572, "grad_norm": 1.1061065525912817, "learning_rate": 8.256424932056187e-06, "loss": 1.579, "step": 4265 }, { "epoch": 0.5692554043234588, "grad_norm": 1.325438533023951, "learning_rate": 8.252169391295905e-06, "loss": 1.5684, "step": 4266 }, { "epoch": 0.5693888444088604, "grad_norm": 0.9261579791060407, "learning_rate": 8.247914177010482e-06, "loss": 1.5429, "step": 4267 }, { "epoch": 0.5695222844942621, "grad_norm": 0.9433004236090312, "learning_rate": 8.243659289994741e-06, "loss": 1.5836, "step": 4268 }, { "epoch": 0.5696557245796637, "grad_norm": 0.9493027808271886, "learning_rate": 8.239404731043446e-06, "loss": 1.5597, "step": 4269 }, { "epoch": 0.5697891646650654, "grad_norm": 0.972218396115009, "learning_rate": 8.235150500951304e-06, "loss": 1.6362, "step": 4270 }, { "epoch": 0.5699226047504671, "grad_norm": 0.9262069004868365, "learning_rate": 8.230896600512956e-06, "loss": 1.6098, "step": 4271 }, { "epoch": 0.5700560448358687, "grad_norm": 1.0129980611981801, "learning_rate": 8.226643030522979e-06, "loss": 1.6419, "step": 4272 }, { "epoch": 0.5701894849212703, "grad_norm": 0.9574543891512742, "learning_rate": 8.222389791775895e-06, "loss": 1.5302, "step": 4273 }, { "epoch": 0.570322925006672, "grad_norm": 2.7284176094811077, "learning_rate": 8.218136885066158e-06, "loss": 1.6217, "step": 4274 }, { "epoch": 0.5704563650920736, "grad_norm": 0.9819347041563397, "learning_rate": 8.213884311188166e-06, "loss": 1.5888, "step": 4275 }, { "epoch": 0.5705898051774753, "grad_norm": 0.9895366406849194, "learning_rate": 8.209632070936246e-06, "loss": 1.5732, "step": 4276 }, { "epoch": 0.570723245262877, "grad_norm": 0.9348464052451259, "learning_rate": 8.205380165104673e-06, "loss": 1.5906, "step": 4277 }, { "epoch": 0.5708566853482786, "grad_norm": 0.9528816676643417, "learning_rate": 8.201128594487653e-06, "loss": 1.6113, "step": 4278 }, { "epoch": 0.5709901254336803, "grad_norm": 0.9776229291280035, "learning_rate": 8.196877359879327e-06, "loss": 1.5856, "step": 4279 }, { "epoch": 0.5711235655190819, "grad_norm": 0.9414417277905998, "learning_rate": 8.192626462073782e-06, "loss": 1.564, "step": 4280 }, { "epoch": 0.5712570056044836, "grad_norm": 0.9808001957769379, "learning_rate": 8.18837590186504e-06, "loss": 1.5853, "step": 4281 }, { "epoch": 0.5713904456898853, "grad_norm": 1.1219868857209958, "learning_rate": 8.184125680047051e-06, "loss": 1.5385, "step": 4282 }, { "epoch": 0.5715238857752869, "grad_norm": 0.9792466209394581, "learning_rate": 8.179875797413707e-06, "loss": 1.6143, "step": 4283 }, { "epoch": 0.5716573258606885, "grad_norm": 0.9655150327845816, "learning_rate": 8.175626254758847e-06, "loss": 1.5814, "step": 4284 }, { "epoch": 0.5717907659460902, "grad_norm": 1.0677323984435168, "learning_rate": 8.171377052876228e-06, "loss": 1.5212, "step": 4285 }, { "epoch": 0.5719242060314919, "grad_norm": 1.0478556969877482, "learning_rate": 8.167128192559557e-06, "loss": 1.5864, "step": 4286 }, { "epoch": 0.5720576461168935, "grad_norm": 0.9806397349097133, "learning_rate": 8.162879674602469e-06, "loss": 1.5688, "step": 4287 }, { "epoch": 0.5721910862022952, "grad_norm": 0.957753049608993, "learning_rate": 8.158631499798545e-06, "loss": 1.5769, "step": 4288 }, { "epoch": 0.5723245262876968, "grad_norm": 0.9714614263607235, "learning_rate": 8.154383668941293e-06, "loss": 1.5687, "step": 4289 }, { "epoch": 0.5724579663730984, "grad_norm": 1.1716096470089616, "learning_rate": 8.150136182824152e-06, "loss": 1.5396, "step": 4290 }, { "epoch": 0.5725914064585002, "grad_norm": 0.992260967601937, "learning_rate": 8.145889042240516e-06, "loss": 1.5828, "step": 4291 }, { "epoch": 0.5727248465439018, "grad_norm": 0.965708034327343, "learning_rate": 8.141642247983696e-06, "loss": 1.5623, "step": 4292 }, { "epoch": 0.5728582866293035, "grad_norm": 0.9907802430036895, "learning_rate": 8.137395800846948e-06, "loss": 1.6284, "step": 4293 }, { "epoch": 0.5729917267147051, "grad_norm": 0.9396948795159468, "learning_rate": 8.133149701623454e-06, "loss": 1.6403, "step": 4294 }, { "epoch": 0.5731251668001067, "grad_norm": 0.9597661967095336, "learning_rate": 8.12890395110634e-06, "loss": 1.5808, "step": 4295 }, { "epoch": 0.5732586068855084, "grad_norm": 9.3684749649107, "learning_rate": 8.124658550088668e-06, "loss": 1.6139, "step": 4296 }, { "epoch": 0.5733920469709101, "grad_norm": 0.9286997041501741, "learning_rate": 8.120413499363427e-06, "loss": 1.5668, "step": 4297 }, { "epoch": 0.5735254870563117, "grad_norm": 0.9796234552599323, "learning_rate": 8.116168799723544e-06, "loss": 1.5597, "step": 4298 }, { "epoch": 0.5736589271417134, "grad_norm": 1.0267380425578665, "learning_rate": 8.111924451961883e-06, "loss": 1.6226, "step": 4299 }, { "epoch": 0.573792367227115, "grad_norm": 1.125467638614621, "learning_rate": 8.107680456871236e-06, "loss": 1.6001, "step": 4300 }, { "epoch": 0.5739258073125166, "grad_norm": 0.9428006871087928, "learning_rate": 8.10343681524433e-06, "loss": 1.6035, "step": 4301 }, { "epoch": 0.5740592473979184, "grad_norm": 0.9693180970207167, "learning_rate": 8.09919352787384e-06, "loss": 1.5463, "step": 4302 }, { "epoch": 0.57419268748332, "grad_norm": 1.0176972242986655, "learning_rate": 8.094950595552356e-06, "loss": 1.6065, "step": 4303 }, { "epoch": 0.5743261275687216, "grad_norm": 1.1248089412402027, "learning_rate": 8.09070801907241e-06, "loss": 1.6226, "step": 4304 }, { "epoch": 0.5744595676541233, "grad_norm": 0.9608200644959385, "learning_rate": 8.086465799226468e-06, "loss": 1.5211, "step": 4305 }, { "epoch": 0.5745930077395249, "grad_norm": 0.9756758699322823, "learning_rate": 8.08222393680693e-06, "loss": 1.5682, "step": 4306 }, { "epoch": 0.5747264478249267, "grad_norm": 1.0688838840416501, "learning_rate": 8.077982432606125e-06, "loss": 1.5725, "step": 4307 }, { "epoch": 0.5748598879103283, "grad_norm": 1.1309638787833816, "learning_rate": 8.073741287416317e-06, "loss": 1.6059, "step": 4308 }, { "epoch": 0.5749933279957299, "grad_norm": 1.0556322400323577, "learning_rate": 8.069500502029705e-06, "loss": 1.5937, "step": 4309 }, { "epoch": 0.5751267680811316, "grad_norm": 0.9506475357460462, "learning_rate": 8.065260077238423e-06, "loss": 1.5802, "step": 4310 }, { "epoch": 0.5752602081665332, "grad_norm": 1.077301912471358, "learning_rate": 8.061020013834526e-06, "loss": 1.594, "step": 4311 }, { "epoch": 0.5753936482519348, "grad_norm": 0.9475246370591415, "learning_rate": 8.056780312610019e-06, "loss": 1.5518, "step": 4312 }, { "epoch": 0.5755270883373366, "grad_norm": 1.1642637584365982, "learning_rate": 8.052540974356825e-06, "loss": 1.5499, "step": 4313 }, { "epoch": 0.5756605284227382, "grad_norm": 0.962853058046866, "learning_rate": 8.048301999866803e-06, "loss": 1.625, "step": 4314 }, { "epoch": 0.5757939685081398, "grad_norm": 0.951938089036065, "learning_rate": 8.044063389931745e-06, "loss": 1.6001, "step": 4315 }, { "epoch": 0.5759274085935415, "grad_norm": 0.9850586486463077, "learning_rate": 8.039825145343378e-06, "loss": 1.5917, "step": 4316 }, { "epoch": 0.5760608486789431, "grad_norm": 0.9996996255343955, "learning_rate": 8.035587266893357e-06, "loss": 1.59, "step": 4317 }, { "epoch": 0.5761942887643449, "grad_norm": 0.9361786850612416, "learning_rate": 8.031349755373269e-06, "loss": 1.5217, "step": 4318 }, { "epoch": 0.5763277288497465, "grad_norm": 0.9534574866115021, "learning_rate": 8.02711261157463e-06, "loss": 1.6341, "step": 4319 }, { "epoch": 0.5764611689351481, "grad_norm": 0.9607054461212049, "learning_rate": 8.022875836288896e-06, "loss": 1.5407, "step": 4320 }, { "epoch": 0.5765946090205498, "grad_norm": 1.035113379371202, "learning_rate": 8.018639430307445e-06, "loss": 1.546, "step": 4321 }, { "epoch": 0.5767280491059514, "grad_norm": 0.9424964205669921, "learning_rate": 8.014403394421585e-06, "loss": 1.5562, "step": 4322 }, { "epoch": 0.576861489191353, "grad_norm": 0.9423821513514596, "learning_rate": 8.010167729422571e-06, "loss": 1.5583, "step": 4323 }, { "epoch": 0.5769949292767548, "grad_norm": 0.9340881718791657, "learning_rate": 8.005932436101567e-06, "loss": 1.5567, "step": 4324 }, { "epoch": 0.5771283693621564, "grad_norm": 0.9680824040104852, "learning_rate": 8.001697515249683e-06, "loss": 1.5881, "step": 4325 }, { "epoch": 0.577261809447558, "grad_norm": 0.9638573991235978, "learning_rate": 7.997462967657946e-06, "loss": 1.5619, "step": 4326 }, { "epoch": 0.5773952495329597, "grad_norm": 0.9479429964037883, "learning_rate": 7.993228794117332e-06, "loss": 1.573, "step": 4327 }, { "epoch": 0.5775286896183613, "grad_norm": 0.9744883318609773, "learning_rate": 7.988994995418731e-06, "loss": 1.5964, "step": 4328 }, { "epoch": 0.577662129703763, "grad_norm": 0.9469778260011422, "learning_rate": 7.984761572352963e-06, "loss": 1.5858, "step": 4329 }, { "epoch": 0.5777955697891647, "grad_norm": 0.9464430279636084, "learning_rate": 7.980528525710795e-06, "loss": 1.6171, "step": 4330 }, { "epoch": 0.5779290098745663, "grad_norm": 7.978193421669048, "learning_rate": 7.976295856282904e-06, "loss": 1.5857, "step": 4331 }, { "epoch": 0.578062449959968, "grad_norm": 1.2761028754638115, "learning_rate": 7.97206356485991e-06, "loss": 1.6031, "step": 4332 }, { "epoch": 0.5781958900453696, "grad_norm": 0.993124845092899, "learning_rate": 7.967831652232346e-06, "loss": 1.5551, "step": 4333 }, { "epoch": 0.5783293301307713, "grad_norm": 0.962042815286027, "learning_rate": 7.963600119190695e-06, "loss": 1.5865, "step": 4334 }, { "epoch": 0.578462770216173, "grad_norm": 0.9541286812670964, "learning_rate": 7.959368966525358e-06, "loss": 1.5067, "step": 4335 }, { "epoch": 0.5785962103015746, "grad_norm": 0.9197038368832774, "learning_rate": 7.955138195026663e-06, "loss": 1.5261, "step": 4336 }, { "epoch": 0.5787296503869762, "grad_norm": 0.9334310052059513, "learning_rate": 7.950907805484874e-06, "loss": 1.5611, "step": 4337 }, { "epoch": 0.5788630904723779, "grad_norm": 0.9637654944676048, "learning_rate": 7.946677798690175e-06, "loss": 1.5307, "step": 4338 }, { "epoch": 0.5789965305577796, "grad_norm": 0.9357279064845219, "learning_rate": 7.942448175432687e-06, "loss": 1.5625, "step": 4339 }, { "epoch": 0.5791299706431812, "grad_norm": 1.0168364623595538, "learning_rate": 7.938218936502451e-06, "loss": 1.6031, "step": 4340 }, { "epoch": 0.5792634107285829, "grad_norm": 0.9662217980950242, "learning_rate": 7.933990082689447e-06, "loss": 1.5805, "step": 4341 }, { "epoch": 0.5793968508139845, "grad_norm": 0.9669704491262874, "learning_rate": 7.92976161478357e-06, "loss": 1.5878, "step": 4342 }, { "epoch": 0.5795302908993861, "grad_norm": 1.00799200737011, "learning_rate": 7.925533533574652e-06, "loss": 1.5348, "step": 4343 }, { "epoch": 0.5796637309847879, "grad_norm": 0.9856175919162833, "learning_rate": 7.921305839852454e-06, "loss": 1.5661, "step": 4344 }, { "epoch": 0.5797971710701895, "grad_norm": 1.0344494447784194, "learning_rate": 7.91707853440666e-06, "loss": 1.5692, "step": 4345 }, { "epoch": 0.5799306111555912, "grad_norm": 0.9964855077609798, "learning_rate": 7.912851618026878e-06, "loss": 1.6038, "step": 4346 }, { "epoch": 0.5800640512409928, "grad_norm": 1.0409910183629953, "learning_rate": 7.90862509150265e-06, "loss": 1.5509, "step": 4347 }, { "epoch": 0.5801974913263944, "grad_norm": 0.9473445719334401, "learning_rate": 7.904398955623443e-06, "loss": 1.6044, "step": 4348 }, { "epoch": 0.5803309314117961, "grad_norm": 1.0011241389147565, "learning_rate": 7.900173211178655e-06, "loss": 1.5819, "step": 4349 }, { "epoch": 0.5804643714971978, "grad_norm": 0.9616239459082261, "learning_rate": 7.895947858957603e-06, "loss": 1.5791, "step": 4350 }, { "epoch": 0.5805978115825994, "grad_norm": 0.9896389964933032, "learning_rate": 7.891722899749531e-06, "loss": 1.5909, "step": 4351 }, { "epoch": 0.5807312516680011, "grad_norm": 1.1806253383689567, "learning_rate": 7.887498334343625e-06, "loss": 1.5656, "step": 4352 }, { "epoch": 0.5808646917534027, "grad_norm": 0.9467806964207979, "learning_rate": 7.883274163528974e-06, "loss": 1.5911, "step": 4353 }, { "epoch": 0.5809981318388043, "grad_norm": 0.9523674420210726, "learning_rate": 7.879050388094606e-06, "loss": 1.5564, "step": 4354 }, { "epoch": 0.5811315719242061, "grad_norm": 0.9826286319469207, "learning_rate": 7.874827008829485e-06, "loss": 1.6093, "step": 4355 }, { "epoch": 0.5812650120096077, "grad_norm": 0.9353553399457585, "learning_rate": 7.87060402652248e-06, "loss": 1.5576, "step": 4356 }, { "epoch": 0.5813984520950093, "grad_norm": 0.9890860146426698, "learning_rate": 7.866381441962398e-06, "loss": 1.6012, "step": 4357 }, { "epoch": 0.581531892180411, "grad_norm": 1.3238728327798368, "learning_rate": 7.86215925593797e-06, "loss": 1.5197, "step": 4358 }, { "epoch": 0.5816653322658126, "grad_norm": 0.9735487703890018, "learning_rate": 7.857937469237853e-06, "loss": 1.577, "step": 4359 }, { "epoch": 0.5817987723512144, "grad_norm": 1.0582339375938654, "learning_rate": 7.85371608265063e-06, "loss": 1.5682, "step": 4360 }, { "epoch": 0.581932212436616, "grad_norm": 1.1375161685312762, "learning_rate": 7.849495096964803e-06, "loss": 1.5383, "step": 4361 }, { "epoch": 0.5820656525220176, "grad_norm": 1.026443022449838, "learning_rate": 7.84527451296881e-06, "loss": 1.5996, "step": 4362 }, { "epoch": 0.5821990926074193, "grad_norm": 0.97828369672077, "learning_rate": 7.841054331451008e-06, "loss": 1.5338, "step": 4363 }, { "epoch": 0.5823325326928209, "grad_norm": 1.0179518604332118, "learning_rate": 7.836834553199675e-06, "loss": 1.568, "step": 4364 }, { "epoch": 0.5824659727782225, "grad_norm": 0.9804583013213366, "learning_rate": 7.832615179003013e-06, "loss": 1.5702, "step": 4365 }, { "epoch": 0.5825994128636243, "grad_norm": 1.1561708940014976, "learning_rate": 7.828396209649166e-06, "loss": 1.5852, "step": 4366 }, { "epoch": 0.5827328529490259, "grad_norm": 0.9703494752720414, "learning_rate": 7.824177645926181e-06, "loss": 1.6168, "step": 4367 }, { "epoch": 0.5828662930344275, "grad_norm": 0.975607666820386, "learning_rate": 7.819959488622034e-06, "loss": 1.5722, "step": 4368 }, { "epoch": 0.5829997331198292, "grad_norm": 1.330373136193926, "learning_rate": 7.81574173852464e-06, "loss": 1.5116, "step": 4369 }, { "epoch": 0.5831331732052308, "grad_norm": 0.9750167029171607, "learning_rate": 7.811524396421818e-06, "loss": 1.5523, "step": 4370 }, { "epoch": 0.5832666132906325, "grad_norm": 0.977713970035295, "learning_rate": 7.807307463101323e-06, "loss": 1.5085, "step": 4371 }, { "epoch": 0.5834000533760342, "grad_norm": 0.9684749707247869, "learning_rate": 7.803090939350825e-06, "loss": 1.5986, "step": 4372 }, { "epoch": 0.5835334934614358, "grad_norm": 0.9894691014054225, "learning_rate": 7.798874825957932e-06, "loss": 1.5637, "step": 4373 }, { "epoch": 0.5836669335468375, "grad_norm": 0.9811750958682591, "learning_rate": 7.79465912371016e-06, "loss": 1.5513, "step": 4374 }, { "epoch": 0.5838003736322391, "grad_norm": 0.9268597986006433, "learning_rate": 7.790443833394951e-06, "loss": 1.5789, "step": 4375 }, { "epoch": 0.5839338137176407, "grad_norm": 0.9589691936010886, "learning_rate": 7.786228955799682e-06, "loss": 1.6262, "step": 4376 }, { "epoch": 0.5840672538030425, "grad_norm": 1.0121372632908987, "learning_rate": 7.782014491711638e-06, "loss": 1.5929, "step": 4377 }, { "epoch": 0.5842006938884441, "grad_norm": 0.9838055243486561, "learning_rate": 7.777800441918036e-06, "loss": 1.6404, "step": 4378 }, { "epoch": 0.5843341339738457, "grad_norm": 0.9753282451165607, "learning_rate": 7.773586807206007e-06, "loss": 1.614, "step": 4379 }, { "epoch": 0.5844675740592474, "grad_norm": 0.9364592889583468, "learning_rate": 7.769373588362616e-06, "loss": 1.6258, "step": 4380 }, { "epoch": 0.584601014144649, "grad_norm": 0.9733374026966305, "learning_rate": 7.765160786174838e-06, "loss": 1.5896, "step": 4381 }, { "epoch": 0.5847344542300507, "grad_norm": 0.9583014709288828, "learning_rate": 7.760948401429587e-06, "loss": 1.569, "step": 4382 }, { "epoch": 0.5848678943154524, "grad_norm": 1.1438908541726007, "learning_rate": 7.756736434913678e-06, "loss": 1.5556, "step": 4383 }, { "epoch": 0.585001334400854, "grad_norm": 0.9585046094942674, "learning_rate": 7.752524887413864e-06, "loss": 1.5598, "step": 4384 }, { "epoch": 0.5851347744862557, "grad_norm": 1.0484117367016692, "learning_rate": 7.748313759716812e-06, "loss": 1.6091, "step": 4385 }, { "epoch": 0.5852682145716573, "grad_norm": 0.9196030055638242, "learning_rate": 7.74410305260911e-06, "loss": 1.5314, "step": 4386 }, { "epoch": 0.585401654657059, "grad_norm": 0.9757449887475365, "learning_rate": 7.739892766877278e-06, "loss": 1.577, "step": 4387 }, { "epoch": 0.5855350947424607, "grad_norm": 1.2029427959464503, "learning_rate": 7.735682903307745e-06, "loss": 1.6089, "step": 4388 }, { "epoch": 0.5856685348278623, "grad_norm": 0.9931185472448057, "learning_rate": 7.731473462686865e-06, "loss": 1.566, "step": 4389 }, { "epoch": 0.5858019749132639, "grad_norm": 0.9306099137325194, "learning_rate": 7.727264445800909e-06, "loss": 1.559, "step": 4390 }, { "epoch": 0.5859354149986656, "grad_norm": 0.9714231921122422, "learning_rate": 7.723055853436084e-06, "loss": 1.6069, "step": 4391 }, { "epoch": 0.5860688550840673, "grad_norm": 0.9857980626951369, "learning_rate": 7.718847686378502e-06, "loss": 1.5685, "step": 4392 }, { "epoch": 0.5862022951694689, "grad_norm": 1.0373287455803648, "learning_rate": 7.714639945414193e-06, "loss": 1.6062, "step": 4393 }, { "epoch": 0.5863357352548706, "grad_norm": 1.0138880235597887, "learning_rate": 7.71043263132913e-06, "loss": 1.5886, "step": 4394 }, { "epoch": 0.5864691753402722, "grad_norm": 0.9451198980798459, "learning_rate": 7.70622574490918e-06, "loss": 1.6238, "step": 4395 }, { "epoch": 0.5866026154256738, "grad_norm": 0.9760854352122434, "learning_rate": 7.70201928694015e-06, "loss": 1.5641, "step": 4396 }, { "epoch": 0.5867360555110755, "grad_norm": 0.9771605508734201, "learning_rate": 7.697813258207747e-06, "loss": 1.5616, "step": 4397 }, { "epoch": 0.5868694955964772, "grad_norm": 1.016442594503307, "learning_rate": 7.693607659497621e-06, "loss": 1.572, "step": 4398 }, { "epoch": 0.5870029356818789, "grad_norm": 1.1109077879279698, "learning_rate": 7.689402491595324e-06, "loss": 1.5504, "step": 4399 }, { "epoch": 0.5871363757672805, "grad_norm": 7.23681120527737, "learning_rate": 7.685197755286332e-06, "loss": 1.6183, "step": 4400 }, { "epoch": 0.5872698158526821, "grad_norm": 1.0014151692392677, "learning_rate": 7.680993451356047e-06, "loss": 1.6214, "step": 4401 }, { "epoch": 0.5874032559380838, "grad_norm": 1.0492817400153631, "learning_rate": 7.676789580589781e-06, "loss": 1.5989, "step": 4402 }, { "epoch": 0.5875366960234855, "grad_norm": 0.9443703490938348, "learning_rate": 7.672586143772772e-06, "loss": 1.5646, "step": 4403 }, { "epoch": 0.5876701361088871, "grad_norm": 0.9410293011189405, "learning_rate": 7.66838314169017e-06, "loss": 1.5367, "step": 4404 }, { "epoch": 0.5878035761942888, "grad_norm": 0.9504920239170566, "learning_rate": 7.664180575127054e-06, "loss": 1.5748, "step": 4405 }, { "epoch": 0.5879370162796904, "grad_norm": 1.1257241646301057, "learning_rate": 7.659978444868412e-06, "loss": 1.5842, "step": 4406 }, { "epoch": 0.588070456365092, "grad_norm": 1.0186274137996902, "learning_rate": 7.65577675169915e-06, "loss": 1.591, "step": 4407 }, { "epoch": 0.5882038964504938, "grad_norm": 1.1327767010645338, "learning_rate": 7.651575496404104e-06, "loss": 1.654, "step": 4408 }, { "epoch": 0.5883373365358954, "grad_norm": 0.9109735683849596, "learning_rate": 7.64737467976802e-06, "loss": 1.5817, "step": 4409 }, { "epoch": 0.588470776621297, "grad_norm": 0.9553652260357995, "learning_rate": 7.643174302575558e-06, "loss": 1.5662, "step": 4410 }, { "epoch": 0.5886042167066987, "grad_norm": 0.9727279713423016, "learning_rate": 7.638974365611299e-06, "loss": 1.6463, "step": 4411 }, { "epoch": 0.5887376567921003, "grad_norm": 0.9442402335099999, "learning_rate": 7.634774869659751e-06, "loss": 1.5588, "step": 4412 }, { "epoch": 0.588871096877502, "grad_norm": 0.9022310204474651, "learning_rate": 7.630575815505328e-06, "loss": 1.5363, "step": 4413 }, { "epoch": 0.5890045369629037, "grad_norm": 0.9386827766287047, "learning_rate": 7.626377203932368e-06, "loss": 1.5746, "step": 4414 }, { "epoch": 0.5891379770483053, "grad_norm": 1.086942823690136, "learning_rate": 7.622179035725116e-06, "loss": 1.6061, "step": 4415 }, { "epoch": 0.589271417133707, "grad_norm": 1.0205058353350165, "learning_rate": 7.617981311667751e-06, "loss": 1.5322, "step": 4416 }, { "epoch": 0.5894048572191086, "grad_norm": 0.9823401514999283, "learning_rate": 7.613784032544358e-06, "loss": 1.5882, "step": 4417 }, { "epoch": 0.5895382973045102, "grad_norm": 0.9433142149165429, "learning_rate": 7.609587199138934e-06, "loss": 1.5114, "step": 4418 }, { "epoch": 0.589671737389912, "grad_norm": 0.9904727920933845, "learning_rate": 7.605390812235412e-06, "loss": 1.595, "step": 4419 }, { "epoch": 0.5898051774753136, "grad_norm": 0.9453239515038033, "learning_rate": 7.60119487261762e-06, "loss": 1.5716, "step": 4420 }, { "epoch": 0.5899386175607152, "grad_norm": 0.9710304464960416, "learning_rate": 7.596999381069316e-06, "loss": 1.6152, "step": 4421 }, { "epoch": 0.5900720576461169, "grad_norm": 0.9149997314948785, "learning_rate": 7.592804338374166e-06, "loss": 1.5745, "step": 4422 }, { "epoch": 0.5902054977315185, "grad_norm": 0.9611341617563518, "learning_rate": 7.588609745315758e-06, "loss": 1.5301, "step": 4423 }, { "epoch": 0.5903389378169202, "grad_norm": 1.0320270411702828, "learning_rate": 7.584415602677597e-06, "loss": 1.6336, "step": 4424 }, { "epoch": 0.5904723779023219, "grad_norm": 0.9578784490487312, "learning_rate": 7.580221911243098e-06, "loss": 1.612, "step": 4425 }, { "epoch": 0.5906058179877235, "grad_norm": 1.0925180577920106, "learning_rate": 7.576028671795596e-06, "loss": 1.6268, "step": 4426 }, { "epoch": 0.5907392580731252, "grad_norm": 1.0026716186226203, "learning_rate": 7.571835885118341e-06, "loss": 1.545, "step": 4427 }, { "epoch": 0.5908726981585268, "grad_norm": 1.1376864232127364, "learning_rate": 7.567643551994498e-06, "loss": 1.579, "step": 4428 }, { "epoch": 0.5910061382439284, "grad_norm": 0.99803761360832, "learning_rate": 7.56345167320714e-06, "loss": 1.5658, "step": 4429 }, { "epoch": 0.5911395783293302, "grad_norm": 0.9610248802222642, "learning_rate": 7.5592602495392744e-06, "loss": 1.6057, "step": 4430 }, { "epoch": 0.5912730184147318, "grad_norm": 0.9418422477809483, "learning_rate": 7.555069281773805e-06, "loss": 1.5575, "step": 4431 }, { "epoch": 0.5914064585001334, "grad_norm": 0.9421321981921353, "learning_rate": 7.550878770693551e-06, "loss": 1.5754, "step": 4432 }, { "epoch": 0.5915398985855351, "grad_norm": 1.035596217537491, "learning_rate": 7.546688717081265e-06, "loss": 1.5615, "step": 4433 }, { "epoch": 0.5916733386709367, "grad_norm": 0.9571740732982612, "learning_rate": 7.542499121719593e-06, "loss": 1.5694, "step": 4434 }, { "epoch": 0.5918067787563384, "grad_norm": 0.9590760123078373, "learning_rate": 7.538309985391107e-06, "loss": 1.5727, "step": 4435 }, { "epoch": 0.5919402188417401, "grad_norm": 0.9626015322928804, "learning_rate": 7.534121308878283e-06, "loss": 1.5456, "step": 4436 }, { "epoch": 0.5920736589271417, "grad_norm": 0.9448338727741227, "learning_rate": 7.529933092963527e-06, "loss": 1.5902, "step": 4437 }, { "epoch": 0.5922070990125433, "grad_norm": 1.0098216915844183, "learning_rate": 7.525745338429148e-06, "loss": 1.5554, "step": 4438 }, { "epoch": 0.592340539097945, "grad_norm": 0.9387499498237363, "learning_rate": 7.521558046057364e-06, "loss": 1.5319, "step": 4439 }, { "epoch": 0.5924739791833467, "grad_norm": 0.9367552163787548, "learning_rate": 7.517371216630324e-06, "loss": 1.5604, "step": 4440 }, { "epoch": 0.5926074192687484, "grad_norm": 15.945356768463004, "learning_rate": 7.513184850930075e-06, "loss": 1.5926, "step": 4441 }, { "epoch": 0.59274085935415, "grad_norm": 1.0201759619596091, "learning_rate": 7.5089989497385805e-06, "loss": 1.5564, "step": 4442 }, { "epoch": 0.5928742994395516, "grad_norm": 1.0381537793869493, "learning_rate": 7.504813513837721e-06, "loss": 1.5926, "step": 4443 }, { "epoch": 0.5930077395249533, "grad_norm": 1.066841128052637, "learning_rate": 7.5006285440092894e-06, "loss": 1.5384, "step": 4444 }, { "epoch": 0.593141179610355, "grad_norm": 1.060786355395775, "learning_rate": 7.496444041034987e-06, "loss": 1.5944, "step": 4445 }, { "epoch": 0.5932746196957566, "grad_norm": 0.9860687095888007, "learning_rate": 7.492260005696435e-06, "loss": 1.5632, "step": 4446 }, { "epoch": 0.5934080597811583, "grad_norm": 0.9858498336031716, "learning_rate": 7.488076438775164e-06, "loss": 1.5638, "step": 4447 }, { "epoch": 0.5935414998665599, "grad_norm": 0.9833114803634568, "learning_rate": 7.483893341052613e-06, "loss": 1.5688, "step": 4448 }, { "epoch": 0.5936749399519615, "grad_norm": 0.9443909139265638, "learning_rate": 7.479710713310142e-06, "loss": 1.6111, "step": 4449 }, { "epoch": 0.5938083800373632, "grad_norm": 1.754532656615733, "learning_rate": 7.4755285563290084e-06, "loss": 1.5721, "step": 4450 }, { "epoch": 0.5939418201227649, "grad_norm": 0.9696978925421736, "learning_rate": 7.4713468708904035e-06, "loss": 1.583, "step": 4451 }, { "epoch": 0.5940752602081665, "grad_norm": 0.9849603466349487, "learning_rate": 7.4671656577754134e-06, "loss": 1.5552, "step": 4452 }, { "epoch": 0.5942087002935682, "grad_norm": 0.9804321553897487, "learning_rate": 7.462984917765042e-06, "loss": 1.5638, "step": 4453 }, { "epoch": 0.5943421403789698, "grad_norm": 0.987700482819893, "learning_rate": 7.4588046516401965e-06, "loss": 1.605, "step": 4454 }, { "epoch": 0.5944755804643715, "grad_norm": 0.9960716798209621, "learning_rate": 7.454624860181716e-06, "loss": 1.5791, "step": 4455 }, { "epoch": 0.5946090205497732, "grad_norm": 1.0747136053703605, "learning_rate": 7.450445544170331e-06, "loss": 1.5723, "step": 4456 }, { "epoch": 0.5947424606351748, "grad_norm": 0.9519414871231925, "learning_rate": 7.446266704386685e-06, "loss": 1.5776, "step": 4457 }, { "epoch": 0.5948759007205765, "grad_norm": 1.0093105843986834, "learning_rate": 7.442088341611349e-06, "loss": 1.5719, "step": 4458 }, { "epoch": 0.5950093408059781, "grad_norm": 0.9633876822009539, "learning_rate": 7.437910456624786e-06, "loss": 1.5895, "step": 4459 }, { "epoch": 0.5951427808913797, "grad_norm": 1.1771212337662196, "learning_rate": 7.4337330502073815e-06, "loss": 1.5678, "step": 4460 }, { "epoch": 0.5952762209767815, "grad_norm": 1.027523600819857, "learning_rate": 7.429556123139418e-06, "loss": 1.5628, "step": 4461 }, { "epoch": 0.5954096610621831, "grad_norm": 1.0233162890520189, "learning_rate": 7.425379676201112e-06, "loss": 1.6438, "step": 4462 }, { "epoch": 0.5955431011475847, "grad_norm": 0.982674022132527, "learning_rate": 7.421203710172569e-06, "loss": 1.5377, "step": 4463 }, { "epoch": 0.5956765412329864, "grad_norm": 0.928927279763224, "learning_rate": 7.417028225833809e-06, "loss": 1.5629, "step": 4464 }, { "epoch": 0.595809981318388, "grad_norm": 1.003275613884901, "learning_rate": 7.412853223964771e-06, "loss": 1.5441, "step": 4465 }, { "epoch": 0.5959434214037898, "grad_norm": 1.0353673874246174, "learning_rate": 7.408678705345292e-06, "loss": 1.6081, "step": 4466 }, { "epoch": 0.5960768614891914, "grad_norm": 0.9778720926332395, "learning_rate": 7.40450467075513e-06, "loss": 1.5633, "step": 4467 }, { "epoch": 0.596210301574593, "grad_norm": 1.0932199693315858, "learning_rate": 7.400331120973943e-06, "loss": 1.5514, "step": 4468 }, { "epoch": 0.5963437416599947, "grad_norm": 0.9995117901765698, "learning_rate": 7.3961580567813065e-06, "loss": 1.6022, "step": 4469 }, { "epoch": 0.5964771817453963, "grad_norm": 1.04545722147908, "learning_rate": 7.391985478956699e-06, "loss": 1.5811, "step": 4470 }, { "epoch": 0.5966106218307979, "grad_norm": 0.9491415931893125, "learning_rate": 7.387813388279507e-06, "loss": 1.5659, "step": 4471 }, { "epoch": 0.5967440619161997, "grad_norm": 0.9962391326790979, "learning_rate": 7.383641785529037e-06, "loss": 1.5627, "step": 4472 }, { "epoch": 0.5968775020016013, "grad_norm": 0.9766553612308028, "learning_rate": 7.379470671484492e-06, "loss": 1.5631, "step": 4473 }, { "epoch": 0.5970109420870029, "grad_norm": 0.9060488686225177, "learning_rate": 7.375300046924991e-06, "loss": 1.5714, "step": 4474 }, { "epoch": 0.5971443821724046, "grad_norm": 0.9993988316021899, "learning_rate": 7.3711299126295535e-06, "loss": 1.6233, "step": 4475 }, { "epoch": 0.5972778222578062, "grad_norm": 0.9685553543361992, "learning_rate": 7.366960269377122e-06, "loss": 1.5233, "step": 4476 }, { "epoch": 0.5974112623432078, "grad_norm": 0.9992661533210414, "learning_rate": 7.362791117946533e-06, "loss": 1.5369, "step": 4477 }, { "epoch": 0.5975447024286096, "grad_norm": 0.9445669078870059, "learning_rate": 7.3586224591165335e-06, "loss": 1.5614, "step": 4478 }, { "epoch": 0.5976781425140112, "grad_norm": 1.1684515472112844, "learning_rate": 7.354454293665789e-06, "loss": 1.5789, "step": 4479 }, { "epoch": 0.5978115825994129, "grad_norm": 0.9866975359267203, "learning_rate": 7.350286622372863e-06, "loss": 1.5559, "step": 4480 }, { "epoch": 0.5979450226848145, "grad_norm": 0.948924170206914, "learning_rate": 7.346119446016228e-06, "loss": 1.6123, "step": 4481 }, { "epoch": 0.5980784627702161, "grad_norm": 6.114143464113286, "learning_rate": 7.34195276537426e-06, "loss": 1.5768, "step": 4482 }, { "epoch": 0.5982119028556179, "grad_norm": 1.0955825105939028, "learning_rate": 7.337786581225257e-06, "loss": 1.5832, "step": 4483 }, { "epoch": 0.5983453429410195, "grad_norm": 1.1743034044346985, "learning_rate": 7.333620894347408e-06, "loss": 1.6208, "step": 4484 }, { "epoch": 0.5984787830264211, "grad_norm": 0.9813455603327857, "learning_rate": 7.329455705518822e-06, "loss": 1.5635, "step": 4485 }, { "epoch": 0.5986122231118228, "grad_norm": 0.97080471220344, "learning_rate": 7.325291015517499e-06, "loss": 1.5297, "step": 4486 }, { "epoch": 0.5987456631972244, "grad_norm": 1.0737172814869553, "learning_rate": 7.321126825121367e-06, "loss": 1.5659, "step": 4487 }, { "epoch": 0.5988791032826261, "grad_norm": 0.9965236541353721, "learning_rate": 7.316963135108239e-06, "loss": 1.5796, "step": 4488 }, { "epoch": 0.5990125433680278, "grad_norm": 0.9817919653306049, "learning_rate": 7.3127999462558515e-06, "loss": 1.6373, "step": 4489 }, { "epoch": 0.5991459834534294, "grad_norm": 1.050110329669737, "learning_rate": 7.308637259341842e-06, "loss": 1.5925, "step": 4490 }, { "epoch": 0.599279423538831, "grad_norm": 0.967042532570296, "learning_rate": 7.304475075143749e-06, "loss": 1.6259, "step": 4491 }, { "epoch": 0.5994128636242327, "grad_norm": 4.112038683707192, "learning_rate": 7.3003133944390226e-06, "loss": 1.5667, "step": 4492 }, { "epoch": 0.5995463037096344, "grad_norm": 1.0543783444772516, "learning_rate": 7.296152218005012e-06, "loss": 1.547, "step": 4493 }, { "epoch": 0.5996797437950361, "grad_norm": 1.0059093670666783, "learning_rate": 7.291991546618987e-06, "loss": 1.5244, "step": 4494 }, { "epoch": 0.5998131838804377, "grad_norm": 0.9544888819843551, "learning_rate": 7.28783138105811e-06, "loss": 1.5661, "step": 4495 }, { "epoch": 0.5999466239658393, "grad_norm": 0.99919602721375, "learning_rate": 7.283671722099447e-06, "loss": 1.5961, "step": 4496 }, { "epoch": 0.600080064051241, "grad_norm": 0.9615117915366642, "learning_rate": 7.279512570519984e-06, "loss": 1.5151, "step": 4497 }, { "epoch": 0.6002135041366427, "grad_norm": 0.9810089776515174, "learning_rate": 7.275353927096599e-06, "loss": 1.5632, "step": 4498 }, { "epoch": 0.6003469442220443, "grad_norm": 0.930480896476526, "learning_rate": 7.271195792606079e-06, "loss": 1.5504, "step": 4499 }, { "epoch": 0.600480384307446, "grad_norm": 1.1160803987845584, "learning_rate": 7.2670381678251135e-06, "loss": 1.5948, "step": 4500 }, { "epoch": 0.6006138243928476, "grad_norm": 0.9662586762512358, "learning_rate": 7.262881053530304e-06, "loss": 1.5188, "step": 4501 }, { "epoch": 0.6007472644782492, "grad_norm": 0.9546482055535903, "learning_rate": 7.258724450498153e-06, "loss": 1.5879, "step": 4502 }, { "epoch": 0.600880704563651, "grad_norm": 0.9262806341117151, "learning_rate": 7.254568359505059e-06, "loss": 1.5666, "step": 4503 }, { "epoch": 0.6010141446490526, "grad_norm": 0.9344199271306789, "learning_rate": 7.250412781327341e-06, "loss": 1.584, "step": 4504 }, { "epoch": 0.6011475847344542, "grad_norm": 1.006266539272583, "learning_rate": 7.24625771674121e-06, "loss": 1.6019, "step": 4505 }, { "epoch": 0.6012810248198559, "grad_norm": 0.9707157824495067, "learning_rate": 7.242103166522786e-06, "loss": 1.5485, "step": 4506 }, { "epoch": 0.6014144649052575, "grad_norm": 0.9330255414867166, "learning_rate": 7.2379491314480874e-06, "loss": 1.6067, "step": 4507 }, { "epoch": 0.6015479049906592, "grad_norm": 1.1082901632889555, "learning_rate": 7.233795612293048e-06, "loss": 1.6019, "step": 4508 }, { "epoch": 0.6016813450760609, "grad_norm": 0.9629851142489154, "learning_rate": 7.229642609833489e-06, "loss": 1.6062, "step": 4509 }, { "epoch": 0.6018147851614625, "grad_norm": 0.9416715246155154, "learning_rate": 7.2254901248451515e-06, "loss": 1.5347, "step": 4510 }, { "epoch": 0.6019482252468642, "grad_norm": 1.0187472160513316, "learning_rate": 7.22133815810367e-06, "loss": 1.5216, "step": 4511 }, { "epoch": 0.6020816653322658, "grad_norm": 1.2896554547647145, "learning_rate": 7.217186710384585e-06, "loss": 1.588, "step": 4512 }, { "epoch": 0.6022151054176674, "grad_norm": 0.9584433847605133, "learning_rate": 7.213035782463339e-06, "loss": 1.5431, "step": 4513 }, { "epoch": 0.6023485455030692, "grad_norm": 0.948451999723668, "learning_rate": 7.208885375115273e-06, "loss": 1.5385, "step": 4514 }, { "epoch": 0.6024819855884708, "grad_norm": 0.9144383102664776, "learning_rate": 7.204735489115646e-06, "loss": 1.6077, "step": 4515 }, { "epoch": 0.6026154256738724, "grad_norm": 1.0021475280100742, "learning_rate": 7.200586125239605e-06, "loss": 1.5783, "step": 4516 }, { "epoch": 0.6027488657592741, "grad_norm": 1.0074542834053597, "learning_rate": 7.196437284262202e-06, "loss": 1.6181, "step": 4517 }, { "epoch": 0.6028823058446757, "grad_norm": 0.9555123773303952, "learning_rate": 7.19228896695839e-06, "loss": 1.5896, "step": 4518 }, { "epoch": 0.6030157459300773, "grad_norm": 0.9474460595871118, "learning_rate": 7.1881411741030385e-06, "loss": 1.525, "step": 4519 }, { "epoch": 0.6031491860154791, "grad_norm": 0.9503729257356935, "learning_rate": 7.1839939064708985e-06, "loss": 1.6017, "step": 4520 }, { "epoch": 0.6032826261008807, "grad_norm": 0.9907523282551414, "learning_rate": 7.179847164836633e-06, "loss": 1.585, "step": 4521 }, { "epoch": 0.6034160661862824, "grad_norm": 0.9768177889092633, "learning_rate": 7.1757009499748135e-06, "loss": 1.5499, "step": 4522 }, { "epoch": 0.603549506271684, "grad_norm": 0.9674311604833397, "learning_rate": 7.171555262659899e-06, "loss": 1.5691, "step": 4523 }, { "epoch": 0.6036829463570856, "grad_norm": 1.045115554264623, "learning_rate": 7.167410103666258e-06, "loss": 1.5396, "step": 4524 }, { "epoch": 0.6038163864424874, "grad_norm": 1.008009383355456, "learning_rate": 7.1632654737681565e-06, "loss": 1.5004, "step": 4525 }, { "epoch": 0.603949826527889, "grad_norm": 0.9418379068039596, "learning_rate": 7.15912137373977e-06, "loss": 1.5583, "step": 4526 }, { "epoch": 0.6040832666132906, "grad_norm": 0.9601231973109439, "learning_rate": 7.154977804355167e-06, "loss": 1.5904, "step": 4527 }, { "epoch": 0.6042167066986923, "grad_norm": 1.091493380346413, "learning_rate": 7.1508347663883175e-06, "loss": 1.5876, "step": 4528 }, { "epoch": 0.6043501467840939, "grad_norm": 0.9773122566221465, "learning_rate": 7.146692260613095e-06, "loss": 1.5567, "step": 4529 }, { "epoch": 0.6044835868694955, "grad_norm": 0.9744498878123181, "learning_rate": 7.142550287803271e-06, "loss": 1.6091, "step": 4530 }, { "epoch": 0.6046170269548973, "grad_norm": 0.9623687799737277, "learning_rate": 7.138408848732521e-06, "loss": 1.5327, "step": 4531 }, { "epoch": 0.6047504670402989, "grad_norm": 1.0233729179790898, "learning_rate": 7.134267944174415e-06, "loss": 1.5748, "step": 4532 }, { "epoch": 0.6048839071257006, "grad_norm": 1.0743557870760962, "learning_rate": 7.130127574902433e-06, "loss": 1.5972, "step": 4533 }, { "epoch": 0.6050173472111022, "grad_norm": 0.9535415958368197, "learning_rate": 7.125987741689946e-06, "loss": 1.6278, "step": 4534 }, { "epoch": 0.6051507872965038, "grad_norm": 0.9506929714552727, "learning_rate": 7.121848445310221e-06, "loss": 1.5897, "step": 4535 }, { "epoch": 0.6052842273819056, "grad_norm": 1.0206134352483975, "learning_rate": 7.117709686536442e-06, "loss": 1.5732, "step": 4536 }, { "epoch": 0.6054176674673072, "grad_norm": 0.9419673451960351, "learning_rate": 7.113571466141678e-06, "loss": 1.5487, "step": 4537 }, { "epoch": 0.6055511075527088, "grad_norm": 0.9968498706511424, "learning_rate": 7.1094337848989e-06, "loss": 1.6093, "step": 4538 }, { "epoch": 0.6056845476381105, "grad_norm": 0.9476217156397696, "learning_rate": 7.105296643580979e-06, "loss": 1.564, "step": 4539 }, { "epoch": 0.6058179877235121, "grad_norm": 0.936868206502605, "learning_rate": 7.101160042960688e-06, "loss": 1.5533, "step": 4540 }, { "epoch": 0.6059514278089138, "grad_norm": 1.0845350953229937, "learning_rate": 7.097023983810699e-06, "loss": 1.553, "step": 4541 }, { "epoch": 0.6060848678943155, "grad_norm": 1.6937000276166494, "learning_rate": 7.092888466903574e-06, "loss": 1.602, "step": 4542 }, { "epoch": 0.6062183079797171, "grad_norm": 0.9553209112139722, "learning_rate": 7.0887534930117885e-06, "loss": 1.6083, "step": 4543 }, { "epoch": 0.6063517480651187, "grad_norm": 0.9702968584345638, "learning_rate": 7.084619062907704e-06, "loss": 1.6188, "step": 4544 }, { "epoch": 0.6064851881505204, "grad_norm": 0.9483887892908953, "learning_rate": 7.080485177363585e-06, "loss": 1.5665, "step": 4545 }, { "epoch": 0.606618628235922, "grad_norm": 0.9832425329814938, "learning_rate": 7.076351837151593e-06, "loss": 1.582, "step": 4546 }, { "epoch": 0.6067520683213238, "grad_norm": 0.9685038627751058, "learning_rate": 7.072219043043793e-06, "loss": 1.6042, "step": 4547 }, { "epoch": 0.6068855084067254, "grad_norm": 0.93835156044391, "learning_rate": 7.0680867958121434e-06, "loss": 1.5932, "step": 4548 }, { "epoch": 0.607018948492127, "grad_norm": 0.9471054104804755, "learning_rate": 7.063955096228498e-06, "loss": 1.6032, "step": 4549 }, { "epoch": 0.6071523885775287, "grad_norm": 0.9886055085335903, "learning_rate": 7.059823945064611e-06, "loss": 1.5914, "step": 4550 }, { "epoch": 0.6072858286629303, "grad_norm": 1.1513593773945008, "learning_rate": 7.055693343092138e-06, "loss": 1.5919, "step": 4551 }, { "epoch": 0.607419268748332, "grad_norm": 0.9399419338670711, "learning_rate": 7.051563291082624e-06, "loss": 1.6072, "step": 4552 }, { "epoch": 0.6075527088337337, "grad_norm": 0.968419038546263, "learning_rate": 7.047433789807518e-06, "loss": 1.5867, "step": 4553 }, { "epoch": 0.6076861489191353, "grad_norm": 1.159641461420781, "learning_rate": 7.043304840038166e-06, "loss": 1.6149, "step": 4554 }, { "epoch": 0.6078195890045369, "grad_norm": 0.9281119870608585, "learning_rate": 7.039176442545808e-06, "loss": 1.5342, "step": 4555 }, { "epoch": 0.6079530290899386, "grad_norm": 0.9235387184302374, "learning_rate": 7.035048598101578e-06, "loss": 1.5764, "step": 4556 }, { "epoch": 0.6080864691753403, "grad_norm": 0.9721122200439459, "learning_rate": 7.03092130747651e-06, "loss": 1.549, "step": 4557 }, { "epoch": 0.6082199092607419, "grad_norm": 0.9229902607898596, "learning_rate": 7.02679457144154e-06, "loss": 1.5277, "step": 4558 }, { "epoch": 0.6083533493461436, "grad_norm": 1.0561032370889225, "learning_rate": 7.022668390767495e-06, "loss": 1.5849, "step": 4559 }, { "epoch": 0.6084867894315452, "grad_norm": 0.910655450515761, "learning_rate": 7.018542766225091e-06, "loss": 1.5658, "step": 4560 }, { "epoch": 0.6086202295169469, "grad_norm": 0.9741339058858085, "learning_rate": 7.0144176985849565e-06, "loss": 1.5601, "step": 4561 }, { "epoch": 0.6087536696023486, "grad_norm": 0.9130348746535919, "learning_rate": 7.0102931886176055e-06, "loss": 1.5731, "step": 4562 }, { "epoch": 0.6088871096877502, "grad_norm": 0.9215074638471374, "learning_rate": 7.006169237093447e-06, "loss": 1.5319, "step": 4563 }, { "epoch": 0.6090205497731519, "grad_norm": 0.920657107314274, "learning_rate": 7.002045844782785e-06, "loss": 1.5355, "step": 4564 }, { "epoch": 0.6091539898585535, "grad_norm": 0.9316084219094781, "learning_rate": 6.9979230124558295e-06, "loss": 1.5888, "step": 4565 }, { "epoch": 0.6092874299439551, "grad_norm": 1.0136087188967267, "learning_rate": 6.9938007408826765e-06, "loss": 1.6226, "step": 4566 }, { "epoch": 0.6094208700293569, "grad_norm": 0.9419684119712161, "learning_rate": 6.989679030833314e-06, "loss": 1.5506, "step": 4567 }, { "epoch": 0.6095543101147585, "grad_norm": 0.9475350185524345, "learning_rate": 6.9855578830776385e-06, "loss": 1.5925, "step": 4568 }, { "epoch": 0.6096877502001601, "grad_norm": 1.069989064607908, "learning_rate": 6.98143729838543e-06, "loss": 1.6082, "step": 4569 }, { "epoch": 0.6098211902855618, "grad_norm": 0.9321140114017321, "learning_rate": 6.977317277526366e-06, "loss": 1.5179, "step": 4570 }, { "epoch": 0.6099546303709634, "grad_norm": 1.0848876865520107, "learning_rate": 6.973197821270018e-06, "loss": 1.6046, "step": 4571 }, { "epoch": 0.610088070456365, "grad_norm": 0.9683930596028457, "learning_rate": 6.969078930385858e-06, "loss": 1.5473, "step": 4572 }, { "epoch": 0.6102215105417668, "grad_norm": 0.9338234054464237, "learning_rate": 6.964960605643243e-06, "loss": 1.5351, "step": 4573 }, { "epoch": 0.6103549506271684, "grad_norm": 0.9528182144070801, "learning_rate": 6.960842847811432e-06, "loss": 1.5674, "step": 4574 }, { "epoch": 0.6104883907125701, "grad_norm": 0.950808534998642, "learning_rate": 6.956725657659578e-06, "loss": 1.5983, "step": 4575 }, { "epoch": 0.6106218307979717, "grad_norm": 1.0116221503224754, "learning_rate": 6.95260903595672e-06, "loss": 1.5796, "step": 4576 }, { "epoch": 0.6107552708833733, "grad_norm": 0.9762343267133087, "learning_rate": 6.948492983471799e-06, "loss": 1.6156, "step": 4577 }, { "epoch": 0.6108887109687751, "grad_norm": 0.9069025915216041, "learning_rate": 6.944377500973642e-06, "loss": 1.5628, "step": 4578 }, { "epoch": 0.6110221510541767, "grad_norm": 0.9272734873764399, "learning_rate": 6.9402625892309825e-06, "loss": 1.5324, "step": 4579 }, { "epoch": 0.6111555911395783, "grad_norm": 0.9668585275369073, "learning_rate": 6.936148249012436e-06, "loss": 1.5459, "step": 4580 }, { "epoch": 0.61128903122498, "grad_norm": 0.9728627382480686, "learning_rate": 6.932034481086512e-06, "loss": 1.647, "step": 4581 }, { "epoch": 0.6114224713103816, "grad_norm": 0.950645414784866, "learning_rate": 6.927921286221613e-06, "loss": 1.545, "step": 4582 }, { "epoch": 0.6115559113957832, "grad_norm": 1.0447314600826156, "learning_rate": 6.923808665186045e-06, "loss": 1.6108, "step": 4583 }, { "epoch": 0.611689351481185, "grad_norm": 1.0871835933095921, "learning_rate": 6.919696618747994e-06, "loss": 1.5817, "step": 4584 }, { "epoch": 0.6118227915665866, "grad_norm": 1.041327828019737, "learning_rate": 6.915585147675541e-06, "loss": 1.5483, "step": 4585 }, { "epoch": 0.6119562316519882, "grad_norm": 0.9762414215476425, "learning_rate": 6.911474252736667e-06, "loss": 1.507, "step": 4586 }, { "epoch": 0.6120896717373899, "grad_norm": 1.030786578810606, "learning_rate": 6.907363934699241e-06, "loss": 1.6357, "step": 4587 }, { "epoch": 0.6122231118227915, "grad_norm": 0.9814383825607843, "learning_rate": 6.90325419433102e-06, "loss": 1.5921, "step": 4588 }, { "epoch": 0.6123565519081933, "grad_norm": 0.9363299777601847, "learning_rate": 6.8991450323996535e-06, "loss": 1.5827, "step": 4589 }, { "epoch": 0.6124899919935949, "grad_norm": 0.9538862237104196, "learning_rate": 6.895036449672694e-06, "loss": 1.5372, "step": 4590 }, { "epoch": 0.6126234320789965, "grad_norm": 0.97494009355015, "learning_rate": 6.890928446917575e-06, "loss": 1.561, "step": 4591 }, { "epoch": 0.6127568721643982, "grad_norm": 0.9647040856357274, "learning_rate": 6.886821024901622e-06, "loss": 1.5882, "step": 4592 }, { "epoch": 0.6128903122497998, "grad_norm": 1.091307209481251, "learning_rate": 6.8827141843920585e-06, "loss": 1.6223, "step": 4593 }, { "epoch": 0.6130237523352015, "grad_norm": 0.9582456852774733, "learning_rate": 6.878607926155992e-06, "loss": 1.5471, "step": 4594 }, { "epoch": 0.6131571924206032, "grad_norm": 0.9618755168259653, "learning_rate": 6.874502250960429e-06, "loss": 1.5851, "step": 4595 }, { "epoch": 0.6132906325060048, "grad_norm": 0.9960213913651693, "learning_rate": 6.870397159572257e-06, "loss": 1.5711, "step": 4596 }, { "epoch": 0.6134240725914064, "grad_norm": 0.9675989710680151, "learning_rate": 6.866292652758266e-06, "loss": 1.5615, "step": 4597 }, { "epoch": 0.6135575126768081, "grad_norm": 1.1290180972157486, "learning_rate": 6.862188731285131e-06, "loss": 1.5547, "step": 4598 }, { "epoch": 0.6136909527622098, "grad_norm": 1.0884500014392584, "learning_rate": 6.8580853959194095e-06, "loss": 1.5555, "step": 4599 }, { "epoch": 0.6138243928476115, "grad_norm": 0.9752550116948083, "learning_rate": 6.853982647427568e-06, "loss": 1.5554, "step": 4600 }, { "epoch": 0.6139578329330131, "grad_norm": 0.9556126452390394, "learning_rate": 6.84988048657595e-06, "loss": 1.6092, "step": 4601 }, { "epoch": 0.6140912730184147, "grad_norm": 0.9563824103011194, "learning_rate": 6.845778914130792e-06, "loss": 1.5739, "step": 4602 }, { "epoch": 0.6142247131038164, "grad_norm": 0.9841983764198419, "learning_rate": 6.841677930858215e-06, "loss": 1.5626, "step": 4603 }, { "epoch": 0.614358153189218, "grad_norm": 0.945890058393798, "learning_rate": 6.837577537524247e-06, "loss": 1.5651, "step": 4604 }, { "epoch": 0.6144915932746197, "grad_norm": 0.9637365996725469, "learning_rate": 6.833477734894789e-06, "loss": 1.5531, "step": 4605 }, { "epoch": 0.6146250333600214, "grad_norm": 14.074871733099643, "learning_rate": 6.829378523735635e-06, "loss": 1.4761, "step": 4606 }, { "epoch": 0.614758473445423, "grad_norm": 1.0353012877423016, "learning_rate": 6.825279904812476e-06, "loss": 1.5987, "step": 4607 }, { "epoch": 0.6148919135308246, "grad_norm": 1.069043606434025, "learning_rate": 6.821181878890886e-06, "loss": 1.6326, "step": 4608 }, { "epoch": 0.6150253536162263, "grad_norm": 0.9544980613861165, "learning_rate": 6.817084446736329e-06, "loss": 1.5769, "step": 4609 }, { "epoch": 0.615158793701628, "grad_norm": 1.0198488235290817, "learning_rate": 6.812987609114155e-06, "loss": 1.5888, "step": 4610 }, { "epoch": 0.6152922337870296, "grad_norm": 1.0114306343190909, "learning_rate": 6.808891366789614e-06, "loss": 1.5866, "step": 4611 }, { "epoch": 0.6154256738724313, "grad_norm": 0.9683323450632398, "learning_rate": 6.804795720527832e-06, "loss": 1.6103, "step": 4612 }, { "epoch": 0.6155591139578329, "grad_norm": 0.9651109935189197, "learning_rate": 6.800700671093831e-06, "loss": 1.5516, "step": 4613 }, { "epoch": 0.6156925540432346, "grad_norm": 1.02345582166681, "learning_rate": 6.796606219252519e-06, "loss": 1.5824, "step": 4614 }, { "epoch": 0.6158259941286363, "grad_norm": 0.9451195015480363, "learning_rate": 6.7925123657686956e-06, "loss": 1.5651, "step": 4615 }, { "epoch": 0.6159594342140379, "grad_norm": 0.9485310120839627, "learning_rate": 6.78841911140704e-06, "loss": 1.5163, "step": 4616 }, { "epoch": 0.6160928742994396, "grad_norm": 0.9543391087151242, "learning_rate": 6.784326456932129e-06, "loss": 1.6248, "step": 4617 }, { "epoch": 0.6162263143848412, "grad_norm": 1.1822886103916135, "learning_rate": 6.7802344031084264e-06, "loss": 1.589, "step": 4618 }, { "epoch": 0.6163597544702428, "grad_norm": 1.0774092766967276, "learning_rate": 6.77614295070028e-06, "loss": 1.5877, "step": 4619 }, { "epoch": 0.6164931945556446, "grad_norm": 1.0639094196701442, "learning_rate": 6.772052100471924e-06, "loss": 1.6265, "step": 4620 }, { "epoch": 0.6166266346410462, "grad_norm": 0.9726691460017839, "learning_rate": 6.76796185318748e-06, "loss": 1.5791, "step": 4621 }, { "epoch": 0.6167600747264478, "grad_norm": 0.8990169867198253, "learning_rate": 6.763872209610969e-06, "loss": 1.5604, "step": 4622 }, { "epoch": 0.6168935148118495, "grad_norm": 0.9730961102585249, "learning_rate": 6.759783170506283e-06, "loss": 1.5419, "step": 4623 }, { "epoch": 0.6170269548972511, "grad_norm": 1.2338560915834138, "learning_rate": 6.755694736637206e-06, "loss": 1.5565, "step": 4624 }, { "epoch": 0.6171603949826527, "grad_norm": 0.9206856089021481, "learning_rate": 6.7516069087674186e-06, "loss": 1.5654, "step": 4625 }, { "epoch": 0.6172938350680545, "grad_norm": 0.9814257642355421, "learning_rate": 6.747519687660477e-06, "loss": 1.5811, "step": 4626 }, { "epoch": 0.6174272751534561, "grad_norm": 0.9803535643820207, "learning_rate": 6.743433074079826e-06, "loss": 1.6115, "step": 4627 }, { "epoch": 0.6175607152388578, "grad_norm": 0.9340160005856276, "learning_rate": 6.739347068788795e-06, "loss": 1.5486, "step": 4628 }, { "epoch": 0.6176941553242594, "grad_norm": 3.36171539060685, "learning_rate": 6.7352616725506125e-06, "loss": 1.5931, "step": 4629 }, { "epoch": 0.617827595409661, "grad_norm": 1.4276244458599252, "learning_rate": 6.731176886128379e-06, "loss": 1.5847, "step": 4630 }, { "epoch": 0.6179610354950628, "grad_norm": 1.0518515488300342, "learning_rate": 6.727092710285081e-06, "loss": 1.5419, "step": 4631 }, { "epoch": 0.6180944755804644, "grad_norm": 0.9733143124272681, "learning_rate": 6.723009145783607e-06, "loss": 1.5984, "step": 4632 }, { "epoch": 0.618227915665866, "grad_norm": 0.9431452106066329, "learning_rate": 6.718926193386714e-06, "loss": 1.5877, "step": 4633 }, { "epoch": 0.6183613557512677, "grad_norm": 1.0629787651629599, "learning_rate": 6.714843853857052e-06, "loss": 1.5851, "step": 4634 }, { "epoch": 0.6184947958366693, "grad_norm": 1.110188030750085, "learning_rate": 6.710762127957152e-06, "loss": 1.5875, "step": 4635 }, { "epoch": 0.618628235922071, "grad_norm": 1.1946206918422089, "learning_rate": 6.706681016449441e-06, "loss": 1.6129, "step": 4636 }, { "epoch": 0.6187616760074727, "grad_norm": 0.996759831026245, "learning_rate": 6.702600520096216e-06, "loss": 1.5783, "step": 4637 }, { "epoch": 0.6188951160928743, "grad_norm": 0.945997143369553, "learning_rate": 6.698520639659674e-06, "loss": 1.5278, "step": 4638 }, { "epoch": 0.6190285561782759, "grad_norm": 0.9733621613054791, "learning_rate": 6.694441375901888e-06, "loss": 1.5809, "step": 4639 }, { "epoch": 0.6191619962636776, "grad_norm": 0.9735974397589957, "learning_rate": 6.690362729584818e-06, "loss": 1.6003, "step": 4640 }, { "epoch": 0.6192954363490792, "grad_norm": 0.9864220968652933, "learning_rate": 6.686284701470309e-06, "loss": 1.5841, "step": 4641 }, { "epoch": 0.619428876434481, "grad_norm": 0.9772535789032403, "learning_rate": 6.682207292320084e-06, "loss": 1.5611, "step": 4642 }, { "epoch": 0.6195623165198826, "grad_norm": 1.0280279728242607, "learning_rate": 6.678130502895769e-06, "loss": 1.5529, "step": 4643 }, { "epoch": 0.6196957566052842, "grad_norm": 0.9248902395905066, "learning_rate": 6.674054333958854e-06, "loss": 1.5616, "step": 4644 }, { "epoch": 0.6198291966906859, "grad_norm": 0.9523872687282976, "learning_rate": 6.669978786270721e-06, "loss": 1.5617, "step": 4645 }, { "epoch": 0.6199626367760875, "grad_norm": 0.9776370256998808, "learning_rate": 6.665903860592635e-06, "loss": 1.5794, "step": 4646 }, { "epoch": 0.6200960768614892, "grad_norm": 0.952043520590941, "learning_rate": 6.661829557685751e-06, "loss": 1.4796, "step": 4647 }, { "epoch": 0.6202295169468909, "grad_norm": 1.1835704731983463, "learning_rate": 6.6577558783110985e-06, "loss": 1.5927, "step": 4648 }, { "epoch": 0.6203629570322925, "grad_norm": 0.9658315781942995, "learning_rate": 6.65368282322959e-06, "loss": 1.5559, "step": 4649 }, { "epoch": 0.6204963971176941, "grad_norm": 0.9972805039885417, "learning_rate": 6.649610393202037e-06, "loss": 1.6225, "step": 4650 }, { "epoch": 0.6206298372030958, "grad_norm": 1.0977055810151135, "learning_rate": 6.645538588989117e-06, "loss": 1.5555, "step": 4651 }, { "epoch": 0.6207632772884975, "grad_norm": 1.25686084150492, "learning_rate": 6.641467411351395e-06, "loss": 1.6138, "step": 4652 }, { "epoch": 0.6208967173738991, "grad_norm": 0.9606097211322975, "learning_rate": 6.637396861049319e-06, "loss": 1.5605, "step": 4653 }, { "epoch": 0.6210301574593008, "grad_norm": 0.9543190537739319, "learning_rate": 6.6333269388432295e-06, "loss": 1.5894, "step": 4654 }, { "epoch": 0.6211635975447024, "grad_norm": 0.9663911647320823, "learning_rate": 6.6292576454933355e-06, "loss": 1.5667, "step": 4655 }, { "epoch": 0.6212970376301041, "grad_norm": 1.0490534949737367, "learning_rate": 6.625188981759734e-06, "loss": 1.6118, "step": 4656 }, { "epoch": 0.6214304777155057, "grad_norm": 1.053734926356376, "learning_rate": 6.621120948402411e-06, "loss": 1.595, "step": 4657 }, { "epoch": 0.6215639178009074, "grad_norm": 0.9503515200342192, "learning_rate": 6.617053546181222e-06, "loss": 1.5537, "step": 4658 }, { "epoch": 0.6216973578863091, "grad_norm": 0.9603504106356093, "learning_rate": 6.612986775855914e-06, "loss": 1.5798, "step": 4659 }, { "epoch": 0.6218307979717107, "grad_norm": 0.9347443417349168, "learning_rate": 6.6089206381861135e-06, "loss": 1.5463, "step": 4660 }, { "epoch": 0.6219642380571123, "grad_norm": 0.9559788324052123, "learning_rate": 6.60485513393133e-06, "loss": 1.5512, "step": 4661 }, { "epoch": 0.622097678142514, "grad_norm": 0.9350187591640845, "learning_rate": 6.600790263850953e-06, "loss": 1.5178, "step": 4662 }, { "epoch": 0.6222311182279157, "grad_norm": 0.9721750502110509, "learning_rate": 6.59672602870425e-06, "loss": 1.6067, "step": 4663 }, { "epoch": 0.6223645583133173, "grad_norm": 0.9681735106486332, "learning_rate": 6.592662429250381e-06, "loss": 1.5639, "step": 4664 }, { "epoch": 0.622497998398719, "grad_norm": 1.0145576572286186, "learning_rate": 6.588599466248376e-06, "loss": 1.5534, "step": 4665 }, { "epoch": 0.6226314384841206, "grad_norm": 1.0483912253591907, "learning_rate": 6.58453714045715e-06, "loss": 1.5743, "step": 4666 }, { "epoch": 0.6227648785695223, "grad_norm": 0.9862160876054513, "learning_rate": 6.580475452635495e-06, "loss": 1.5696, "step": 4667 }, { "epoch": 0.622898318654924, "grad_norm": 0.9554839586876005, "learning_rate": 6.576414403542098e-06, "loss": 1.5909, "step": 4668 }, { "epoch": 0.6230317587403256, "grad_norm": 1.0820406864016012, "learning_rate": 6.57235399393551e-06, "loss": 1.5565, "step": 4669 }, { "epoch": 0.6231651988257273, "grad_norm": 0.98515013329777, "learning_rate": 6.568294224574168e-06, "loss": 1.5059, "step": 4670 }, { "epoch": 0.6232986389111289, "grad_norm": 1.0239649377684183, "learning_rate": 6.564235096216397e-06, "loss": 1.6116, "step": 4671 }, { "epoch": 0.6234320789965305, "grad_norm": 0.9784224342622083, "learning_rate": 6.560176609620392e-06, "loss": 1.6186, "step": 4672 }, { "epoch": 0.6235655190819323, "grad_norm": 0.9678732574412918, "learning_rate": 6.556118765544233e-06, "loss": 1.5708, "step": 4673 }, { "epoch": 0.6236989591673339, "grad_norm": 0.9800954857490598, "learning_rate": 6.5520615647458754e-06, "loss": 1.5868, "step": 4674 }, { "epoch": 0.6238323992527355, "grad_norm": 1.052059487698131, "learning_rate": 6.548005007983163e-06, "loss": 1.5298, "step": 4675 }, { "epoch": 0.6239658393381372, "grad_norm": 0.9774410359007661, "learning_rate": 6.543949096013814e-06, "loss": 1.6378, "step": 4676 }, { "epoch": 0.6240992794235388, "grad_norm": 1.0154666358789204, "learning_rate": 6.539893829595425e-06, "loss": 1.5616, "step": 4677 }, { "epoch": 0.6242327195089404, "grad_norm": 1.0875488553183978, "learning_rate": 6.535839209485473e-06, "loss": 1.6052, "step": 4678 }, { "epoch": 0.6243661595943422, "grad_norm": 0.996755457440969, "learning_rate": 6.531785236441316e-06, "loss": 1.5472, "step": 4679 }, { "epoch": 0.6244995996797438, "grad_norm": 0.9597796664804172, "learning_rate": 6.527731911220188e-06, "loss": 1.5385, "step": 4680 }, { "epoch": 0.6246330397651455, "grad_norm": 0.9868274657841508, "learning_rate": 6.523679234579207e-06, "loss": 1.5555, "step": 4681 }, { "epoch": 0.6247664798505471, "grad_norm": 0.9610538520046898, "learning_rate": 6.5196272072753665e-06, "loss": 1.6318, "step": 4682 }, { "epoch": 0.6248999199359487, "grad_norm": 0.9714178990062254, "learning_rate": 6.515575830065538e-06, "loss": 1.5617, "step": 4683 }, { "epoch": 0.6250333600213505, "grad_norm": 1.108314947127384, "learning_rate": 6.511525103706473e-06, "loss": 1.6024, "step": 4684 }, { "epoch": 0.6251668001067521, "grad_norm": 0.9704286966755745, "learning_rate": 6.507475028954797e-06, "loss": 1.5746, "step": 4685 }, { "epoch": 0.6253002401921537, "grad_norm": 0.9303993833203598, "learning_rate": 6.503425606567026e-06, "loss": 1.5919, "step": 4686 }, { "epoch": 0.6254336802775554, "grad_norm": 0.9605139751499199, "learning_rate": 6.499376837299541e-06, "loss": 1.5261, "step": 4687 }, { "epoch": 0.625567120362957, "grad_norm": 0.9509770084954142, "learning_rate": 6.4953287219086035e-06, "loss": 1.5105, "step": 4688 }, { "epoch": 0.6257005604483586, "grad_norm": 0.9729887663740894, "learning_rate": 6.491281261150362e-06, "loss": 1.6217, "step": 4689 }, { "epoch": 0.6258340005337604, "grad_norm": 0.9413725926216433, "learning_rate": 6.487234455780833e-06, "loss": 1.5847, "step": 4690 }, { "epoch": 0.625967440619162, "grad_norm": 0.9874467867153237, "learning_rate": 6.483188306555913e-06, "loss": 1.5823, "step": 4691 }, { "epoch": 0.6261008807045636, "grad_norm": 0.9489046613674973, "learning_rate": 6.479142814231374e-06, "loss": 1.5656, "step": 4692 }, { "epoch": 0.6262343207899653, "grad_norm": 0.9603586164165097, "learning_rate": 6.475097979562873e-06, "loss": 1.5801, "step": 4693 }, { "epoch": 0.6263677608753669, "grad_norm": 0.9391371736350217, "learning_rate": 6.471053803305938e-06, "loss": 1.6123, "step": 4694 }, { "epoch": 0.6265012009607687, "grad_norm": 0.9897178331583285, "learning_rate": 6.467010286215971e-06, "loss": 1.5621, "step": 4695 }, { "epoch": 0.6266346410461703, "grad_norm": 1.3397968048069753, "learning_rate": 6.462967429048259e-06, "loss": 1.5448, "step": 4696 }, { "epoch": 0.6267680811315719, "grad_norm": 0.9969569105203754, "learning_rate": 6.458925232557964e-06, "loss": 1.5672, "step": 4697 }, { "epoch": 0.6269015212169736, "grad_norm": 1.4282904976235595, "learning_rate": 6.4548836975001165e-06, "loss": 1.5755, "step": 4698 }, { "epoch": 0.6270349613023752, "grad_norm": 0.9304292340727252, "learning_rate": 6.4508428246296306e-06, "loss": 1.5704, "step": 4699 }, { "epoch": 0.6271684013877769, "grad_norm": 1.0676289094313383, "learning_rate": 6.446802614701298e-06, "loss": 1.6326, "step": 4700 }, { "epoch": 0.6273018414731786, "grad_norm": 0.9536617515956951, "learning_rate": 6.44276306846978e-06, "loss": 1.5803, "step": 4701 }, { "epoch": 0.6274352815585802, "grad_norm": 1.0650052391897409, "learning_rate": 6.438724186689621e-06, "loss": 1.5345, "step": 4702 }, { "epoch": 0.6275687216439818, "grad_norm": 0.928912755262742, "learning_rate": 6.43468597011524e-06, "loss": 1.6111, "step": 4703 }, { "epoch": 0.6277021617293835, "grad_norm": 1.0609739791096682, "learning_rate": 6.4306484195009246e-06, "loss": 1.5605, "step": 4704 }, { "epoch": 0.6278356018147851, "grad_norm": 1.0242522017660989, "learning_rate": 6.426611535600848e-06, "loss": 1.5373, "step": 4705 }, { "epoch": 0.6279690419001868, "grad_norm": 1.1731834357230497, "learning_rate": 6.422575319169047e-06, "loss": 1.5407, "step": 4706 }, { "epoch": 0.6281024819855885, "grad_norm": 0.9787653325551281, "learning_rate": 6.418539770959451e-06, "loss": 1.6097, "step": 4707 }, { "epoch": 0.6282359220709901, "grad_norm": 1.0177554635966886, "learning_rate": 6.414504891725848e-06, "loss": 1.5137, "step": 4708 }, { "epoch": 0.6283693621563918, "grad_norm": 0.9446114640014235, "learning_rate": 6.41047068222191e-06, "loss": 1.6302, "step": 4709 }, { "epoch": 0.6285028022417934, "grad_norm": 4.459094665047282, "learning_rate": 6.406437143201174e-06, "loss": 1.6297, "step": 4710 }, { "epoch": 0.6286362423271951, "grad_norm": 0.9485026202281593, "learning_rate": 6.402404275417071e-06, "loss": 1.532, "step": 4711 }, { "epoch": 0.6287696824125968, "grad_norm": 0.9321489717238166, "learning_rate": 6.39837207962289e-06, "loss": 1.5491, "step": 4712 }, { "epoch": 0.6289031224979984, "grad_norm": 1.013553078508425, "learning_rate": 6.394340556571794e-06, "loss": 1.5433, "step": 4713 }, { "epoch": 0.6290365625834, "grad_norm": 0.9238968592943705, "learning_rate": 6.390309707016833e-06, "loss": 1.5176, "step": 4714 }, { "epoch": 0.6291700026688017, "grad_norm": 0.9769175672259005, "learning_rate": 6.386279531710921e-06, "loss": 1.5942, "step": 4715 }, { "epoch": 0.6293034427542034, "grad_norm": 0.9697292609419739, "learning_rate": 6.382250031406851e-06, "loss": 1.5822, "step": 4716 }, { "epoch": 0.629436882839605, "grad_norm": 1.0258012834838501, "learning_rate": 6.378221206857278e-06, "loss": 1.5787, "step": 4717 }, { "epoch": 0.6295703229250067, "grad_norm": 0.9633940936233738, "learning_rate": 6.374193058814755e-06, "loss": 1.557, "step": 4718 }, { "epoch": 0.6297037630104083, "grad_norm": 0.9921666668432783, "learning_rate": 6.370165588031686e-06, "loss": 1.5687, "step": 4719 }, { "epoch": 0.6298372030958099, "grad_norm": 0.9501732810691954, "learning_rate": 6.366138795260356e-06, "loss": 1.4943, "step": 4720 }, { "epoch": 0.6299706431812117, "grad_norm": 0.952051603638449, "learning_rate": 6.362112681252928e-06, "loss": 1.5632, "step": 4721 }, { "epoch": 0.6301040832666133, "grad_norm": 1.0075164901271165, "learning_rate": 6.358087246761432e-06, "loss": 1.5648, "step": 4722 }, { "epoch": 0.630237523352015, "grad_norm": 0.9580671930174274, "learning_rate": 6.354062492537772e-06, "loss": 1.5348, "step": 4723 }, { "epoch": 0.6303709634374166, "grad_norm": 0.9723415922337499, "learning_rate": 6.3500384193337275e-06, "loss": 1.5957, "step": 4724 }, { "epoch": 0.6305044035228182, "grad_norm": 1.0243012844746644, "learning_rate": 6.346015027900951e-06, "loss": 1.6132, "step": 4725 }, { "epoch": 0.63063784360822, "grad_norm": 0.923480627824626, "learning_rate": 6.3419923189909674e-06, "loss": 1.6316, "step": 4726 }, { "epoch": 0.6307712836936216, "grad_norm": 0.9845476502020702, "learning_rate": 6.337970293355164e-06, "loss": 1.5867, "step": 4727 }, { "epoch": 0.6309047237790232, "grad_norm": 1.0867072821365296, "learning_rate": 6.333948951744823e-06, "loss": 1.5594, "step": 4728 }, { "epoch": 0.6310381638644249, "grad_norm": 0.9725956783048197, "learning_rate": 6.329928294911076e-06, "loss": 1.5799, "step": 4729 }, { "epoch": 0.6311716039498265, "grad_norm": 0.92191760617083, "learning_rate": 6.325908323604939e-06, "loss": 1.5785, "step": 4730 }, { "epoch": 0.6313050440352281, "grad_norm": 0.9502477068461954, "learning_rate": 6.321889038577291e-06, "loss": 1.5772, "step": 4731 }, { "epoch": 0.6314384841206299, "grad_norm": 0.9378807993847438, "learning_rate": 6.317870440578899e-06, "loss": 1.5395, "step": 4732 }, { "epoch": 0.6315719242060315, "grad_norm": 1.0944522310079952, "learning_rate": 6.313852530360387e-06, "loss": 1.597, "step": 4733 }, { "epoch": 0.6317053642914331, "grad_norm": 0.9643616082201257, "learning_rate": 6.309835308672248e-06, "loss": 1.5474, "step": 4734 }, { "epoch": 0.6318388043768348, "grad_norm": 1.1104130886728687, "learning_rate": 6.305818776264864e-06, "loss": 1.5716, "step": 4735 }, { "epoch": 0.6319722444622364, "grad_norm": 1.0769075057648472, "learning_rate": 6.301802933888472e-06, "loss": 1.5691, "step": 4736 }, { "epoch": 0.6321056845476382, "grad_norm": 0.9721124377884932, "learning_rate": 6.297787782293188e-06, "loss": 1.5656, "step": 4737 }, { "epoch": 0.6322391246330398, "grad_norm": 0.945916645197926, "learning_rate": 6.293773322228989e-06, "loss": 1.5976, "step": 4738 }, { "epoch": 0.6323725647184414, "grad_norm": 1.0327667218788614, "learning_rate": 6.2897595544457426e-06, "loss": 1.5513, "step": 4739 }, { "epoch": 0.6325060048038431, "grad_norm": 0.9500434363342504, "learning_rate": 6.285746479693166e-06, "loss": 1.5667, "step": 4740 }, { "epoch": 0.6326394448892447, "grad_norm": 1.1553660667606065, "learning_rate": 6.281734098720862e-06, "loss": 1.5331, "step": 4741 }, { "epoch": 0.6327728849746463, "grad_norm": 1.0053138525266323, "learning_rate": 6.27772241227829e-06, "loss": 1.6188, "step": 4742 }, { "epoch": 0.6329063250600481, "grad_norm": 1.117959784943353, "learning_rate": 6.2737114211147946e-06, "loss": 1.6097, "step": 4743 }, { "epoch": 0.6330397651454497, "grad_norm": 0.91872923216644, "learning_rate": 6.269701125979577e-06, "loss": 1.5345, "step": 4744 }, { "epoch": 0.6331732052308513, "grad_norm": 1.0580192994245377, "learning_rate": 6.26569152762172e-06, "loss": 1.5928, "step": 4745 }, { "epoch": 0.633306645316253, "grad_norm": 0.9371703645600504, "learning_rate": 6.261682626790169e-06, "loss": 1.5538, "step": 4746 }, { "epoch": 0.6334400854016546, "grad_norm": 0.9689827210946913, "learning_rate": 6.257674424233742e-06, "loss": 1.6359, "step": 4747 }, { "epoch": 0.6335735254870564, "grad_norm": 0.9472008982891928, "learning_rate": 6.253666920701125e-06, "loss": 1.6057, "step": 4748 }, { "epoch": 0.633706965572458, "grad_norm": 0.9192823024984135, "learning_rate": 6.249660116940869e-06, "loss": 1.5798, "step": 4749 }, { "epoch": 0.6338404056578596, "grad_norm": 1.031749263396716, "learning_rate": 6.245654013701408e-06, "loss": 1.5517, "step": 4750 }, { "epoch": 0.6339738457432613, "grad_norm": 0.9624574391068491, "learning_rate": 6.2416486117310326e-06, "loss": 1.6054, "step": 4751 }, { "epoch": 0.6341072858286629, "grad_norm": 0.9912718125263276, "learning_rate": 6.237643911777902e-06, "loss": 1.5548, "step": 4752 }, { "epoch": 0.6342407259140646, "grad_norm": 0.9308587514815552, "learning_rate": 6.233639914590056e-06, "loss": 1.5791, "step": 4753 }, { "epoch": 0.6343741659994663, "grad_norm": 0.9574581547965888, "learning_rate": 6.2296366209153935e-06, "loss": 1.582, "step": 4754 }, { "epoch": 0.6345076060848679, "grad_norm": 0.9471871838216913, "learning_rate": 6.225634031501682e-06, "loss": 1.5467, "step": 4755 }, { "epoch": 0.6346410461702695, "grad_norm": 0.9623094686395599, "learning_rate": 6.221632147096557e-06, "loss": 1.5942, "step": 4756 }, { "epoch": 0.6347744862556712, "grad_norm": 1.0311697985777823, "learning_rate": 6.2176309684475325e-06, "loss": 1.5744, "step": 4757 }, { "epoch": 0.6349079263410728, "grad_norm": 1.000138982436376, "learning_rate": 6.213630496301979e-06, "loss": 1.6009, "step": 4758 }, { "epoch": 0.6350413664264745, "grad_norm": 1.0996770869148338, "learning_rate": 6.209630731407138e-06, "loss": 1.5789, "step": 4759 }, { "epoch": 0.6351748065118762, "grad_norm": 0.9621838976319239, "learning_rate": 6.205631674510122e-06, "loss": 1.5206, "step": 4760 }, { "epoch": 0.6353082465972778, "grad_norm": 1.0190348806503502, "learning_rate": 6.2016333263579096e-06, "loss": 1.529, "step": 4761 }, { "epoch": 0.6354416866826795, "grad_norm": 0.9330752296166617, "learning_rate": 6.197635687697345e-06, "loss": 1.5315, "step": 4762 }, { "epoch": 0.6355751267680811, "grad_norm": 0.9925347326400968, "learning_rate": 6.1936387592751425e-06, "loss": 1.5504, "step": 4763 }, { "epoch": 0.6357085668534828, "grad_norm": 0.9627856019335367, "learning_rate": 6.189642541837883e-06, "loss": 1.5874, "step": 4764 }, { "epoch": 0.6358420069388845, "grad_norm": 1.0100325648862956, "learning_rate": 6.185647036132013e-06, "loss": 1.6278, "step": 4765 }, { "epoch": 0.6359754470242861, "grad_norm": 0.9333049284580013, "learning_rate": 6.181652242903848e-06, "loss": 1.5312, "step": 4766 }, { "epoch": 0.6361088871096877, "grad_norm": 0.9491077426744001, "learning_rate": 6.177658162899573e-06, "loss": 1.5889, "step": 4767 }, { "epoch": 0.6362423271950894, "grad_norm": 1.0817082893352956, "learning_rate": 6.1736647968652345e-06, "loss": 1.5957, "step": 4768 }, { "epoch": 0.6363757672804911, "grad_norm": 1.0229373542272857, "learning_rate": 6.169672145546747e-06, "loss": 1.5911, "step": 4769 }, { "epoch": 0.6365092073658927, "grad_norm": 1.0467028908683291, "learning_rate": 6.165680209689889e-06, "loss": 1.5631, "step": 4770 }, { "epoch": 0.6366426474512944, "grad_norm": 0.9981899865143897, "learning_rate": 6.161688990040315e-06, "loss": 1.566, "step": 4771 }, { "epoch": 0.636776087536696, "grad_norm": 0.9334836157550336, "learning_rate": 6.15769848734354e-06, "loss": 1.5353, "step": 4772 }, { "epoch": 0.6369095276220976, "grad_norm": 0.9251044403398437, "learning_rate": 6.1537087023449386e-06, "loss": 1.5542, "step": 4773 }, { "epoch": 0.6370429677074994, "grad_norm": 0.9282614735198017, "learning_rate": 6.149719635789757e-06, "loss": 1.5508, "step": 4774 }, { "epoch": 0.637176407792901, "grad_norm": 0.9399449871354199, "learning_rate": 6.145731288423114e-06, "loss": 1.5636, "step": 4775 }, { "epoch": 0.6373098478783027, "grad_norm": 0.9865252792900647, "learning_rate": 6.141743660989984e-06, "loss": 1.5661, "step": 4776 }, { "epoch": 0.6374432879637043, "grad_norm": 0.9797903050975381, "learning_rate": 6.137756754235205e-06, "loss": 1.5981, "step": 4777 }, { "epoch": 0.6375767280491059, "grad_norm": 0.9421928709402042, "learning_rate": 6.133770568903497e-06, "loss": 1.5313, "step": 4778 }, { "epoch": 0.6377101681345076, "grad_norm": 1.0060524251777645, "learning_rate": 6.129785105739427e-06, "loss": 1.6234, "step": 4779 }, { "epoch": 0.6378436082199093, "grad_norm": 0.9803594189527336, "learning_rate": 6.1258003654874355e-06, "loss": 1.58, "step": 4780 }, { "epoch": 0.6379770483053109, "grad_norm": 0.9902714344055336, "learning_rate": 6.121816348891822e-06, "loss": 1.5152, "step": 4781 }, { "epoch": 0.6381104883907126, "grad_norm": 1.057224007473837, "learning_rate": 6.117833056696765e-06, "loss": 1.5827, "step": 4782 }, { "epoch": 0.6382439284761142, "grad_norm": 1.4743335061623923, "learning_rate": 6.1138504896462915e-06, "loss": 1.593, "step": 4783 }, { "epoch": 0.6383773685615158, "grad_norm": 0.953837336988305, "learning_rate": 6.1098686484843e-06, "loss": 1.5946, "step": 4784 }, { "epoch": 0.6385108086469176, "grad_norm": 1.0530003073440923, "learning_rate": 6.105887533954555e-06, "loss": 1.5438, "step": 4785 }, { "epoch": 0.6386442487323192, "grad_norm": 1.091118899015719, "learning_rate": 6.101907146800683e-06, "loss": 1.5471, "step": 4786 }, { "epoch": 0.6387776888177208, "grad_norm": 0.9672697431062299, "learning_rate": 6.0979274877661734e-06, "loss": 1.5845, "step": 4787 }, { "epoch": 0.6389111289031225, "grad_norm": 1.004597787760983, "learning_rate": 6.09394855759438e-06, "loss": 1.5889, "step": 4788 }, { "epoch": 0.6390445689885241, "grad_norm": 0.9685979515200861, "learning_rate": 6.089970357028528e-06, "loss": 1.5362, "step": 4789 }, { "epoch": 0.6391780090739259, "grad_norm": 0.9618619817586496, "learning_rate": 6.085992886811696e-06, "loss": 1.5808, "step": 4790 }, { "epoch": 0.6393114491593275, "grad_norm": 0.928222428021561, "learning_rate": 6.082016147686824e-06, "loss": 1.5076, "step": 4791 }, { "epoch": 0.6394448892447291, "grad_norm": 0.9491261783995589, "learning_rate": 6.0780401403967346e-06, "loss": 1.5641, "step": 4792 }, { "epoch": 0.6395783293301308, "grad_norm": 1.0480229583034089, "learning_rate": 6.0740648656840925e-06, "loss": 1.54, "step": 4793 }, { "epoch": 0.6397117694155324, "grad_norm": 0.9544547807714423, "learning_rate": 6.070090324291436e-06, "loss": 1.5695, "step": 4794 }, { "epoch": 0.639845209500934, "grad_norm": 0.9172480413703332, "learning_rate": 6.066116516961157e-06, "loss": 1.525, "step": 4795 }, { "epoch": 0.6399786495863358, "grad_norm": 0.9492147743557356, "learning_rate": 6.062143444435529e-06, "loss": 1.5423, "step": 4796 }, { "epoch": 0.6401120896717374, "grad_norm": 0.97339963817088, "learning_rate": 6.058171107456672e-06, "loss": 1.5391, "step": 4797 }, { "epoch": 0.640245529757139, "grad_norm": 1.2402769128852358, "learning_rate": 6.0541995067665675e-06, "loss": 1.5826, "step": 4798 }, { "epoch": 0.6403789698425407, "grad_norm": 0.9756211394635729, "learning_rate": 6.050228643107074e-06, "loss": 1.5811, "step": 4799 }, { "epoch": 0.6405124099279423, "grad_norm": 0.923769213113528, "learning_rate": 6.046258517219902e-06, "loss": 1.54, "step": 4800 }, { "epoch": 0.640645850013344, "grad_norm": 1.1597847530387866, "learning_rate": 6.042289129846623e-06, "loss": 1.5603, "step": 4801 }, { "epoch": 0.6407792900987457, "grad_norm": 1.1413316657225265, "learning_rate": 6.03832048172867e-06, "loss": 1.5174, "step": 4802 }, { "epoch": 0.6409127301841473, "grad_norm": 0.9271474721968134, "learning_rate": 6.0343525736073506e-06, "loss": 1.5434, "step": 4803 }, { "epoch": 0.641046170269549, "grad_norm": 0.9223447060910756, "learning_rate": 6.03038540622382e-06, "loss": 1.5687, "step": 4804 }, { "epoch": 0.6411796103549506, "grad_norm": 0.9276917949899848, "learning_rate": 6.026418980319098e-06, "loss": 1.5586, "step": 4805 }, { "epoch": 0.6413130504403523, "grad_norm": 0.9541595431849743, "learning_rate": 6.022453296634069e-06, "loss": 1.5539, "step": 4806 }, { "epoch": 0.641446490525754, "grad_norm": 7.6768327775349, "learning_rate": 6.01848835590948e-06, "loss": 1.5641, "step": 4807 }, { "epoch": 0.6415799306111556, "grad_norm": 1.2862124397586092, "learning_rate": 6.0145241588859324e-06, "loss": 1.5405, "step": 4808 }, { "epoch": 0.6417133706965572, "grad_norm": 0.9605935993165724, "learning_rate": 6.010560706303896e-06, "loss": 1.5141, "step": 4809 }, { "epoch": 0.6418468107819589, "grad_norm": 0.9542783776622776, "learning_rate": 6.006597998903699e-06, "loss": 1.522, "step": 4810 }, { "epoch": 0.6419802508673605, "grad_norm": 0.9681776765662814, "learning_rate": 6.002636037425531e-06, "loss": 1.5561, "step": 4811 }, { "epoch": 0.6421136909527622, "grad_norm": 0.9642447674867372, "learning_rate": 5.998674822609438e-06, "loss": 1.5985, "step": 4812 }, { "epoch": 0.6422471310381639, "grad_norm": 0.991926581119737, "learning_rate": 5.9947143551953275e-06, "loss": 1.536, "step": 4813 }, { "epoch": 0.6423805711235655, "grad_norm": 0.9566617525797997, "learning_rate": 5.99075463592298e-06, "loss": 1.5697, "step": 4814 }, { "epoch": 0.6425140112089672, "grad_norm": 0.9676450282612642, "learning_rate": 5.986795665532017e-06, "loss": 1.5289, "step": 4815 }, { "epoch": 0.6426474512943688, "grad_norm": 0.9268671621535911, "learning_rate": 5.982837444761929e-06, "loss": 1.5541, "step": 4816 }, { "epoch": 0.6427808913797705, "grad_norm": 0.9595643521431843, "learning_rate": 5.978879974352072e-06, "loss": 1.5536, "step": 4817 }, { "epoch": 0.6429143314651722, "grad_norm": 0.9796809856469711, "learning_rate": 5.974923255041653e-06, "loss": 1.5676, "step": 4818 }, { "epoch": 0.6430477715505738, "grad_norm": 0.9646690903498765, "learning_rate": 5.9709672875697445e-06, "loss": 1.6259, "step": 4819 }, { "epoch": 0.6431812116359754, "grad_norm": 0.9534358718940747, "learning_rate": 5.967012072675269e-06, "loss": 1.5495, "step": 4820 }, { "epoch": 0.6433146517213771, "grad_norm": 0.9361561726578995, "learning_rate": 5.963057611097026e-06, "loss": 1.5803, "step": 4821 }, { "epoch": 0.6434480918067788, "grad_norm": 0.9139126102709447, "learning_rate": 5.9591039035736595e-06, "loss": 1.5603, "step": 4822 }, { "epoch": 0.6435815318921804, "grad_norm": 1.0664502849150646, "learning_rate": 5.955150950843673e-06, "loss": 1.5331, "step": 4823 }, { "epoch": 0.6437149719775821, "grad_norm": 1.021561984009056, "learning_rate": 5.951198753645437e-06, "loss": 1.5335, "step": 4824 }, { "epoch": 0.6438484120629837, "grad_norm": 0.9503166506512712, "learning_rate": 5.9472473127171795e-06, "loss": 1.5493, "step": 4825 }, { "epoch": 0.6439818521483853, "grad_norm": 0.9149759543673214, "learning_rate": 5.9432966287969805e-06, "loss": 1.5783, "step": 4826 }, { "epoch": 0.644115292233787, "grad_norm": 0.9900352522766276, "learning_rate": 5.939346702622782e-06, "loss": 1.632, "step": 4827 }, { "epoch": 0.6442487323191887, "grad_norm": 0.9616514543356586, "learning_rate": 5.93539753493239e-06, "loss": 1.5656, "step": 4828 }, { "epoch": 0.6443821724045904, "grad_norm": 0.9438961654754505, "learning_rate": 5.93144912646346e-06, "loss": 1.5744, "step": 4829 }, { "epoch": 0.644515612489992, "grad_norm": 0.914116676055478, "learning_rate": 5.927501477953508e-06, "loss": 1.5665, "step": 4830 }, { "epoch": 0.6446490525753936, "grad_norm": 1.0053787424529315, "learning_rate": 5.923554590139917e-06, "loss": 1.5984, "step": 4831 }, { "epoch": 0.6447824926607953, "grad_norm": 1.0553270414804121, "learning_rate": 5.919608463759916e-06, "loss": 1.6098, "step": 4832 }, { "epoch": 0.644915932746197, "grad_norm": 1.0814676717925145, "learning_rate": 5.915663099550597e-06, "loss": 1.5929, "step": 4833 }, { "epoch": 0.6450493728315986, "grad_norm": 1.2260986664990163, "learning_rate": 5.911718498248904e-06, "loss": 1.5417, "step": 4834 }, { "epoch": 0.6451828129170003, "grad_norm": 0.9760168090523013, "learning_rate": 5.907774660591654e-06, "loss": 1.5707, "step": 4835 }, { "epoch": 0.6453162530024019, "grad_norm": 1.038465167882452, "learning_rate": 5.903831587315505e-06, "loss": 1.5461, "step": 4836 }, { "epoch": 0.6454496930878035, "grad_norm": 0.9965870568346387, "learning_rate": 5.899889279156976e-06, "loss": 1.5951, "step": 4837 }, { "epoch": 0.6455831331732053, "grad_norm": 0.921641688093775, "learning_rate": 5.895947736852452e-06, "loss": 1.5367, "step": 4838 }, { "epoch": 0.6457165732586069, "grad_norm": 0.9488713580551892, "learning_rate": 5.892006961138164e-06, "loss": 1.5896, "step": 4839 }, { "epoch": 0.6458500133440085, "grad_norm": 0.94133164082473, "learning_rate": 5.8880669527502035e-06, "loss": 1.5983, "step": 4840 }, { "epoch": 0.6459834534294102, "grad_norm": 0.960447508576979, "learning_rate": 5.884127712424517e-06, "loss": 1.5464, "step": 4841 }, { "epoch": 0.6461168935148118, "grad_norm": 1.0620853615582413, "learning_rate": 5.880189240896916e-06, "loss": 1.5659, "step": 4842 }, { "epoch": 0.6462503336002136, "grad_norm": 0.9604672083966226, "learning_rate": 5.876251538903059e-06, "loss": 1.5793, "step": 4843 }, { "epoch": 0.6463837736856152, "grad_norm": 0.96613702059847, "learning_rate": 5.8723146071784654e-06, "loss": 1.58, "step": 4844 }, { "epoch": 0.6465172137710168, "grad_norm": 0.9248419108271216, "learning_rate": 5.868378446458503e-06, "loss": 1.5689, "step": 4845 }, { "epoch": 0.6466506538564185, "grad_norm": 0.9190338150548885, "learning_rate": 5.864443057478411e-06, "loss": 1.5892, "step": 4846 }, { "epoch": 0.6467840939418201, "grad_norm": 1.0016350457379213, "learning_rate": 5.860508440973269e-06, "loss": 1.5805, "step": 4847 }, { "epoch": 0.6469175340272217, "grad_norm": 1.0549731633470492, "learning_rate": 5.856574597678019e-06, "loss": 1.5599, "step": 4848 }, { "epoch": 0.6470509741126235, "grad_norm": 1.0001222985459557, "learning_rate": 5.852641528327463e-06, "loss": 1.5254, "step": 4849 }, { "epoch": 0.6471844141980251, "grad_norm": 0.9739278595702441, "learning_rate": 5.848709233656249e-06, "loss": 1.6014, "step": 4850 }, { "epoch": 0.6473178542834267, "grad_norm": 0.9808998489102456, "learning_rate": 5.844777714398884e-06, "loss": 1.5885, "step": 4851 }, { "epoch": 0.6474512943688284, "grad_norm": 0.9562991254634166, "learning_rate": 5.840846971289733e-06, "loss": 1.5986, "step": 4852 }, { "epoch": 0.64758473445423, "grad_norm": 0.9170189603539738, "learning_rate": 5.836917005063016e-06, "loss": 1.5917, "step": 4853 }, { "epoch": 0.6477181745396317, "grad_norm": 1.0146263459940281, "learning_rate": 5.832987816452804e-06, "loss": 1.5877, "step": 4854 }, { "epoch": 0.6478516146250334, "grad_norm": 1.0944373179261144, "learning_rate": 5.82905940619302e-06, "loss": 1.5499, "step": 4855 }, { "epoch": 0.647985054710435, "grad_norm": 1.0119646752828362, "learning_rate": 5.825131775017457e-06, "loss": 1.5549, "step": 4856 }, { "epoch": 0.6481184947958367, "grad_norm": 0.9692668549051643, "learning_rate": 5.8212049236597426e-06, "loss": 1.5815, "step": 4857 }, { "epoch": 0.6482519348812383, "grad_norm": 0.9836312870853201, "learning_rate": 5.817278852853373e-06, "loss": 1.5055, "step": 4858 }, { "epoch": 0.64838537496664, "grad_norm": 0.9881357720367061, "learning_rate": 5.813353563331687e-06, "loss": 1.6121, "step": 4859 }, { "epoch": 0.6485188150520417, "grad_norm": 0.9589370913572998, "learning_rate": 5.809429055827893e-06, "loss": 1.5853, "step": 4860 }, { "epoch": 0.6486522551374433, "grad_norm": 1.0049866924367945, "learning_rate": 5.805505331075041e-06, "loss": 1.5434, "step": 4861 }, { "epoch": 0.6487856952228449, "grad_norm": 0.9715555700327767, "learning_rate": 5.801582389806031e-06, "loss": 1.5507, "step": 4862 }, { "epoch": 0.6489191353082466, "grad_norm": 1.048295893300858, "learning_rate": 5.797660232753635e-06, "loss": 1.6251, "step": 4863 }, { "epoch": 0.6490525753936482, "grad_norm": 1.1124589689964615, "learning_rate": 5.793738860650462e-06, "loss": 1.5035, "step": 4864 }, { "epoch": 0.6491860154790499, "grad_norm": 1.0962898619845716, "learning_rate": 5.78981827422898e-06, "loss": 1.5325, "step": 4865 }, { "epoch": 0.6493194555644516, "grad_norm": 0.9341503031181468, "learning_rate": 5.785898474221505e-06, "loss": 1.4986, "step": 4866 }, { "epoch": 0.6494528956498532, "grad_norm": 1.042466248631925, "learning_rate": 5.781979461360221e-06, "loss": 1.5031, "step": 4867 }, { "epoch": 0.6495863357352548, "grad_norm": 0.9970733349938308, "learning_rate": 5.778061236377148e-06, "loss": 1.5859, "step": 4868 }, { "epoch": 0.6497197758206565, "grad_norm": 0.9565435536603323, "learning_rate": 5.774143800004164e-06, "loss": 1.5419, "step": 4869 }, { "epoch": 0.6498532159060582, "grad_norm": 0.9836979900956078, "learning_rate": 5.770227152973009e-06, "loss": 1.5494, "step": 4870 }, { "epoch": 0.6499866559914599, "grad_norm": 1.0135189985173951, "learning_rate": 5.766311296015263e-06, "loss": 1.6009, "step": 4871 }, { "epoch": 0.6501200960768615, "grad_norm": 0.9506176687117898, "learning_rate": 5.762396229862367e-06, "loss": 1.5465, "step": 4872 }, { "epoch": 0.6502535361622631, "grad_norm": 0.971050453122327, "learning_rate": 5.758481955245603e-06, "loss": 1.5391, "step": 4873 }, { "epoch": 0.6503869762476648, "grad_norm": 1.0432819879051074, "learning_rate": 5.754568472896123e-06, "loss": 1.5995, "step": 4874 }, { "epoch": 0.6505204163330665, "grad_norm": 1.079857789585133, "learning_rate": 5.750655783544914e-06, "loss": 1.5867, "step": 4875 }, { "epoch": 0.6506538564184681, "grad_norm": 0.9601148800734111, "learning_rate": 5.7467438879228255e-06, "loss": 1.6103, "step": 4876 }, { "epoch": 0.6507872965038698, "grad_norm": 1.0295074482916855, "learning_rate": 5.7428327867605505e-06, "loss": 1.5321, "step": 4877 }, { "epoch": 0.6509207365892714, "grad_norm": 0.9531639765490284, "learning_rate": 5.738922480788645e-06, "loss": 1.5331, "step": 4878 }, { "epoch": 0.651054176674673, "grad_norm": 0.9435011459958462, "learning_rate": 5.7350129707375035e-06, "loss": 1.5342, "step": 4879 }, { "epoch": 0.6511876167600748, "grad_norm": 0.9604110330797555, "learning_rate": 5.731104257337383e-06, "loss": 1.671, "step": 4880 }, { "epoch": 0.6513210568454764, "grad_norm": 1.1655933070828943, "learning_rate": 5.727196341318383e-06, "loss": 1.5843, "step": 4881 }, { "epoch": 0.6514544969308781, "grad_norm": 0.915628984133999, "learning_rate": 5.72328922341046e-06, "loss": 1.5624, "step": 4882 }, { "epoch": 0.6515879370162797, "grad_norm": 0.9121442794247614, "learning_rate": 5.719382904343411e-06, "loss": 1.5364, "step": 4883 }, { "epoch": 0.6517213771016813, "grad_norm": 1.0971113356076188, "learning_rate": 5.715477384846906e-06, "loss": 1.606, "step": 4884 }, { "epoch": 0.651854817187083, "grad_norm": 0.9787600524230616, "learning_rate": 5.7115726656504425e-06, "loss": 1.5761, "step": 4885 }, { "epoch": 0.6519882572724847, "grad_norm": 0.9284424960740136, "learning_rate": 5.7076687474833795e-06, "loss": 1.5292, "step": 4886 }, { "epoch": 0.6521216973578863, "grad_norm": 0.9590699643912758, "learning_rate": 5.703765631074922e-06, "loss": 1.5564, "step": 4887 }, { "epoch": 0.652255137443288, "grad_norm": 0.9384956754557705, "learning_rate": 5.699863317154133e-06, "loss": 1.5851, "step": 4888 }, { "epoch": 0.6523885775286896, "grad_norm": 0.9240054444337821, "learning_rate": 5.695961806449917e-06, "loss": 1.5768, "step": 4889 }, { "epoch": 0.6525220176140912, "grad_norm": 1.021644427657407, "learning_rate": 5.692061099691033e-06, "loss": 1.5574, "step": 4890 }, { "epoch": 0.652655457699493, "grad_norm": 1.017574465371194, "learning_rate": 5.688161197606083e-06, "loss": 1.6171, "step": 4891 }, { "epoch": 0.6527888977848946, "grad_norm": 0.9327196434111883, "learning_rate": 5.6842621009235345e-06, "loss": 1.5752, "step": 4892 }, { "epoch": 0.6529223378702962, "grad_norm": 0.9728469536906587, "learning_rate": 5.680363810371687e-06, "loss": 1.6167, "step": 4893 }, { "epoch": 0.6530557779556979, "grad_norm": 1.1670661476372766, "learning_rate": 5.676466326678697e-06, "loss": 1.5304, "step": 4894 }, { "epoch": 0.6531892180410995, "grad_norm": 1.0260789492682234, "learning_rate": 5.672569650572574e-06, "loss": 1.5498, "step": 4895 }, { "epoch": 0.6533226581265013, "grad_norm": 0.942513307680248, "learning_rate": 5.668673782781173e-06, "loss": 1.5392, "step": 4896 }, { "epoch": 0.6534560982119029, "grad_norm": 0.9337827595786473, "learning_rate": 5.664778724032194e-06, "loss": 1.5591, "step": 4897 }, { "epoch": 0.6535895382973045, "grad_norm": 0.9280660756568939, "learning_rate": 5.660884475053187e-06, "loss": 1.5647, "step": 4898 }, { "epoch": 0.6537229783827062, "grad_norm": 0.9775864120631028, "learning_rate": 5.656991036571561e-06, "loss": 1.5598, "step": 4899 }, { "epoch": 0.6538564184681078, "grad_norm": 0.9090145991125382, "learning_rate": 5.653098409314562e-06, "loss": 1.5081, "step": 4900 }, { "epoch": 0.6539898585535094, "grad_norm": 1.1498598455869353, "learning_rate": 5.649206594009287e-06, "loss": 1.5525, "step": 4901 }, { "epoch": 0.6541232986389112, "grad_norm": 0.9452608782280502, "learning_rate": 5.645315591382686e-06, "loss": 1.484, "step": 4902 }, { "epoch": 0.6542567387243128, "grad_norm": 1.0058072441790753, "learning_rate": 5.641425402161553e-06, "loss": 1.5849, "step": 4903 }, { "epoch": 0.6543901788097144, "grad_norm": 0.9290237461844418, "learning_rate": 5.6375360270725324e-06, "loss": 1.5293, "step": 4904 }, { "epoch": 0.6545236188951161, "grad_norm": 1.0552247300637112, "learning_rate": 5.633647466842108e-06, "loss": 1.5393, "step": 4905 }, { "epoch": 0.6546570589805177, "grad_norm": 0.9311882795001768, "learning_rate": 5.629759722196629e-06, "loss": 1.5945, "step": 4906 }, { "epoch": 0.6547904990659194, "grad_norm": 6.174325495147954, "learning_rate": 5.625872793862276e-06, "loss": 1.5381, "step": 4907 }, { "epoch": 0.6549239391513211, "grad_norm": 0.9663081495612666, "learning_rate": 5.621986682565084e-06, "loss": 1.5634, "step": 4908 }, { "epoch": 0.6550573792367227, "grad_norm": 1.0036068699063934, "learning_rate": 5.61810138903093e-06, "loss": 1.6021, "step": 4909 }, { "epoch": 0.6551908193221244, "grad_norm": 1.0524583355071078, "learning_rate": 5.614216913985551e-06, "loss": 1.5163, "step": 4910 }, { "epoch": 0.655324259407526, "grad_norm": 0.9691722893349417, "learning_rate": 5.610333258154519e-06, "loss": 1.5607, "step": 4911 }, { "epoch": 0.6554576994929276, "grad_norm": 0.9220645447522142, "learning_rate": 5.606450422263251e-06, "loss": 1.5614, "step": 4912 }, { "epoch": 0.6555911395783294, "grad_norm": 0.9449443848226934, "learning_rate": 5.602568407037025e-06, "loss": 1.6021, "step": 4913 }, { "epoch": 0.655724579663731, "grad_norm": 0.991397619710688, "learning_rate": 5.598687213200956e-06, "loss": 1.5747, "step": 4914 }, { "epoch": 0.6558580197491326, "grad_norm": 0.9384924843099448, "learning_rate": 5.594806841480005e-06, "loss": 1.5914, "step": 4915 }, { "epoch": 0.6559914598345343, "grad_norm": 2.1381936866886178, "learning_rate": 5.5909272925989756e-06, "loss": 1.5951, "step": 4916 }, { "epoch": 0.6561248999199359, "grad_norm": 0.9393256924385065, "learning_rate": 5.587048567282533e-06, "loss": 1.5611, "step": 4917 }, { "epoch": 0.6562583400053376, "grad_norm": 1.0866095395539688, "learning_rate": 5.583170666255174e-06, "loss": 1.5516, "step": 4918 }, { "epoch": 0.6563917800907393, "grad_norm": 0.9771179575753627, "learning_rate": 5.5792935902412485e-06, "loss": 1.6421, "step": 4919 }, { "epoch": 0.6565252201761409, "grad_norm": 0.9901820279238563, "learning_rate": 5.575417339964944e-06, "loss": 1.573, "step": 4920 }, { "epoch": 0.6566586602615425, "grad_norm": 0.9453752857587083, "learning_rate": 5.5715419161503085e-06, "loss": 1.5831, "step": 4921 }, { "epoch": 0.6567921003469442, "grad_norm": 0.9600381275049558, "learning_rate": 5.567667319521222e-06, "loss": 1.5852, "step": 4922 }, { "epoch": 0.6569255404323459, "grad_norm": 0.9722922529852711, "learning_rate": 5.563793550801418e-06, "loss": 1.5744, "step": 4923 }, { "epoch": 0.6570589805177476, "grad_norm": 1.088044217413912, "learning_rate": 5.559920610714471e-06, "loss": 1.5541, "step": 4924 }, { "epoch": 0.6571924206031492, "grad_norm": 1.0635606707693064, "learning_rate": 5.5560484999838005e-06, "loss": 1.5841, "step": 4925 }, { "epoch": 0.6573258606885508, "grad_norm": 0.9473218174863391, "learning_rate": 5.552177219332671e-06, "loss": 1.5783, "step": 4926 }, { "epoch": 0.6574593007739525, "grad_norm": 0.9671296980936441, "learning_rate": 5.548306769484199e-06, "loss": 1.5286, "step": 4927 }, { "epoch": 0.6575927408593542, "grad_norm": 0.9206430753905613, "learning_rate": 5.544437151161339e-06, "loss": 1.5968, "step": 4928 }, { "epoch": 0.6577261809447558, "grad_norm": 0.954056329951995, "learning_rate": 5.540568365086891e-06, "loss": 1.5636, "step": 4929 }, { "epoch": 0.6578596210301575, "grad_norm": 0.9276580594697565, "learning_rate": 5.536700411983495e-06, "loss": 1.6237, "step": 4930 }, { "epoch": 0.6579930611155591, "grad_norm": 0.9240637357436527, "learning_rate": 5.5328332925736495e-06, "loss": 1.5337, "step": 4931 }, { "epoch": 0.6581265012009607, "grad_norm": 0.9782170394984411, "learning_rate": 5.528967007579684e-06, "loss": 1.5287, "step": 4932 }, { "epoch": 0.6582599412863624, "grad_norm": 0.9715908393003854, "learning_rate": 5.525101557723773e-06, "loss": 1.6012, "step": 4933 }, { "epoch": 0.6583933813717641, "grad_norm": 1.0552772715223717, "learning_rate": 5.521236943727948e-06, "loss": 1.6308, "step": 4934 }, { "epoch": 0.6585268214571657, "grad_norm": 0.9273214286380065, "learning_rate": 5.517373166314068e-06, "loss": 1.5141, "step": 4935 }, { "epoch": 0.6586602615425674, "grad_norm": 1.167409050528772, "learning_rate": 5.513510226203844e-06, "loss": 1.5365, "step": 4936 }, { "epoch": 0.658793701627969, "grad_norm": 0.9613274910119957, "learning_rate": 5.5096481241188246e-06, "loss": 1.5552, "step": 4937 }, { "epoch": 0.6589271417133707, "grad_norm": 1.0120678736060127, "learning_rate": 5.505786860780416e-06, "loss": 1.5682, "step": 4938 }, { "epoch": 0.6590605817987724, "grad_norm": 0.9499179626379158, "learning_rate": 5.5019264369098535e-06, "loss": 1.5632, "step": 4939 }, { "epoch": 0.659194021884174, "grad_norm": 0.9642512741416759, "learning_rate": 5.4980668532282185e-06, "loss": 1.5566, "step": 4940 }, { "epoch": 0.6593274619695757, "grad_norm": 0.934320051094941, "learning_rate": 5.494208110456437e-06, "loss": 1.5399, "step": 4941 }, { "epoch": 0.6594609020549773, "grad_norm": 1.0972956472557598, "learning_rate": 5.490350209315283e-06, "loss": 1.5875, "step": 4942 }, { "epoch": 0.6595943421403789, "grad_norm": 0.9405793372237963, "learning_rate": 5.486493150525365e-06, "loss": 1.5474, "step": 4943 }, { "epoch": 0.6597277822257807, "grad_norm": 0.9802055587437492, "learning_rate": 5.482636934807135e-06, "loss": 1.5946, "step": 4944 }, { "epoch": 0.6598612223111823, "grad_norm": 0.949526189030937, "learning_rate": 5.478781562880897e-06, "loss": 1.595, "step": 4945 }, { "epoch": 0.6599946623965839, "grad_norm": 0.9485337756721357, "learning_rate": 5.474927035466789e-06, "loss": 1.6176, "step": 4946 }, { "epoch": 0.6601281024819856, "grad_norm": 0.917365602362209, "learning_rate": 5.471073353284788e-06, "loss": 1.5559, "step": 4947 }, { "epoch": 0.6602615425673872, "grad_norm": 0.9752592699248175, "learning_rate": 5.467220517054719e-06, "loss": 1.5717, "step": 4948 }, { "epoch": 0.660394982652789, "grad_norm": 1.0396268387538175, "learning_rate": 5.463368527496254e-06, "loss": 1.5159, "step": 4949 }, { "epoch": 0.6605284227381906, "grad_norm": 0.9639723430780847, "learning_rate": 5.459517385328894e-06, "loss": 1.5804, "step": 4950 }, { "epoch": 0.6606618628235922, "grad_norm": 0.9221523093110253, "learning_rate": 5.4556670912719885e-06, "loss": 1.5503, "step": 4951 }, { "epoch": 0.6607953029089939, "grad_norm": 0.9723372937636856, "learning_rate": 5.451817646044735e-06, "loss": 1.6308, "step": 4952 }, { "epoch": 0.6609287429943955, "grad_norm": 0.9472013736532353, "learning_rate": 5.447969050366163e-06, "loss": 1.5603, "step": 4953 }, { "epoch": 0.6610621830797971, "grad_norm": 1.2387063918292023, "learning_rate": 5.444121304955145e-06, "loss": 1.6083, "step": 4954 }, { "epoch": 0.6611956231651989, "grad_norm": 1.2148466148047536, "learning_rate": 5.440274410530393e-06, "loss": 1.5535, "step": 4955 }, { "epoch": 0.6613290632506005, "grad_norm": 0.9349547994328788, "learning_rate": 5.43642836781047e-06, "loss": 1.6053, "step": 4956 }, { "epoch": 0.6614625033360021, "grad_norm": 0.9546746784572933, "learning_rate": 5.43258317751377e-06, "loss": 1.5645, "step": 4957 }, { "epoch": 0.6615959434214038, "grad_norm": 0.9571654338821786, "learning_rate": 5.4287388403585275e-06, "loss": 1.5331, "step": 4958 }, { "epoch": 0.6617293835068054, "grad_norm": 1.0081431486737675, "learning_rate": 5.424895357062826e-06, "loss": 1.5648, "step": 4959 }, { "epoch": 0.661862823592207, "grad_norm": 1.1640026927531668, "learning_rate": 5.4210527283445824e-06, "loss": 1.5421, "step": 4960 }, { "epoch": 0.6619962636776088, "grad_norm": 0.9785271528986913, "learning_rate": 5.417210954921557e-06, "loss": 1.5635, "step": 4961 }, { "epoch": 0.6621297037630104, "grad_norm": 0.9591488914375493, "learning_rate": 5.413370037511347e-06, "loss": 1.5291, "step": 4962 }, { "epoch": 0.6622631438484121, "grad_norm": 0.9320181389081423, "learning_rate": 5.409529976831392e-06, "loss": 1.5519, "step": 4963 }, { "epoch": 0.6623965839338137, "grad_norm": 0.9436255066913496, "learning_rate": 5.4056907735989735e-06, "loss": 1.587, "step": 4964 }, { "epoch": 0.6625300240192153, "grad_norm": 0.9445224040253575, "learning_rate": 5.401852428531212e-06, "loss": 1.616, "step": 4965 }, { "epoch": 0.6626634641046171, "grad_norm": 1.0814364292819985, "learning_rate": 5.398014942345064e-06, "loss": 1.5734, "step": 4966 }, { "epoch": 0.6627969041900187, "grad_norm": 0.9584108100106682, "learning_rate": 5.39417831575733e-06, "loss": 1.5685, "step": 4967 }, { "epoch": 0.6629303442754203, "grad_norm": 0.9665649530241158, "learning_rate": 5.3903425494846485e-06, "loss": 1.6028, "step": 4968 }, { "epoch": 0.663063784360822, "grad_norm": 0.9365312188882325, "learning_rate": 5.386507644243491e-06, "loss": 1.5945, "step": 4969 }, { "epoch": 0.6631972244462236, "grad_norm": 0.9729492213835405, "learning_rate": 5.382673600750182e-06, "loss": 1.5775, "step": 4970 }, { "epoch": 0.6633306645316253, "grad_norm": 1.0805444576468706, "learning_rate": 5.3788404197208744e-06, "loss": 1.5617, "step": 4971 }, { "epoch": 0.663464104617027, "grad_norm": 0.9617923471221628, "learning_rate": 5.375008101871563e-06, "loss": 1.5363, "step": 4972 }, { "epoch": 0.6635975447024286, "grad_norm": 0.9481741920611069, "learning_rate": 5.371176647918076e-06, "loss": 1.5512, "step": 4973 }, { "epoch": 0.6637309847878302, "grad_norm": 0.980896728929079, "learning_rate": 5.367346058576095e-06, "loss": 1.5675, "step": 4974 }, { "epoch": 0.6638644248732319, "grad_norm": 1.2424778825915792, "learning_rate": 5.363516334561125e-06, "loss": 1.5574, "step": 4975 }, { "epoch": 0.6639978649586336, "grad_norm": 1.3069446139543355, "learning_rate": 5.359687476588511e-06, "loss": 1.5679, "step": 4976 }, { "epoch": 0.6641313050440353, "grad_norm": 0.9334235903224387, "learning_rate": 5.355859485373449e-06, "loss": 1.515, "step": 4977 }, { "epoch": 0.6642647451294369, "grad_norm": 0.9796056460510058, "learning_rate": 5.352032361630959e-06, "loss": 1.5388, "step": 4978 }, { "epoch": 0.6643981852148385, "grad_norm": 1.0622315589231197, "learning_rate": 5.348206106075906e-06, "loss": 1.6171, "step": 4979 }, { "epoch": 0.6645316253002402, "grad_norm": 0.9535560005647347, "learning_rate": 5.344380719422985e-06, "loss": 1.5387, "step": 4980 }, { "epoch": 0.6646650653856419, "grad_norm": 0.9332886144151455, "learning_rate": 5.340556202386743e-06, "loss": 1.5437, "step": 4981 }, { "epoch": 0.6647985054710435, "grad_norm": 0.926508722116168, "learning_rate": 5.336732555681552e-06, "loss": 1.5249, "step": 4982 }, { "epoch": 0.6649319455564452, "grad_norm": 0.9597733820765061, "learning_rate": 5.3329097800216244e-06, "loss": 1.6112, "step": 4983 }, { "epoch": 0.6650653856418468, "grad_norm": 0.9263015315418155, "learning_rate": 5.329087876121016e-06, "loss": 1.5548, "step": 4984 }, { "epoch": 0.6651988257272484, "grad_norm": 0.9197318999051447, "learning_rate": 5.325266844693611e-06, "loss": 1.5108, "step": 4985 }, { "epoch": 0.6653322658126501, "grad_norm": 0.9346342407015626, "learning_rate": 5.321446686453137e-06, "loss": 1.5587, "step": 4986 }, { "epoch": 0.6654657058980518, "grad_norm": 0.9609313535996501, "learning_rate": 5.31762740211315e-06, "loss": 1.5212, "step": 4987 }, { "epoch": 0.6655991459834534, "grad_norm": 0.9379253903460791, "learning_rate": 5.313808992387057e-06, "loss": 1.5553, "step": 4988 }, { "epoch": 0.6657325860688551, "grad_norm": 0.9452683100970093, "learning_rate": 5.309991457988091e-06, "loss": 1.5827, "step": 4989 }, { "epoch": 0.6658660261542567, "grad_norm": 0.9871345858341555, "learning_rate": 5.306174799629317e-06, "loss": 1.5048, "step": 4990 }, { "epoch": 0.6659994662396584, "grad_norm": 0.9745323174519458, "learning_rate": 5.302359018023656e-06, "loss": 1.5655, "step": 4991 }, { "epoch": 0.6661329063250601, "grad_norm": 1.0874975316674835, "learning_rate": 5.298544113883845e-06, "loss": 1.5807, "step": 4992 }, { "epoch": 0.6662663464104617, "grad_norm": 0.9359689028295645, "learning_rate": 5.294730087922464e-06, "loss": 1.555, "step": 4993 }, { "epoch": 0.6663997864958634, "grad_norm": 1.065007282594557, "learning_rate": 5.290916940851929e-06, "loss": 1.5666, "step": 4994 }, { "epoch": 0.666533226581265, "grad_norm": 1.1502681142848064, "learning_rate": 5.287104673384498e-06, "loss": 1.5713, "step": 4995 }, { "epoch": 0.6666666666666666, "grad_norm": 0.9479228632028691, "learning_rate": 5.283293286232254e-06, "loss": 1.5684, "step": 4996 }, { "epoch": 0.6668001067520684, "grad_norm": 0.9987657361216116, "learning_rate": 5.279482780107119e-06, "loss": 1.5635, "step": 4997 }, { "epoch": 0.66693354683747, "grad_norm": 1.1782973127657173, "learning_rate": 5.275673155720861e-06, "loss": 1.5947, "step": 4998 }, { "epoch": 0.6670669869228716, "grad_norm": 0.9525996465669817, "learning_rate": 5.271864413785068e-06, "loss": 1.5657, "step": 4999 }, { "epoch": 0.6672004270082733, "grad_norm": 0.9683386220692802, "learning_rate": 5.26805655501117e-06, "loss": 1.5778, "step": 5000 }, { "epoch": 0.6673338670936749, "grad_norm": 1.0500879084213508, "learning_rate": 5.26424958011043e-06, "loss": 1.5636, "step": 5001 }, { "epoch": 0.6674673071790765, "grad_norm": 0.9531705010447756, "learning_rate": 5.260443489793953e-06, "loss": 1.5954, "step": 5002 }, { "epoch": 0.6676007472644783, "grad_norm": 0.9421433711002707, "learning_rate": 5.25663828477267e-06, "loss": 1.5517, "step": 5003 }, { "epoch": 0.6677341873498799, "grad_norm": 0.9278026930537797, "learning_rate": 5.252833965757351e-06, "loss": 1.583, "step": 5004 }, { "epoch": 0.6678676274352816, "grad_norm": 0.9463961992902421, "learning_rate": 5.249030533458594e-06, "loss": 1.5259, "step": 5005 }, { "epoch": 0.6680010675206832, "grad_norm": 1.019719000874434, "learning_rate": 5.245227988586845e-06, "loss": 1.5647, "step": 5006 }, { "epoch": 0.6681345076060848, "grad_norm": 0.9199080705366238, "learning_rate": 5.241426331852373e-06, "loss": 1.5497, "step": 5007 }, { "epoch": 0.6682679476914866, "grad_norm": 0.9388293679619114, "learning_rate": 5.237625563965285e-06, "loss": 1.512, "step": 5008 }, { "epoch": 0.6684013877768882, "grad_norm": 0.9772164529249927, "learning_rate": 5.233825685635518e-06, "loss": 1.5984, "step": 5009 }, { "epoch": 0.6685348278622898, "grad_norm": 1.3066296901302348, "learning_rate": 5.23002669757285e-06, "loss": 1.5724, "step": 5010 }, { "epoch": 0.6686682679476915, "grad_norm": 1.1126937385329367, "learning_rate": 5.226228600486883e-06, "loss": 1.5821, "step": 5011 }, { "epoch": 0.6688017080330931, "grad_norm": 0.9120825110494507, "learning_rate": 5.222431395087064e-06, "loss": 1.5863, "step": 5012 }, { "epoch": 0.6689351481184947, "grad_norm": 0.9636066977274008, "learning_rate": 5.218635082082666e-06, "loss": 1.592, "step": 5013 }, { "epoch": 0.6690685882038965, "grad_norm": 0.9146764316842598, "learning_rate": 5.214839662182798e-06, "loss": 1.5786, "step": 5014 }, { "epoch": 0.6692020282892981, "grad_norm": 0.9588946756171576, "learning_rate": 5.211045136096394e-06, "loss": 1.5927, "step": 5015 }, { "epoch": 0.6693354683746997, "grad_norm": 0.944621271446456, "learning_rate": 5.20725150453224e-06, "loss": 1.51, "step": 5016 }, { "epoch": 0.6694689084601014, "grad_norm": 0.9688157625406866, "learning_rate": 5.2034587681989364e-06, "loss": 1.5454, "step": 5017 }, { "epoch": 0.669602348545503, "grad_norm": 0.9487031537844105, "learning_rate": 5.199666927804925e-06, "loss": 1.5289, "step": 5018 }, { "epoch": 0.6697357886309048, "grad_norm": 0.9892335986551877, "learning_rate": 5.195875984058474e-06, "loss": 1.5918, "step": 5019 }, { "epoch": 0.6698692287163064, "grad_norm": 0.9044754479784632, "learning_rate": 5.192085937667696e-06, "loss": 1.6101, "step": 5020 }, { "epoch": 0.670002668801708, "grad_norm": 0.9397019351239638, "learning_rate": 5.188296789340523e-06, "loss": 1.5, "step": 5021 }, { "epoch": 0.6701361088871097, "grad_norm": 0.9823103967616779, "learning_rate": 5.184508539784725e-06, "loss": 1.6176, "step": 5022 }, { "epoch": 0.6702695489725113, "grad_norm": 0.9220689248773222, "learning_rate": 5.180721189707908e-06, "loss": 1.5349, "step": 5023 }, { "epoch": 0.670402989057913, "grad_norm": 0.9481270673997315, "learning_rate": 5.176934739817503e-06, "loss": 1.5678, "step": 5024 }, { "epoch": 0.6705364291433147, "grad_norm": 1.0396242680439707, "learning_rate": 5.173149190820776e-06, "loss": 1.6279, "step": 5025 }, { "epoch": 0.6706698692287163, "grad_norm": 0.9646160121743006, "learning_rate": 5.1693645434248216e-06, "loss": 1.5928, "step": 5026 }, { "epoch": 0.6708033093141179, "grad_norm": 0.9553746604638392, "learning_rate": 5.165580798336575e-06, "loss": 1.5881, "step": 5027 }, { "epoch": 0.6709367493995196, "grad_norm": 0.9360288710325949, "learning_rate": 5.161797956262793e-06, "loss": 1.6201, "step": 5028 }, { "epoch": 0.6710701894849213, "grad_norm": 1.0854046179543964, "learning_rate": 5.158016017910064e-06, "loss": 1.5416, "step": 5029 }, { "epoch": 0.671203629570323, "grad_norm": 0.9591439987959153, "learning_rate": 5.154234983984818e-06, "loss": 1.5665, "step": 5030 }, { "epoch": 0.6713370696557246, "grad_norm": 1.0881353005554923, "learning_rate": 5.150454855193308e-06, "loss": 1.5806, "step": 5031 }, { "epoch": 0.6714705097411262, "grad_norm": 1.0095150308218221, "learning_rate": 5.146675632241614e-06, "loss": 1.5884, "step": 5032 }, { "epoch": 0.6716039498265279, "grad_norm": 0.9853452337837284, "learning_rate": 5.142897315835653e-06, "loss": 1.5928, "step": 5033 }, { "epoch": 0.6717373899119295, "grad_norm": 0.9114971278210366, "learning_rate": 5.139119906681176e-06, "loss": 1.5162, "step": 5034 }, { "epoch": 0.6718708299973312, "grad_norm": 0.970853734583226, "learning_rate": 5.135343405483757e-06, "loss": 1.555, "step": 5035 }, { "epoch": 0.6720042700827329, "grad_norm": 0.9830827365486369, "learning_rate": 5.131567812948805e-06, "loss": 1.5957, "step": 5036 }, { "epoch": 0.6721377101681345, "grad_norm": 1.0109831996031944, "learning_rate": 5.127793129781551e-06, "loss": 1.6049, "step": 5037 }, { "epoch": 0.6722711502535361, "grad_norm": 0.9273191953375031, "learning_rate": 5.124019356687073e-06, "loss": 1.5482, "step": 5038 }, { "epoch": 0.6724045903389378, "grad_norm": 0.9530762057231332, "learning_rate": 5.120246494370264e-06, "loss": 1.557, "step": 5039 }, { "epoch": 0.6725380304243395, "grad_norm": 1.009716414085073, "learning_rate": 5.116474543535848e-06, "loss": 1.5817, "step": 5040 }, { "epoch": 0.6726714705097411, "grad_norm": 0.936535223545121, "learning_rate": 5.11270350488839e-06, "loss": 1.5498, "step": 5041 }, { "epoch": 0.6728049105951428, "grad_norm": 0.9587135356454837, "learning_rate": 5.108933379132272e-06, "loss": 1.6064, "step": 5042 }, { "epoch": 0.6729383506805444, "grad_norm": 0.9707574935271643, "learning_rate": 5.105164166971714e-06, "loss": 1.4911, "step": 5043 }, { "epoch": 0.6730717907659461, "grad_norm": 0.9255255572906976, "learning_rate": 5.101395869110755e-06, "loss": 1.5974, "step": 5044 }, { "epoch": 0.6732052308513478, "grad_norm": 0.9526351690279754, "learning_rate": 5.097628486253278e-06, "loss": 1.583, "step": 5045 }, { "epoch": 0.6733386709367494, "grad_norm": 0.9231045207080708, "learning_rate": 5.0938620191029865e-06, "loss": 1.571, "step": 5046 }, { "epoch": 0.6734721110221511, "grad_norm": 0.9515238827519511, "learning_rate": 5.090096468363409e-06, "loss": 1.6318, "step": 5047 }, { "epoch": 0.6736055511075527, "grad_norm": 0.9356804828343049, "learning_rate": 5.086331834737908e-06, "loss": 1.5706, "step": 5048 }, { "epoch": 0.6737389911929543, "grad_norm": 1.1128834664193885, "learning_rate": 5.082568118929678e-06, "loss": 1.5895, "step": 5049 }, { "epoch": 0.673872431278356, "grad_norm": 1.1955255342152211, "learning_rate": 5.078805321641736e-06, "loss": 1.5635, "step": 5050 }, { "epoch": 0.6740058713637577, "grad_norm": 0.9185509120855007, "learning_rate": 5.075043443576933e-06, "loss": 1.5417, "step": 5051 }, { "epoch": 0.6741393114491593, "grad_norm": 0.9576951089769443, "learning_rate": 5.071282485437938e-06, "loss": 1.5684, "step": 5052 }, { "epoch": 0.674272751534561, "grad_norm": 0.9875819130260579, "learning_rate": 5.067522447927262e-06, "loss": 1.5387, "step": 5053 }, { "epoch": 0.6744061916199626, "grad_norm": 1.0759454029563662, "learning_rate": 5.06376333174723e-06, "loss": 1.5579, "step": 5054 }, { "epoch": 0.6745396317053642, "grad_norm": 0.9953780319051102, "learning_rate": 5.06000513760001e-06, "loss": 1.5767, "step": 5055 }, { "epoch": 0.674673071790766, "grad_norm": 0.9538482284664277, "learning_rate": 5.056247866187587e-06, "loss": 1.6003, "step": 5056 }, { "epoch": 0.6748065118761676, "grad_norm": 0.9578854545173783, "learning_rate": 5.0524915182117754e-06, "loss": 1.5625, "step": 5057 }, { "epoch": 0.6749399519615693, "grad_norm": 0.9333496781315979, "learning_rate": 5.0487360943742135e-06, "loss": 1.5545, "step": 5058 }, { "epoch": 0.6750733920469709, "grad_norm": 0.936433357559595, "learning_rate": 5.044981595376383e-06, "loss": 1.571, "step": 5059 }, { "epoch": 0.6752068321323725, "grad_norm": 0.9366758168083295, "learning_rate": 5.0412280219195746e-06, "loss": 1.6242, "step": 5060 }, { "epoch": 0.6753402722177743, "grad_norm": 1.0171664676564722, "learning_rate": 5.03747537470491e-06, "loss": 1.578, "step": 5061 }, { "epoch": 0.6754737123031759, "grad_norm": 1.2233118469857216, "learning_rate": 5.033723654433349e-06, "loss": 1.577, "step": 5062 }, { "epoch": 0.6756071523885775, "grad_norm": 0.9036663728010922, "learning_rate": 5.029972861805665e-06, "loss": 1.522, "step": 5063 }, { "epoch": 0.6757405924739792, "grad_norm": 0.9771449490125018, "learning_rate": 5.026222997522465e-06, "loss": 1.5776, "step": 5064 }, { "epoch": 0.6758740325593808, "grad_norm": 0.9516286727943922, "learning_rate": 5.022474062284177e-06, "loss": 1.5614, "step": 5065 }, { "epoch": 0.6760074726447824, "grad_norm": 0.9177876450126852, "learning_rate": 5.018726056791068e-06, "loss": 1.5596, "step": 5066 }, { "epoch": 0.6761409127301842, "grad_norm": 0.9415870237949052, "learning_rate": 5.014978981743216e-06, "loss": 1.5728, "step": 5067 }, { "epoch": 0.6762743528155858, "grad_norm": 0.9378316877697337, "learning_rate": 5.011232837840534e-06, "loss": 1.5742, "step": 5068 }, { "epoch": 0.6764077929009874, "grad_norm": 0.9740662444000914, "learning_rate": 5.007487625782755e-06, "loss": 1.5946, "step": 5069 }, { "epoch": 0.6765412329863891, "grad_norm": 0.955131305489453, "learning_rate": 5.003743346269449e-06, "loss": 1.5925, "step": 5070 }, { "epoch": 0.6766746730717907, "grad_norm": 0.9317999097948068, "learning_rate": 5.000000000000003e-06, "loss": 1.5604, "step": 5071 }, { "epoch": 0.6768081131571925, "grad_norm": 0.9365536795010426, "learning_rate": 4.9962575876736245e-06, "loss": 1.6174, "step": 5072 }, { "epoch": 0.6769415532425941, "grad_norm": 1.0580187577949391, "learning_rate": 4.992516109989362e-06, "loss": 1.5542, "step": 5073 }, { "epoch": 0.6770749933279957, "grad_norm": 0.9475023984254052, "learning_rate": 4.988775567646079e-06, "loss": 1.5899, "step": 5074 }, { "epoch": 0.6772084334133974, "grad_norm": 1.0674090359857158, "learning_rate": 4.985035961342466e-06, "loss": 1.5677, "step": 5075 }, { "epoch": 0.677341873498799, "grad_norm": 0.9866266273983164, "learning_rate": 4.981297291777032e-06, "loss": 1.5378, "step": 5076 }, { "epoch": 0.6774753135842007, "grad_norm": 0.9868827012284229, "learning_rate": 4.977559559648128e-06, "loss": 1.5811, "step": 5077 }, { "epoch": 0.6776087536696024, "grad_norm": 1.1780046727927997, "learning_rate": 4.973822765653917e-06, "loss": 1.4914, "step": 5078 }, { "epoch": 0.677742193755004, "grad_norm": 0.95257815072713, "learning_rate": 4.970086910492385e-06, "loss": 1.5842, "step": 5079 }, { "epoch": 0.6778756338404056, "grad_norm": 1.0663204797517163, "learning_rate": 4.966351994861352e-06, "loss": 1.6202, "step": 5080 }, { "epoch": 0.6780090739258073, "grad_norm": 0.935486976169621, "learning_rate": 4.962618019458456e-06, "loss": 1.5488, "step": 5081 }, { "epoch": 0.678142514011209, "grad_norm": 0.9770535662216747, "learning_rate": 4.958884984981163e-06, "loss": 1.5611, "step": 5082 }, { "epoch": 0.6782759540966106, "grad_norm": 0.918514304568767, "learning_rate": 4.9551528921267545e-06, "loss": 1.5666, "step": 5083 }, { "epoch": 0.6784093941820123, "grad_norm": 0.9318149736553532, "learning_rate": 4.951421741592353e-06, "loss": 1.5857, "step": 5084 }, { "epoch": 0.6785428342674139, "grad_norm": 13.688631168582868, "learning_rate": 4.947691534074889e-06, "loss": 1.6078, "step": 5085 }, { "epoch": 0.6786762743528156, "grad_norm": 1.002752650809866, "learning_rate": 4.94396227027112e-06, "loss": 1.516, "step": 5086 }, { "epoch": 0.6788097144382172, "grad_norm": 1.023591072044742, "learning_rate": 4.940233950877637e-06, "loss": 1.5348, "step": 5087 }, { "epoch": 0.6789431545236189, "grad_norm": 1.1764691952827722, "learning_rate": 4.936506576590846e-06, "loss": 1.5717, "step": 5088 }, { "epoch": 0.6790765946090206, "grad_norm": 1.029025311630692, "learning_rate": 4.932780148106975e-06, "loss": 1.5159, "step": 5089 }, { "epoch": 0.6792100346944222, "grad_norm": 0.950650193469443, "learning_rate": 4.929054666122081e-06, "loss": 1.5447, "step": 5090 }, { "epoch": 0.6793434747798238, "grad_norm": 1.1799569092706195, "learning_rate": 4.9253301313320355e-06, "loss": 1.5769, "step": 5091 }, { "epoch": 0.6794769148652255, "grad_norm": 0.9432993878667583, "learning_rate": 4.921606544432549e-06, "loss": 1.5891, "step": 5092 }, { "epoch": 0.6796103549506272, "grad_norm": 0.9465085033881826, "learning_rate": 4.91788390611914e-06, "loss": 1.5216, "step": 5093 }, { "epoch": 0.6797437950360288, "grad_norm": 1.0542649886569853, "learning_rate": 4.914162217087155e-06, "loss": 1.5736, "step": 5094 }, { "epoch": 0.6798772351214305, "grad_norm": 1.0000819446908524, "learning_rate": 4.910441478031763e-06, "loss": 1.6619, "step": 5095 }, { "epoch": 0.6800106752068321, "grad_norm": 0.9322943527809388, "learning_rate": 4.906721689647957e-06, "loss": 1.5867, "step": 5096 }, { "epoch": 0.6801441152922338, "grad_norm": 0.9464543046875021, "learning_rate": 4.903002852630546e-06, "loss": 1.5638, "step": 5097 }, { "epoch": 0.6802775553776355, "grad_norm": 0.9523084416197721, "learning_rate": 4.899284967674174e-06, "loss": 1.4874, "step": 5098 }, { "epoch": 0.6804109954630371, "grad_norm": 0.9259089895023304, "learning_rate": 4.895568035473297e-06, "loss": 1.575, "step": 5099 }, { "epoch": 0.6805444355484388, "grad_norm": 0.913018189144235, "learning_rate": 4.891852056722195e-06, "loss": 1.5759, "step": 5100 }, { "epoch": 0.6806778756338404, "grad_norm": 0.9351223685450596, "learning_rate": 4.888137032114966e-06, "loss": 1.552, "step": 5101 }, { "epoch": 0.680811315719242, "grad_norm": 1.1155154706231596, "learning_rate": 4.884422962345542e-06, "loss": 1.5482, "step": 5102 }, { "epoch": 0.6809447558046438, "grad_norm": 0.9366870347444463, "learning_rate": 4.880709848107666e-06, "loss": 1.5683, "step": 5103 }, { "epoch": 0.6810781958900454, "grad_norm": 0.972915796830404, "learning_rate": 4.876997690094902e-06, "loss": 1.5796, "step": 5104 }, { "epoch": 0.681211635975447, "grad_norm": 0.9605649263345989, "learning_rate": 4.873286489000645e-06, "loss": 1.4809, "step": 5105 }, { "epoch": 0.6813450760608487, "grad_norm": 1.0145828253220486, "learning_rate": 4.869576245518101e-06, "loss": 1.5529, "step": 5106 }, { "epoch": 0.6814785161462503, "grad_norm": 1.0299976481166175, "learning_rate": 4.865866960340304e-06, "loss": 1.556, "step": 5107 }, { "epoch": 0.6816119562316519, "grad_norm": 0.9634555693945733, "learning_rate": 4.862158634160101e-06, "loss": 1.5617, "step": 5108 }, { "epoch": 0.6817453963170537, "grad_norm": 1.0182839375048582, "learning_rate": 4.858451267670171e-06, "loss": 1.5827, "step": 5109 }, { "epoch": 0.6818788364024553, "grad_norm": 1.1453431192773607, "learning_rate": 4.854744861563007e-06, "loss": 1.5854, "step": 5110 }, { "epoch": 0.682012276487857, "grad_norm": 0.9803243409985238, "learning_rate": 4.851039416530917e-06, "loss": 1.5329, "step": 5111 }, { "epoch": 0.6821457165732586, "grad_norm": 1.016213296072417, "learning_rate": 4.847334933266044e-06, "loss": 1.6221, "step": 5112 }, { "epoch": 0.6822791566586602, "grad_norm": 0.9505609706075121, "learning_rate": 4.843631412460341e-06, "loss": 1.5815, "step": 5113 }, { "epoch": 0.682412596744062, "grad_norm": 1.0060585154404742, "learning_rate": 4.839928854805583e-06, "loss": 1.5788, "step": 5114 }, { "epoch": 0.6825460368294636, "grad_norm": 3.7584015873744785, "learning_rate": 4.836227260993361e-06, "loss": 1.5487, "step": 5115 }, { "epoch": 0.6826794769148652, "grad_norm": 0.9458041427982877, "learning_rate": 4.8325266317150996e-06, "loss": 1.5569, "step": 5116 }, { "epoch": 0.6828129170002669, "grad_norm": 1.0741204494235426, "learning_rate": 4.828826967662028e-06, "loss": 1.5389, "step": 5117 }, { "epoch": 0.6829463570856685, "grad_norm": 1.0796007293741565, "learning_rate": 4.8251282695252e-06, "loss": 1.5467, "step": 5118 }, { "epoch": 0.6830797971710701, "grad_norm": 0.984104078234117, "learning_rate": 4.821430537995497e-06, "loss": 1.5582, "step": 5119 }, { "epoch": 0.6832132372564719, "grad_norm": 0.9768350794179331, "learning_rate": 4.817733773763609e-06, "loss": 1.5747, "step": 5120 }, { "epoch": 0.6833466773418735, "grad_norm": 0.97701351831298, "learning_rate": 4.8140379775200494e-06, "loss": 1.562, "step": 5121 }, { "epoch": 0.6834801174272751, "grad_norm": 1.1798523816680644, "learning_rate": 4.8103431499551474e-06, "loss": 1.5862, "step": 5122 }, { "epoch": 0.6836135575126768, "grad_norm": 1.0067654037143134, "learning_rate": 4.8066492917590635e-06, "loss": 1.594, "step": 5123 }, { "epoch": 0.6837469975980784, "grad_norm": 0.9823447933349997, "learning_rate": 4.802956403621763e-06, "loss": 1.5824, "step": 5124 }, { "epoch": 0.6838804376834802, "grad_norm": 0.9800571652551026, "learning_rate": 4.799264486233031e-06, "loss": 1.5426, "step": 5125 }, { "epoch": 0.6840138777688818, "grad_norm": 0.9373855164532967, "learning_rate": 4.795573540282484e-06, "loss": 1.5959, "step": 5126 }, { "epoch": 0.6841473178542834, "grad_norm": 1.1258697616784983, "learning_rate": 4.791883566459546e-06, "loss": 1.5546, "step": 5127 }, { "epoch": 0.6842807579396851, "grad_norm": 1.0460314104044222, "learning_rate": 4.7881945654534615e-06, "loss": 1.5461, "step": 5128 }, { "epoch": 0.6844141980250867, "grad_norm": 0.9470018047113221, "learning_rate": 4.78450653795329e-06, "loss": 1.5871, "step": 5129 }, { "epoch": 0.6845476381104884, "grad_norm": 0.9381571561346944, "learning_rate": 4.780819484647918e-06, "loss": 1.5455, "step": 5130 }, { "epoch": 0.6846810781958901, "grad_norm": 1.1209989432461438, "learning_rate": 4.777133406226046e-06, "loss": 1.6104, "step": 5131 }, { "epoch": 0.6848145182812917, "grad_norm": 0.9861145590915344, "learning_rate": 4.773448303376188e-06, "loss": 1.6122, "step": 5132 }, { "epoch": 0.6849479583666933, "grad_norm": 0.9632711224553404, "learning_rate": 4.769764176786681e-06, "loss": 1.5319, "step": 5133 }, { "epoch": 0.685081398452095, "grad_norm": 1.0114719533896925, "learning_rate": 4.7660810271456735e-06, "loss": 1.5546, "step": 5134 }, { "epoch": 0.6852148385374967, "grad_norm": 0.9449667280372048, "learning_rate": 4.762398855141143e-06, "loss": 1.4989, "step": 5135 }, { "epoch": 0.6853482786228983, "grad_norm": 0.925328878656421, "learning_rate": 4.758717661460874e-06, "loss": 1.5486, "step": 5136 }, { "epoch": 0.6854817187083, "grad_norm": 0.9589142342169662, "learning_rate": 4.755037446792472e-06, "loss": 1.5787, "step": 5137 }, { "epoch": 0.6856151587937016, "grad_norm": 0.9539174340381931, "learning_rate": 4.751358211823359e-06, "loss": 1.4947, "step": 5138 }, { "epoch": 0.6857485988791033, "grad_norm": 0.9935890025844325, "learning_rate": 4.747679957240774e-06, "loss": 1.5835, "step": 5139 }, { "epoch": 0.685882038964505, "grad_norm": 0.9561138767634939, "learning_rate": 4.744002683731769e-06, "loss": 1.5068, "step": 5140 }, { "epoch": 0.6860154790499066, "grad_norm": 1.0881008249779185, "learning_rate": 4.740326391983225e-06, "loss": 1.5547, "step": 5141 }, { "epoch": 0.6861489191353083, "grad_norm": 1.0331157860816333, "learning_rate": 4.736651082681826e-06, "loss": 1.5485, "step": 5142 }, { "epoch": 0.6862823592207099, "grad_norm": 0.9726155072774376, "learning_rate": 4.732976756514077e-06, "loss": 1.5666, "step": 5143 }, { "epoch": 0.6864157993061115, "grad_norm": 0.9698903915052955, "learning_rate": 4.729303414166305e-06, "loss": 1.5705, "step": 5144 }, { "epoch": 0.6865492393915132, "grad_norm": 0.9739962559971576, "learning_rate": 4.7256310563246475e-06, "loss": 1.5984, "step": 5145 }, { "epoch": 0.6866826794769149, "grad_norm": 1.0147943743702117, "learning_rate": 4.721959683675057e-06, "loss": 1.5452, "step": 5146 }, { "epoch": 0.6868161195623165, "grad_norm": 0.9785035074553505, "learning_rate": 4.718289296903301e-06, "loss": 1.5578, "step": 5147 }, { "epoch": 0.6869495596477182, "grad_norm": 0.9329927814076208, "learning_rate": 4.714619896694974e-06, "loss": 1.5801, "step": 5148 }, { "epoch": 0.6870829997331198, "grad_norm": 0.9379344188241905, "learning_rate": 4.710951483735474e-06, "loss": 1.5543, "step": 5149 }, { "epoch": 0.6872164398185214, "grad_norm": 1.2323367929741, "learning_rate": 4.7072840587100146e-06, "loss": 1.5424, "step": 5150 }, { "epoch": 0.6873498799039232, "grad_norm": 0.9866412354494483, "learning_rate": 4.7036176223036375e-06, "loss": 1.5593, "step": 5151 }, { "epoch": 0.6874833199893248, "grad_norm": 1.0200114322334428, "learning_rate": 4.699952175201187e-06, "loss": 1.5624, "step": 5152 }, { "epoch": 0.6876167600747265, "grad_norm": 1.0952002464406705, "learning_rate": 4.696287718087326e-06, "loss": 1.5471, "step": 5153 }, { "epoch": 0.6877502001601281, "grad_norm": 0.9626777049116638, "learning_rate": 4.6926242516465315e-06, "loss": 1.5889, "step": 5154 }, { "epoch": 0.6878836402455297, "grad_norm": 0.9768971512913973, "learning_rate": 4.688961776563102e-06, "loss": 1.5778, "step": 5155 }, { "epoch": 0.6880170803309315, "grad_norm": 1.0154475915685601, "learning_rate": 4.685300293521146e-06, "loss": 1.5428, "step": 5156 }, { "epoch": 0.6881505204163331, "grad_norm": 0.9175407597697326, "learning_rate": 4.68163980320458e-06, "loss": 1.5406, "step": 5157 }, { "epoch": 0.6882839605017347, "grad_norm": 0.9219686003018891, "learning_rate": 4.677980306297153e-06, "loss": 1.5645, "step": 5158 }, { "epoch": 0.6884174005871364, "grad_norm": 0.9931694076801224, "learning_rate": 4.674321803482409e-06, "loss": 1.5528, "step": 5159 }, { "epoch": 0.688550840672538, "grad_norm": 0.9871448513395474, "learning_rate": 4.670664295443718e-06, "loss": 1.5606, "step": 5160 }, { "epoch": 0.6886842807579396, "grad_norm": 0.9678884161793881, "learning_rate": 4.667007782864256e-06, "loss": 1.6019, "step": 5161 }, { "epoch": 0.6888177208433414, "grad_norm": 0.9310915342747638, "learning_rate": 4.663352266427026e-06, "loss": 1.6019, "step": 5162 }, { "epoch": 0.688951160928743, "grad_norm": 0.9779613924239206, "learning_rate": 4.659697746814832e-06, "loss": 1.5946, "step": 5163 }, { "epoch": 0.6890846010141447, "grad_norm": 0.944018936409953, "learning_rate": 4.656044224710297e-06, "loss": 1.6024, "step": 5164 }, { "epoch": 0.6892180410995463, "grad_norm": 0.9489255197624183, "learning_rate": 4.652391700795854e-06, "loss": 1.6148, "step": 5165 }, { "epoch": 0.6893514811849479, "grad_norm": 0.9554477607577454, "learning_rate": 4.64874017575376e-06, "loss": 1.6305, "step": 5166 }, { "epoch": 0.6894849212703497, "grad_norm": 0.9578515697117463, "learning_rate": 4.645089650266074e-06, "loss": 1.5761, "step": 5167 }, { "epoch": 0.6896183613557513, "grad_norm": 1.089494909741061, "learning_rate": 4.641440125014672e-06, "loss": 1.552, "step": 5168 }, { "epoch": 0.6897518014411529, "grad_norm": 0.9764757676054326, "learning_rate": 4.637791600681247e-06, "loss": 1.6047, "step": 5169 }, { "epoch": 0.6898852415265546, "grad_norm": 0.9704402753837931, "learning_rate": 4.634144077947301e-06, "loss": 1.5901, "step": 5170 }, { "epoch": 0.6900186816119562, "grad_norm": 0.9450629354753408, "learning_rate": 4.630497557494149e-06, "loss": 1.5672, "step": 5171 }, { "epoch": 0.6901521216973578, "grad_norm": 0.9265680379145714, "learning_rate": 4.626852040002915e-06, "loss": 1.5683, "step": 5172 }, { "epoch": 0.6902855617827596, "grad_norm": 1.0282668336579435, "learning_rate": 4.6232075261545476e-06, "loss": 1.5511, "step": 5173 }, { "epoch": 0.6904190018681612, "grad_norm": 0.9406742239628743, "learning_rate": 4.6195640166297975e-06, "loss": 1.5473, "step": 5174 }, { "epoch": 0.6905524419535628, "grad_norm": 0.9331259548380249, "learning_rate": 4.61592151210923e-06, "loss": 1.5373, "step": 5175 }, { "epoch": 0.6906858820389645, "grad_norm": 0.9424269744921971, "learning_rate": 4.612280013273221e-06, "loss": 1.5352, "step": 5176 }, { "epoch": 0.6908193221243661, "grad_norm": 0.9992155998336348, "learning_rate": 4.608639520801967e-06, "loss": 1.5999, "step": 5177 }, { "epoch": 0.6909527622097679, "grad_norm": 1.0114712743867609, "learning_rate": 4.605000035375469e-06, "loss": 1.5355, "step": 5178 }, { "epoch": 0.6910862022951695, "grad_norm": 0.9838800177008744, "learning_rate": 4.6013615576735385e-06, "loss": 1.6047, "step": 5179 }, { "epoch": 0.6912196423805711, "grad_norm": 0.9546619375864034, "learning_rate": 4.597724088375803e-06, "loss": 1.5293, "step": 5180 }, { "epoch": 0.6913530824659728, "grad_norm": 1.3199845007698923, "learning_rate": 4.594087628161702e-06, "loss": 1.6278, "step": 5181 }, { "epoch": 0.6914865225513744, "grad_norm": 0.971042571067602, "learning_rate": 4.590452177710479e-06, "loss": 1.5705, "step": 5182 }, { "epoch": 0.691619962636776, "grad_norm": 0.9499493704833659, "learning_rate": 4.586817737701204e-06, "loss": 1.5513, "step": 5183 }, { "epoch": 0.6917534027221778, "grad_norm": 1.0111784613671377, "learning_rate": 4.583184308812742e-06, "loss": 1.5944, "step": 5184 }, { "epoch": 0.6918868428075794, "grad_norm": 0.9428606553786436, "learning_rate": 4.579551891723781e-06, "loss": 1.5875, "step": 5185 }, { "epoch": 0.692020282892981, "grad_norm": 1.017385930406295, "learning_rate": 4.575920487112806e-06, "loss": 1.4778, "step": 5186 }, { "epoch": 0.6921537229783827, "grad_norm": 0.9481626619930875, "learning_rate": 4.572290095658134e-06, "loss": 1.5244, "step": 5187 }, { "epoch": 0.6922871630637843, "grad_norm": 1.0138210258708102, "learning_rate": 4.568660718037875e-06, "loss": 1.5753, "step": 5188 }, { "epoch": 0.692420603149186, "grad_norm": 1.0491600377755286, "learning_rate": 4.565032354929951e-06, "loss": 1.5974, "step": 5189 }, { "epoch": 0.6925540432345877, "grad_norm": 1.2073763312887016, "learning_rate": 4.561405007012109e-06, "loss": 1.5599, "step": 5190 }, { "epoch": 0.6926874833199893, "grad_norm": 1.0293910333928922, "learning_rate": 4.55777867496189e-06, "loss": 1.5523, "step": 5191 }, { "epoch": 0.692820923405391, "grad_norm": 0.9189903325399789, "learning_rate": 4.5541533594566535e-06, "loss": 1.5528, "step": 5192 }, { "epoch": 0.6929543634907926, "grad_norm": 0.9080583203789369, "learning_rate": 4.550529061173562e-06, "loss": 1.5131, "step": 5193 }, { "epoch": 0.6930878035761943, "grad_norm": 0.952626918394523, "learning_rate": 4.5469057807896e-06, "loss": 1.5493, "step": 5194 }, { "epoch": 0.693221243661596, "grad_norm": 0.9394419115149728, "learning_rate": 4.5432835189815535e-06, "loss": 1.5554, "step": 5195 }, { "epoch": 0.6933546837469976, "grad_norm": 0.9546357659052805, "learning_rate": 4.539662276426018e-06, "loss": 1.5209, "step": 5196 }, { "epoch": 0.6934881238323992, "grad_norm": 1.003350765525599, "learning_rate": 4.536042053799398e-06, "loss": 1.5391, "step": 5197 }, { "epoch": 0.6936215639178009, "grad_norm": 0.9836741751211436, "learning_rate": 4.532422851777916e-06, "loss": 1.5573, "step": 5198 }, { "epoch": 0.6937550040032026, "grad_norm": 0.9574473385195087, "learning_rate": 4.528804671037594e-06, "loss": 1.5952, "step": 5199 }, { "epoch": 0.6938884440886042, "grad_norm": 0.9311362883331841, "learning_rate": 4.525187512254266e-06, "loss": 1.5085, "step": 5200 }, { "epoch": 0.6940218841740059, "grad_norm": 0.9609583738468779, "learning_rate": 4.521571376103578e-06, "loss": 1.5997, "step": 5201 }, { "epoch": 0.6941553242594075, "grad_norm": 0.9375491622362763, "learning_rate": 4.517956263260985e-06, "loss": 1.5734, "step": 5202 }, { "epoch": 0.6942887643448091, "grad_norm": 0.9381515614250917, "learning_rate": 4.514342174401747e-06, "loss": 1.5768, "step": 5203 }, { "epoch": 0.6944222044302109, "grad_norm": 0.9464641129012018, "learning_rate": 4.510729110200929e-06, "loss": 1.5765, "step": 5204 }, { "epoch": 0.6945556445156125, "grad_norm": 0.9461797676889874, "learning_rate": 4.50711707133342e-06, "loss": 1.5685, "step": 5205 }, { "epoch": 0.6946890846010142, "grad_norm": 0.9925712528011031, "learning_rate": 4.503506058473903e-06, "loss": 1.53, "step": 5206 }, { "epoch": 0.6948225246864158, "grad_norm": 0.932476678094929, "learning_rate": 4.499896072296871e-06, "loss": 1.5794, "step": 5207 }, { "epoch": 0.6949559647718174, "grad_norm": 1.0005881114895052, "learning_rate": 4.496287113476635e-06, "loss": 1.5897, "step": 5208 }, { "epoch": 0.6950894048572192, "grad_norm": 0.945425425153403, "learning_rate": 4.492679182687304e-06, "loss": 1.5572, "step": 5209 }, { "epoch": 0.6952228449426208, "grad_norm": 0.9279450617465351, "learning_rate": 4.489072280602799e-06, "loss": 1.5723, "step": 5210 }, { "epoch": 0.6953562850280224, "grad_norm": 0.9947713480204747, "learning_rate": 4.485466407896844e-06, "loss": 1.5525, "step": 5211 }, { "epoch": 0.6954897251134241, "grad_norm": 1.0073089670685307, "learning_rate": 4.481861565242982e-06, "loss": 1.538, "step": 5212 }, { "epoch": 0.6956231651988257, "grad_norm": 0.9775029711915599, "learning_rate": 4.478257753314554e-06, "loss": 1.478, "step": 5213 }, { "epoch": 0.6957566052842273, "grad_norm": 1.1051713607759426, "learning_rate": 4.474654972784705e-06, "loss": 1.578, "step": 5214 }, { "epoch": 0.6958900453696291, "grad_norm": 0.9162253285174159, "learning_rate": 4.471053224326404e-06, "loss": 1.5747, "step": 5215 }, { "epoch": 0.6960234854550307, "grad_norm": 0.9337462353761629, "learning_rate": 4.46745250861241e-06, "loss": 1.5675, "step": 5216 }, { "epoch": 0.6961569255404323, "grad_norm": 1.0028329737784027, "learning_rate": 4.463852826315298e-06, "loss": 1.5358, "step": 5217 }, { "epoch": 0.696290365625834, "grad_norm": 0.9098824085690614, "learning_rate": 4.460254178107446e-06, "loss": 1.5468, "step": 5218 }, { "epoch": 0.6964238057112356, "grad_norm": 0.954411548951353, "learning_rate": 4.456656564661037e-06, "loss": 1.5395, "step": 5219 }, { "epoch": 0.6965572457966374, "grad_norm": 0.9723768754629288, "learning_rate": 4.453059986648073e-06, "loss": 1.5383, "step": 5220 }, { "epoch": 0.696690685882039, "grad_norm": 0.9714260510102523, "learning_rate": 4.449464444740349e-06, "loss": 1.5604, "step": 5221 }, { "epoch": 0.6968241259674406, "grad_norm": 0.9812563625331769, "learning_rate": 4.445869939609472e-06, "loss": 1.5926, "step": 5222 }, { "epoch": 0.6969575660528423, "grad_norm": 1.1501049604817195, "learning_rate": 4.442276471926853e-06, "loss": 1.5484, "step": 5223 }, { "epoch": 0.6970910061382439, "grad_norm": 0.9626957186688674, "learning_rate": 4.4386840423637124e-06, "loss": 1.5799, "step": 5224 }, { "epoch": 0.6972244462236455, "grad_norm": 1.0216025455702171, "learning_rate": 4.435092651591072e-06, "loss": 1.5409, "step": 5225 }, { "epoch": 0.6973578863090473, "grad_norm": 0.9485116910427717, "learning_rate": 4.4315023002797675e-06, "loss": 1.5487, "step": 5226 }, { "epoch": 0.6974913263944489, "grad_norm": 0.9715863993865531, "learning_rate": 4.427912989100434e-06, "loss": 1.596, "step": 5227 }, { "epoch": 0.6976247664798505, "grad_norm": 0.968567527529441, "learning_rate": 4.42432471872351e-06, "loss": 1.5294, "step": 5228 }, { "epoch": 0.6977582065652522, "grad_norm": 0.9687730959837667, "learning_rate": 4.420737489819253e-06, "loss": 1.5265, "step": 5229 }, { "epoch": 0.6978916466506538, "grad_norm": 1.0885904607859849, "learning_rate": 4.417151303057707e-06, "loss": 1.5883, "step": 5230 }, { "epoch": 0.6980250867360555, "grad_norm": 1.025218940735289, "learning_rate": 4.413566159108736e-06, "loss": 1.514, "step": 5231 }, { "epoch": 0.6981585268214572, "grad_norm": 2.751796441629812, "learning_rate": 4.409982058641999e-06, "loss": 1.5411, "step": 5232 }, { "epoch": 0.6982919669068588, "grad_norm": 0.9234092508289905, "learning_rate": 4.406399002326973e-06, "loss": 1.5368, "step": 5233 }, { "epoch": 0.6984254069922605, "grad_norm": 1.0403344284539555, "learning_rate": 4.402816990832928e-06, "loss": 1.5848, "step": 5234 }, { "epoch": 0.6985588470776621, "grad_norm": 0.9148227947424791, "learning_rate": 4.3992360248289416e-06, "loss": 1.5145, "step": 5235 }, { "epoch": 0.6986922871630638, "grad_norm": 0.9779207787661259, "learning_rate": 4.395656104983895e-06, "loss": 1.5853, "step": 5236 }, { "epoch": 0.6988257272484655, "grad_norm": 1.111013896532024, "learning_rate": 4.392077231966484e-06, "loss": 1.5895, "step": 5237 }, { "epoch": 0.6989591673338671, "grad_norm": 1.0099682804012988, "learning_rate": 4.388499406445198e-06, "loss": 1.5454, "step": 5238 }, { "epoch": 0.6990926074192687, "grad_norm": 0.9508070443731435, "learning_rate": 4.384922629088329e-06, "loss": 1.5156, "step": 5239 }, { "epoch": 0.6992260475046704, "grad_norm": 0.9772725704881033, "learning_rate": 4.381346900563988e-06, "loss": 1.5688, "step": 5240 }, { "epoch": 0.699359487590072, "grad_norm": 1.1327443354133715, "learning_rate": 4.377772221540073e-06, "loss": 1.5734, "step": 5241 }, { "epoch": 0.6994929276754737, "grad_norm": 1.080615395417145, "learning_rate": 4.374198592684296e-06, "loss": 1.5525, "step": 5242 }, { "epoch": 0.6996263677608754, "grad_norm": 0.9705398879426412, "learning_rate": 4.370626014664168e-06, "loss": 1.5905, "step": 5243 }, { "epoch": 0.699759807846277, "grad_norm": 0.9423985690259806, "learning_rate": 4.36705448814701e-06, "loss": 1.5543, "step": 5244 }, { "epoch": 0.6998932479316787, "grad_norm": 1.0013253966326623, "learning_rate": 4.36348401379994e-06, "loss": 1.6252, "step": 5245 }, { "epoch": 0.7000266880170803, "grad_norm": 0.9849762775796472, "learning_rate": 4.359914592289879e-06, "loss": 1.5735, "step": 5246 }, { "epoch": 0.700160128102482, "grad_norm": 0.9466224944504175, "learning_rate": 4.3563462242835605e-06, "loss": 1.5474, "step": 5247 }, { "epoch": 0.7002935681878837, "grad_norm": 1.0452884135924572, "learning_rate": 4.352778910447513e-06, "loss": 1.55, "step": 5248 }, { "epoch": 0.7004270082732853, "grad_norm": 0.9636258481231191, "learning_rate": 4.349212651448067e-06, "loss": 1.5592, "step": 5249 }, { "epoch": 0.7005604483586869, "grad_norm": 0.9266243703586854, "learning_rate": 4.345647447951359e-06, "loss": 1.5426, "step": 5250 }, { "epoch": 0.7006938884440886, "grad_norm": 0.9465277944454074, "learning_rate": 4.3420833006233335e-06, "loss": 1.5223, "step": 5251 }, { "epoch": 0.7008273285294903, "grad_norm": 0.9476657743051038, "learning_rate": 4.338520210129729e-06, "loss": 1.5138, "step": 5252 }, { "epoch": 0.7009607686148919, "grad_norm": 0.92673177526777, "learning_rate": 4.33495817713609e-06, "loss": 1.5719, "step": 5253 }, { "epoch": 0.7010942087002936, "grad_norm": 1.1410712640987881, "learning_rate": 4.3313972023077656e-06, "loss": 1.5309, "step": 5254 }, { "epoch": 0.7012276487856952, "grad_norm": 0.8997294098548428, "learning_rate": 4.327837286309905e-06, "loss": 1.5379, "step": 5255 }, { "epoch": 0.7013610888710968, "grad_norm": 1.117573499608535, "learning_rate": 4.324278429807459e-06, "loss": 1.5973, "step": 5256 }, { "epoch": 0.7014945289564986, "grad_norm": 0.9591244120794552, "learning_rate": 4.320720633465178e-06, "loss": 1.576, "step": 5257 }, { "epoch": 0.7016279690419002, "grad_norm": 1.246892551311886, "learning_rate": 4.317163897947626e-06, "loss": 1.5957, "step": 5258 }, { "epoch": 0.7017614091273019, "grad_norm": 0.9650925501800929, "learning_rate": 4.3136082239191565e-06, "loss": 1.5404, "step": 5259 }, { "epoch": 0.7018948492127035, "grad_norm": 0.937754472050194, "learning_rate": 4.310053612043928e-06, "loss": 1.5491, "step": 5260 }, { "epoch": 0.7020282892981051, "grad_norm": 0.918105960005196, "learning_rate": 4.306500062985903e-06, "loss": 1.5244, "step": 5261 }, { "epoch": 0.7021617293835068, "grad_norm": 0.9586676779477473, "learning_rate": 4.302947577408839e-06, "loss": 1.6228, "step": 5262 }, { "epoch": 0.7022951694689085, "grad_norm": 0.9517519387872295, "learning_rate": 4.299396155976308e-06, "loss": 1.6056, "step": 5263 }, { "epoch": 0.7024286095543101, "grad_norm": 1.495533408695814, "learning_rate": 4.295845799351672e-06, "loss": 1.5421, "step": 5264 }, { "epoch": 0.7025620496397118, "grad_norm": 0.9864875327078859, "learning_rate": 4.2922965081980964e-06, "loss": 1.5468, "step": 5265 }, { "epoch": 0.7026954897251134, "grad_norm": 0.9301887930563217, "learning_rate": 4.288748283178546e-06, "loss": 1.5442, "step": 5266 }, { "epoch": 0.702828929810515, "grad_norm": 0.9633255736311295, "learning_rate": 4.285201124955795e-06, "loss": 1.6113, "step": 5267 }, { "epoch": 0.7029623698959168, "grad_norm": 0.9458457528853886, "learning_rate": 4.281655034192402e-06, "loss": 1.5682, "step": 5268 }, { "epoch": 0.7030958099813184, "grad_norm": 0.9396435856415133, "learning_rate": 4.278110011550748e-06, "loss": 1.5206, "step": 5269 }, { "epoch": 0.70322925006672, "grad_norm": 0.932365193699769, "learning_rate": 4.274566057692996e-06, "loss": 1.5637, "step": 5270 }, { "epoch": 0.7033626901521217, "grad_norm": 0.9356770658793856, "learning_rate": 4.271023173281116e-06, "loss": 1.5513, "step": 5271 }, { "epoch": 0.7034961302375233, "grad_norm": 0.9470539843139907, "learning_rate": 4.267481358976883e-06, "loss": 1.5313, "step": 5272 }, { "epoch": 0.7036295703229251, "grad_norm": 0.936074803599193, "learning_rate": 4.263940615441865e-06, "loss": 1.5915, "step": 5273 }, { "epoch": 0.7037630104083267, "grad_norm": 0.9601362770329818, "learning_rate": 4.260400943337433e-06, "loss": 1.5683, "step": 5274 }, { "epoch": 0.7038964504937283, "grad_norm": 0.9200436045608203, "learning_rate": 4.2568623433247534e-06, "loss": 1.5353, "step": 5275 }, { "epoch": 0.70402989057913, "grad_norm": 1.2183542283592865, "learning_rate": 4.253324816064803e-06, "loss": 1.5893, "step": 5276 }, { "epoch": 0.7041633306645316, "grad_norm": 0.9608670542983757, "learning_rate": 4.24978836221835e-06, "loss": 1.5719, "step": 5277 }, { "epoch": 0.7042967707499332, "grad_norm": 1.0831628265033406, "learning_rate": 4.246252982445957e-06, "loss": 1.5496, "step": 5278 }, { "epoch": 0.704430210835335, "grad_norm": 0.9288733637532146, "learning_rate": 4.242718677408002e-06, "loss": 1.5955, "step": 5279 }, { "epoch": 0.7045636509207366, "grad_norm": 0.9220556310697627, "learning_rate": 4.23918544776465e-06, "loss": 1.5466, "step": 5280 }, { "epoch": 0.7046970910061382, "grad_norm": 1.0053641177822847, "learning_rate": 4.235653294175866e-06, "loss": 1.5668, "step": 5281 }, { "epoch": 0.7048305310915399, "grad_norm": 0.9592861362485582, "learning_rate": 4.232122217301414e-06, "loss": 1.5207, "step": 5282 }, { "epoch": 0.7049639711769415, "grad_norm": 1.003209125290528, "learning_rate": 4.228592217800865e-06, "loss": 1.5636, "step": 5283 }, { "epoch": 0.7050974112623432, "grad_norm": 0.9377732106678393, "learning_rate": 4.2250632963335805e-06, "loss": 1.5727, "step": 5284 }, { "epoch": 0.7052308513477449, "grad_norm": 10.633257878300137, "learning_rate": 4.221535453558718e-06, "loss": 1.6243, "step": 5285 }, { "epoch": 0.7053642914331465, "grad_norm": 0.9780546181932777, "learning_rate": 4.218008690135247e-06, "loss": 1.5263, "step": 5286 }, { "epoch": 0.7054977315185482, "grad_norm": 1.016085908744265, "learning_rate": 4.214483006721921e-06, "loss": 1.5835, "step": 5287 }, { "epoch": 0.7056311716039498, "grad_norm": 0.946908828245464, "learning_rate": 4.2109584039773e-06, "loss": 1.6154, "step": 5288 }, { "epoch": 0.7057646116893515, "grad_norm": 0.9794302729269099, "learning_rate": 4.207434882559734e-06, "loss": 1.5523, "step": 5289 }, { "epoch": 0.7058980517747532, "grad_norm": 1.1316618652316528, "learning_rate": 4.2039124431273845e-06, "loss": 1.5279, "step": 5290 }, { "epoch": 0.7060314918601548, "grad_norm": 0.9378659449314672, "learning_rate": 4.2003910863382005e-06, "loss": 1.565, "step": 5291 }, { "epoch": 0.7061649319455564, "grad_norm": 0.9503258372477722, "learning_rate": 4.1968708128499256e-06, "loss": 1.5009, "step": 5292 }, { "epoch": 0.7062983720309581, "grad_norm": 0.9519765695098307, "learning_rate": 4.1933516233201165e-06, "loss": 1.6068, "step": 5293 }, { "epoch": 0.7064318121163597, "grad_norm": 1.1619371544024075, "learning_rate": 4.189833518406113e-06, "loss": 1.5333, "step": 5294 }, { "epoch": 0.7065652522017614, "grad_norm": 0.9543539047132761, "learning_rate": 4.1863164987650575e-06, "loss": 1.5793, "step": 5295 }, { "epoch": 0.7066986922871631, "grad_norm": 0.9515900226999982, "learning_rate": 4.182800565053884e-06, "loss": 1.5893, "step": 5296 }, { "epoch": 0.7068321323725647, "grad_norm": 0.9355762162950707, "learning_rate": 4.179285717929338e-06, "loss": 1.5553, "step": 5297 }, { "epoch": 0.7069655724579663, "grad_norm": 0.9421727010797218, "learning_rate": 4.175771958047947e-06, "loss": 1.5825, "step": 5298 }, { "epoch": 0.707099012543368, "grad_norm": 1.0799208667442957, "learning_rate": 4.172259286066045e-06, "loss": 1.5503, "step": 5299 }, { "epoch": 0.7072324526287697, "grad_norm": 0.9679399491700288, "learning_rate": 4.168747702639753e-06, "loss": 1.5524, "step": 5300 }, { "epoch": 0.7073658927141714, "grad_norm": 0.9691270167436293, "learning_rate": 4.165237208425001e-06, "loss": 1.5649, "step": 5301 }, { "epoch": 0.707499332799573, "grad_norm": 0.9311122890360443, "learning_rate": 4.1617278040775086e-06, "loss": 1.514, "step": 5302 }, { "epoch": 0.7076327728849746, "grad_norm": 0.8967366726125487, "learning_rate": 4.158219490252791e-06, "loss": 1.5358, "step": 5303 }, { "epoch": 0.7077662129703763, "grad_norm": 0.9509487136930983, "learning_rate": 4.154712267606161e-06, "loss": 1.5899, "step": 5304 }, { "epoch": 0.707899653055778, "grad_norm": 0.9712447192685107, "learning_rate": 4.151206136792725e-06, "loss": 1.5993, "step": 5305 }, { "epoch": 0.7080330931411796, "grad_norm": 0.9453208811579048, "learning_rate": 4.147701098467395e-06, "loss": 1.5903, "step": 5306 }, { "epoch": 0.7081665332265813, "grad_norm": 0.955132045264726, "learning_rate": 4.144197153284869e-06, "loss": 1.513, "step": 5307 }, { "epoch": 0.7082999733119829, "grad_norm": 0.9400188884444305, "learning_rate": 4.140694301899645e-06, "loss": 1.551, "step": 5308 }, { "epoch": 0.7084334133973845, "grad_norm": 0.9193593248190934, "learning_rate": 4.137192544966013e-06, "loss": 1.585, "step": 5309 }, { "epoch": 0.7085668534827863, "grad_norm": 0.9312434980756322, "learning_rate": 4.13369188313806e-06, "loss": 1.5386, "step": 5310 }, { "epoch": 0.7087002935681879, "grad_norm": 0.9271931561265911, "learning_rate": 4.130192317069677e-06, "loss": 1.5208, "step": 5311 }, { "epoch": 0.7088337336535896, "grad_norm": 0.9970719656664363, "learning_rate": 4.126693847414538e-06, "loss": 1.5826, "step": 5312 }, { "epoch": 0.7089671737389912, "grad_norm": 0.9479786344427346, "learning_rate": 4.123196474826119e-06, "loss": 1.5403, "step": 5313 }, { "epoch": 0.7091006138243928, "grad_norm": 0.8979702441638332, "learning_rate": 4.119700199957684e-06, "loss": 1.5509, "step": 5314 }, { "epoch": 0.7092340539097945, "grad_norm": 0.9895920970366872, "learning_rate": 4.116205023462306e-06, "loss": 1.5785, "step": 5315 }, { "epoch": 0.7093674939951962, "grad_norm": 0.9499405479425314, "learning_rate": 4.112710945992842e-06, "loss": 1.5824, "step": 5316 }, { "epoch": 0.7095009340805978, "grad_norm": 0.9355717622213187, "learning_rate": 4.109217968201937e-06, "loss": 1.5706, "step": 5317 }, { "epoch": 0.7096343741659995, "grad_norm": 0.9477283832957076, "learning_rate": 4.1057260907420524e-06, "loss": 1.497, "step": 5318 }, { "epoch": 0.7097678142514011, "grad_norm": 0.9551646576385708, "learning_rate": 4.102235314265425e-06, "loss": 1.5367, "step": 5319 }, { "epoch": 0.7099012543368027, "grad_norm": 0.9248106472529288, "learning_rate": 4.098745639424091e-06, "loss": 1.5693, "step": 5320 }, { "epoch": 0.7100346944222045, "grad_norm": 0.963099005180929, "learning_rate": 4.095257066869881e-06, "loss": 1.573, "step": 5321 }, { "epoch": 0.7101681345076061, "grad_norm": 0.9687592868939011, "learning_rate": 4.091769597254426e-06, "loss": 1.5924, "step": 5322 }, { "epoch": 0.7103015745930077, "grad_norm": 0.9896034088236875, "learning_rate": 4.088283231229142e-06, "loss": 1.5621, "step": 5323 }, { "epoch": 0.7104350146784094, "grad_norm": 0.9341590041901182, "learning_rate": 4.08479796944524e-06, "loss": 1.552, "step": 5324 }, { "epoch": 0.710568454763811, "grad_norm": 0.9304265266280125, "learning_rate": 4.081313812553734e-06, "loss": 1.5652, "step": 5325 }, { "epoch": 0.7107018948492128, "grad_norm": 0.9605863893931225, "learning_rate": 4.07783076120542e-06, "loss": 1.5676, "step": 5326 }, { "epoch": 0.7108353349346144, "grad_norm": 1.0997008119225524, "learning_rate": 4.074348816050895e-06, "loss": 1.6232, "step": 5327 }, { "epoch": 0.710968775020016, "grad_norm": 0.9045291420444287, "learning_rate": 4.07086797774054e-06, "loss": 1.5255, "step": 5328 }, { "epoch": 0.7111022151054177, "grad_norm": 0.9985345986185172, "learning_rate": 4.067388246924545e-06, "loss": 1.6026, "step": 5329 }, { "epoch": 0.7112356551908193, "grad_norm": 0.9841974283402879, "learning_rate": 4.063909624252881e-06, "loss": 1.5454, "step": 5330 }, { "epoch": 0.7113690952762209, "grad_norm": 0.9526315404945778, "learning_rate": 4.060432110375314e-06, "loss": 1.5019, "step": 5331 }, { "epoch": 0.7115025353616227, "grad_norm": 0.9491732500227824, "learning_rate": 4.0569557059414e-06, "loss": 1.5423, "step": 5332 }, { "epoch": 0.7116359754470243, "grad_norm": 0.9491096007519367, "learning_rate": 4.0534804116005e-06, "loss": 1.5221, "step": 5333 }, { "epoch": 0.7117694155324259, "grad_norm": 0.9284527190069253, "learning_rate": 4.0500062280017545e-06, "loss": 1.5625, "step": 5334 }, { "epoch": 0.7119028556178276, "grad_norm": 0.9759261683989565, "learning_rate": 4.0465331557941e-06, "loss": 1.5516, "step": 5335 }, { "epoch": 0.7120362957032292, "grad_norm": 1.0927488467118913, "learning_rate": 4.0430611956262735e-06, "loss": 1.5364, "step": 5336 }, { "epoch": 0.7121697357886309, "grad_norm": 2.0805229151915445, "learning_rate": 4.039590348146791e-06, "loss": 1.5377, "step": 5337 }, { "epoch": 0.7123031758740326, "grad_norm": 0.9353765748626967, "learning_rate": 4.036120614003972e-06, "loss": 1.571, "step": 5338 }, { "epoch": 0.7124366159594342, "grad_norm": 0.9903701860089882, "learning_rate": 4.032651993845917e-06, "loss": 1.5247, "step": 5339 }, { "epoch": 0.7125700560448359, "grad_norm": 0.9235475362427148, "learning_rate": 4.029184488320531e-06, "loss": 1.515, "step": 5340 }, { "epoch": 0.7127034961302375, "grad_norm": 0.9026258019298207, "learning_rate": 4.0257180980755025e-06, "loss": 1.5408, "step": 5341 }, { "epoch": 0.7128369362156391, "grad_norm": 1.2538215164072486, "learning_rate": 4.022252823758311e-06, "loss": 1.6143, "step": 5342 }, { "epoch": 0.7129703763010409, "grad_norm": 0.9545237012787869, "learning_rate": 4.018788666016236e-06, "loss": 1.5585, "step": 5343 }, { "epoch": 0.7131038163864425, "grad_norm": 0.9280503299040727, "learning_rate": 4.015325625496339e-06, "loss": 1.573, "step": 5344 }, { "epoch": 0.7132372564718441, "grad_norm": 0.9990855517950481, "learning_rate": 4.011863702845477e-06, "loss": 1.5783, "step": 5345 }, { "epoch": 0.7133706965572458, "grad_norm": 0.9614841419311843, "learning_rate": 4.008402898710299e-06, "loss": 1.5716, "step": 5346 }, { "epoch": 0.7135041366426474, "grad_norm": 0.9450318885739145, "learning_rate": 4.004943213737238e-06, "loss": 1.5561, "step": 5347 }, { "epoch": 0.7136375767280491, "grad_norm": 0.9957058765830085, "learning_rate": 4.001484648572532e-06, "loss": 1.5563, "step": 5348 }, { "epoch": 0.7137710168134508, "grad_norm": 1.0067457396072523, "learning_rate": 3.998027203862199e-06, "loss": 1.5962, "step": 5349 }, { "epoch": 0.7139044568988524, "grad_norm": 0.9853384038154768, "learning_rate": 3.994570880252049e-06, "loss": 1.5547, "step": 5350 }, { "epoch": 0.714037896984254, "grad_norm": 0.9554713328733841, "learning_rate": 3.991115678387684e-06, "loss": 1.5627, "step": 5351 }, { "epoch": 0.7141713370696557, "grad_norm": 1.0861226656873586, "learning_rate": 3.987661598914496e-06, "loss": 1.6133, "step": 5352 }, { "epoch": 0.7143047771550574, "grad_norm": 0.9363068359607128, "learning_rate": 3.984208642477665e-06, "loss": 1.5493, "step": 5353 }, { "epoch": 0.7144382172404591, "grad_norm": 0.9496779214889088, "learning_rate": 3.9807568097221705e-06, "loss": 1.5819, "step": 5354 }, { "epoch": 0.7145716573258607, "grad_norm": 0.9293168489795315, "learning_rate": 3.977306101292773e-06, "loss": 1.5272, "step": 5355 }, { "epoch": 0.7147050974112623, "grad_norm": 0.962632697448326, "learning_rate": 3.973856517834021e-06, "loss": 1.5798, "step": 5356 }, { "epoch": 0.714838537496664, "grad_norm": 0.9584702412606565, "learning_rate": 3.9704080599902635e-06, "loss": 1.5512, "step": 5357 }, { "epoch": 0.7149719775820657, "grad_norm": 0.9815244757813827, "learning_rate": 3.966960728405633e-06, "loss": 1.6353, "step": 5358 }, { "epoch": 0.7151054176674673, "grad_norm": 0.9928728802532544, "learning_rate": 3.963514523724049e-06, "loss": 1.5299, "step": 5359 }, { "epoch": 0.715238857752869, "grad_norm": 0.9623909333997922, "learning_rate": 3.96006944658922e-06, "loss": 1.5684, "step": 5360 }, { "epoch": 0.7153722978382706, "grad_norm": 0.9112193396740259, "learning_rate": 3.956625497644655e-06, "loss": 1.5723, "step": 5361 }, { "epoch": 0.7155057379236722, "grad_norm": 0.9459655781359935, "learning_rate": 3.95318267753364e-06, "loss": 1.5027, "step": 5362 }, { "epoch": 0.715639178009074, "grad_norm": 1.0216963512845265, "learning_rate": 3.949740986899257e-06, "loss": 1.5619, "step": 5363 }, { "epoch": 0.7157726180944756, "grad_norm": 14.9027623425261, "learning_rate": 3.946300426384368e-06, "loss": 1.5481, "step": 5364 }, { "epoch": 0.7159060581798772, "grad_norm": 1.0993613787167713, "learning_rate": 3.942860996631639e-06, "loss": 1.587, "step": 5365 }, { "epoch": 0.7160394982652789, "grad_norm": 1.0313006265813407, "learning_rate": 3.939422698283513e-06, "loss": 1.5491, "step": 5366 }, { "epoch": 0.7161729383506805, "grad_norm": 0.9390749627476853, "learning_rate": 3.9359855319822205e-06, "loss": 1.5661, "step": 5367 }, { "epoch": 0.7163063784360822, "grad_norm": 0.9760457226921616, "learning_rate": 3.932549498369793e-06, "loss": 1.6101, "step": 5368 }, { "epoch": 0.7164398185214839, "grad_norm": 0.958734457032826, "learning_rate": 3.92911459808804e-06, "loss": 1.5229, "step": 5369 }, { "epoch": 0.7165732586068855, "grad_norm": 0.9526647222869271, "learning_rate": 3.925680831778559e-06, "loss": 1.5667, "step": 5370 }, { "epoch": 0.7167066986922872, "grad_norm": 1.071079462379526, "learning_rate": 3.922248200082736e-06, "loss": 1.5439, "step": 5371 }, { "epoch": 0.7168401387776888, "grad_norm": 1.0754749545538824, "learning_rate": 3.918816703641757e-06, "loss": 1.4997, "step": 5372 }, { "epoch": 0.7169735788630904, "grad_norm": 0.9839191618810875, "learning_rate": 3.915386343096579e-06, "loss": 1.5861, "step": 5373 }, { "epoch": 0.7171070189484922, "grad_norm": 0.9696932839295033, "learning_rate": 3.911957119087953e-06, "loss": 1.5767, "step": 5374 }, { "epoch": 0.7172404590338938, "grad_norm": 1.0878185921141816, "learning_rate": 3.908529032256426e-06, "loss": 1.5695, "step": 5375 }, { "epoch": 0.7173738991192954, "grad_norm": 1.0356241772862398, "learning_rate": 3.905102083242321e-06, "loss": 1.5772, "step": 5376 }, { "epoch": 0.7175073392046971, "grad_norm": 0.9534440152222651, "learning_rate": 3.901676272685755e-06, "loss": 1.5578, "step": 5377 }, { "epoch": 0.7176407792900987, "grad_norm": 1.0211708448584833, "learning_rate": 3.898251601226626e-06, "loss": 1.4683, "step": 5378 }, { "epoch": 0.7177742193755005, "grad_norm": 1.0468960541282286, "learning_rate": 3.894828069504629e-06, "loss": 1.5454, "step": 5379 }, { "epoch": 0.7179076594609021, "grad_norm": 0.9536008377149614, "learning_rate": 3.89140567815924e-06, "loss": 1.5293, "step": 5380 }, { "epoch": 0.7180410995463037, "grad_norm": 1.0458155687344466, "learning_rate": 3.8879844278297164e-06, "loss": 1.6136, "step": 5381 }, { "epoch": 0.7181745396317054, "grad_norm": 0.9300674323746942, "learning_rate": 3.884564319155119e-06, "loss": 1.5283, "step": 5382 }, { "epoch": 0.718307979717107, "grad_norm": 1.147948065522506, "learning_rate": 3.881145352774278e-06, "loss": 1.4485, "step": 5383 }, { "epoch": 0.7184414198025086, "grad_norm": 0.9414728729601566, "learning_rate": 3.877727529325821e-06, "loss": 1.5722, "step": 5384 }, { "epoch": 0.7185748598879104, "grad_norm": 0.9845671466215753, "learning_rate": 3.874310849448152e-06, "loss": 1.5702, "step": 5385 }, { "epoch": 0.718708299973312, "grad_norm": 1.2446034917882773, "learning_rate": 3.870895313779477e-06, "loss": 1.5208, "step": 5386 }, { "epoch": 0.7188417400587136, "grad_norm": 0.9504776390771918, "learning_rate": 3.867480922957775e-06, "loss": 1.5833, "step": 5387 }, { "epoch": 0.7189751801441153, "grad_norm": 0.9383944211772788, "learning_rate": 3.864067677620815e-06, "loss": 1.5096, "step": 5388 }, { "epoch": 0.7191086202295169, "grad_norm": 0.9197073694471657, "learning_rate": 3.860655578406153e-06, "loss": 1.4891, "step": 5389 }, { "epoch": 0.7192420603149186, "grad_norm": 1.293587939218792, "learning_rate": 3.857244625951125e-06, "loss": 1.5577, "step": 5390 }, { "epoch": 0.7193755004003203, "grad_norm": 0.9425333596458038, "learning_rate": 3.8538348208928675e-06, "loss": 1.547, "step": 5391 }, { "epoch": 0.7195089404857219, "grad_norm": 0.9187340276232536, "learning_rate": 3.850426163868289e-06, "loss": 1.5602, "step": 5392 }, { "epoch": 0.7196423805711236, "grad_norm": 1.0066203632714577, "learning_rate": 3.847018655514087e-06, "loss": 1.5686, "step": 5393 }, { "epoch": 0.7197758206565252, "grad_norm": 0.9101557342470021, "learning_rate": 3.843612296466747e-06, "loss": 1.5074, "step": 5394 }, { "epoch": 0.7199092607419268, "grad_norm": 1.0091781445483228, "learning_rate": 3.840207087362535e-06, "loss": 1.5506, "step": 5395 }, { "epoch": 0.7200427008273286, "grad_norm": 0.9766556965941918, "learning_rate": 3.836803028837506e-06, "loss": 1.575, "step": 5396 }, { "epoch": 0.7201761409127302, "grad_norm": 0.9326384838247909, "learning_rate": 3.833400121527502e-06, "loss": 1.5406, "step": 5397 }, { "epoch": 0.7203095809981318, "grad_norm": 0.9729805047938272, "learning_rate": 3.829998366068147e-06, "loss": 1.583, "step": 5398 }, { "epoch": 0.7204430210835335, "grad_norm": 1.010555826446732, "learning_rate": 3.826597763094844e-06, "loss": 1.533, "step": 5399 }, { "epoch": 0.7205764611689351, "grad_norm": 0.9416257579609061, "learning_rate": 3.823198313242799e-06, "loss": 1.5651, "step": 5400 }, { "epoch": 0.7207099012543368, "grad_norm": 0.9745957493904607, "learning_rate": 3.81980001714698e-06, "loss": 1.5579, "step": 5401 }, { "epoch": 0.7208433413397385, "grad_norm": 0.9508022115629777, "learning_rate": 3.816402875442155e-06, "loss": 1.5297, "step": 5402 }, { "epoch": 0.7209767814251401, "grad_norm": 9.996627671307085, "learning_rate": 3.813006888762867e-06, "loss": 1.5849, "step": 5403 }, { "epoch": 0.7211102215105417, "grad_norm": 0.9362163294366803, "learning_rate": 3.809612057743454e-06, "loss": 1.5874, "step": 5404 }, { "epoch": 0.7212436615959434, "grad_norm": 0.9650373486942456, "learning_rate": 3.8062183830180278e-06, "loss": 1.5313, "step": 5405 }, { "epoch": 0.7213771016813451, "grad_norm": 0.9201142210045637, "learning_rate": 3.802825865220485e-06, "loss": 1.5155, "step": 5406 }, { "epoch": 0.7215105417667468, "grad_norm": 0.9383591060889741, "learning_rate": 3.799434504984517e-06, "loss": 1.5318, "step": 5407 }, { "epoch": 0.7216439818521484, "grad_norm": 0.9356509582385691, "learning_rate": 3.7960443029435876e-06, "loss": 1.5374, "step": 5408 }, { "epoch": 0.72177742193755, "grad_norm": 0.9314768342510902, "learning_rate": 3.7926552597309472e-06, "loss": 1.597, "step": 5409 }, { "epoch": 0.7219108620229517, "grad_norm": 1.230724666512068, "learning_rate": 3.7892673759796285e-06, "loss": 1.5506, "step": 5410 }, { "epoch": 0.7220443021083534, "grad_norm": 0.8916809377299305, "learning_rate": 3.7858806523224546e-06, "loss": 1.4881, "step": 5411 }, { "epoch": 0.722177742193755, "grad_norm": 1.150653767752609, "learning_rate": 3.7824950893920256e-06, "loss": 1.5351, "step": 5412 }, { "epoch": 0.7223111822791567, "grad_norm": 0.9860122847150068, "learning_rate": 3.7791106878207206e-06, "loss": 1.5517, "step": 5413 }, { "epoch": 0.7224446223645583, "grad_norm": 0.9187176873452039, "learning_rate": 3.7757274482407146e-06, "loss": 1.5182, "step": 5414 }, { "epoch": 0.7225780624499599, "grad_norm": 0.9502222725296599, "learning_rate": 3.7723453712839566e-06, "loss": 1.5383, "step": 5415 }, { "epoch": 0.7227115025353616, "grad_norm": 0.9501143358026882, "learning_rate": 3.768964457582177e-06, "loss": 1.6326, "step": 5416 }, { "epoch": 0.7228449426207633, "grad_norm": 0.9523336545668618, "learning_rate": 3.7655847077668915e-06, "loss": 1.5764, "step": 5417 }, { "epoch": 0.7229783827061649, "grad_norm": 1.0389137372207382, "learning_rate": 3.7622061224694038e-06, "loss": 1.5737, "step": 5418 }, { "epoch": 0.7231118227915666, "grad_norm": 0.9455855773831311, "learning_rate": 3.758828702320794e-06, "loss": 1.5635, "step": 5419 }, { "epoch": 0.7232452628769682, "grad_norm": 0.9693626405402851, "learning_rate": 3.7554524479519196e-06, "loss": 1.5343, "step": 5420 }, { "epoch": 0.72337870296237, "grad_norm": 0.9574938541001241, "learning_rate": 3.7520773599934347e-06, "loss": 1.5699, "step": 5421 }, { "epoch": 0.7235121430477716, "grad_norm": 0.940692924931385, "learning_rate": 3.748703439075764e-06, "loss": 1.5674, "step": 5422 }, { "epoch": 0.7236455831331732, "grad_norm": 0.9270797016669773, "learning_rate": 3.7453306858291163e-06, "loss": 1.517, "step": 5423 }, { "epoch": 0.7237790232185749, "grad_norm": 0.9375714221926796, "learning_rate": 3.741959100883481e-06, "loss": 1.5181, "step": 5424 }, { "epoch": 0.7239124633039765, "grad_norm": 0.9283234071509436, "learning_rate": 3.738588684868639e-06, "loss": 1.5591, "step": 5425 }, { "epoch": 0.7240459033893781, "grad_norm": 0.9733008805447092, "learning_rate": 3.7352194384141426e-06, "loss": 1.5131, "step": 5426 }, { "epoch": 0.7241793434747799, "grad_norm": 0.9570280233761831, "learning_rate": 3.731851362149327e-06, "loss": 1.5628, "step": 5427 }, { "epoch": 0.7243127835601815, "grad_norm": 1.2042047907577633, "learning_rate": 3.7284844567033083e-06, "loss": 1.538, "step": 5428 }, { "epoch": 0.7244462236455831, "grad_norm": 0.9741593159201551, "learning_rate": 3.7251187227049924e-06, "loss": 1.5553, "step": 5429 }, { "epoch": 0.7245796637309848, "grad_norm": 0.9197479513774383, "learning_rate": 3.7217541607830576e-06, "loss": 1.5162, "step": 5430 }, { "epoch": 0.7247131038163864, "grad_norm": 0.9403320524684858, "learning_rate": 3.718390771565964e-06, "loss": 1.5288, "step": 5431 }, { "epoch": 0.724846543901788, "grad_norm": 0.95182884051891, "learning_rate": 3.7150285556819563e-06, "loss": 1.5433, "step": 5432 }, { "epoch": 0.7249799839871898, "grad_norm": 1.0304654498405337, "learning_rate": 3.711667513759053e-06, "loss": 1.622, "step": 5433 }, { "epoch": 0.7251134240725914, "grad_norm": 0.98382338711681, "learning_rate": 3.708307646425068e-06, "loss": 1.5138, "step": 5434 }, { "epoch": 0.7252468641579931, "grad_norm": 1.058008764362752, "learning_rate": 3.704948954307579e-06, "loss": 1.5539, "step": 5435 }, { "epoch": 0.7253803042433947, "grad_norm": 1.0067806065661722, "learning_rate": 3.7015914380339545e-06, "loss": 1.5266, "step": 5436 }, { "epoch": 0.7255137443287963, "grad_norm": 0.9964621996103804, "learning_rate": 3.6982350982313387e-06, "loss": 1.5728, "step": 5437 }, { "epoch": 0.7256471844141981, "grad_norm": 1.2415140773272089, "learning_rate": 3.694879935526655e-06, "loss": 1.4833, "step": 5438 }, { "epoch": 0.7257806244995997, "grad_norm": 1.0064068768265315, "learning_rate": 3.691525950546617e-06, "loss": 1.5633, "step": 5439 }, { "epoch": 0.7259140645850013, "grad_norm": 1.335840952388961, "learning_rate": 3.6881731439177058e-06, "loss": 1.5479, "step": 5440 }, { "epoch": 0.726047504670403, "grad_norm": 0.9765575128964399, "learning_rate": 3.6848215162661894e-06, "loss": 1.5502, "step": 5441 }, { "epoch": 0.7261809447558046, "grad_norm": 0.9318261067807464, "learning_rate": 3.6814710682181088e-06, "loss": 1.525, "step": 5442 }, { "epoch": 0.7263143848412063, "grad_norm": 0.9722119023080255, "learning_rate": 3.6781218003992967e-06, "loss": 1.5279, "step": 5443 }, { "epoch": 0.726447824926608, "grad_norm": 0.9517245541221244, "learning_rate": 3.6747737134353557e-06, "loss": 1.5848, "step": 5444 }, { "epoch": 0.7265812650120096, "grad_norm": 9.955006013526363, "learning_rate": 3.6714268079516657e-06, "loss": 1.6209, "step": 5445 }, { "epoch": 0.7267147050974113, "grad_norm": 0.9547952003339406, "learning_rate": 3.668081084573398e-06, "loss": 1.6035, "step": 5446 }, { "epoch": 0.7268481451828129, "grad_norm": 0.965245818039803, "learning_rate": 3.6647365439254923e-06, "loss": 1.5966, "step": 5447 }, { "epoch": 0.7269815852682145, "grad_norm": 0.9878697703561382, "learning_rate": 3.6613931866326704e-06, "loss": 1.5274, "step": 5448 }, { "epoch": 0.7271150253536163, "grad_norm": 0.9432582883437258, "learning_rate": 3.658051013319429e-06, "loss": 1.5539, "step": 5449 }, { "epoch": 0.7272484654390179, "grad_norm": 0.9559304328129197, "learning_rate": 3.6547100246100576e-06, "loss": 1.5629, "step": 5450 }, { "epoch": 0.7273819055244195, "grad_norm": 1.0818678239934452, "learning_rate": 3.6513702211286086e-06, "loss": 1.5481, "step": 5451 }, { "epoch": 0.7275153456098212, "grad_norm": 0.9568005743025887, "learning_rate": 3.6480316034989173e-06, "loss": 1.5597, "step": 5452 }, { "epoch": 0.7276487856952228, "grad_norm": 0.9041772250496767, "learning_rate": 3.644694172344606e-06, "loss": 1.534, "step": 5453 }, { "epoch": 0.7277822257806245, "grad_norm": 0.9169233659753793, "learning_rate": 3.6413579282890655e-06, "loss": 1.5407, "step": 5454 }, { "epoch": 0.7279156658660262, "grad_norm": 0.9466397250058866, "learning_rate": 3.638022871955469e-06, "loss": 1.5682, "step": 5455 }, { "epoch": 0.7280491059514278, "grad_norm": 1.1961458887332446, "learning_rate": 3.6346890039667616e-06, "loss": 1.5361, "step": 5456 }, { "epoch": 0.7281825460368294, "grad_norm": 0.9721270068626529, "learning_rate": 3.6313563249456806e-06, "loss": 1.5553, "step": 5457 }, { "epoch": 0.7283159861222311, "grad_norm": 0.9791647245219625, "learning_rate": 3.6280248355147273e-06, "loss": 1.5297, "step": 5458 }, { "epoch": 0.7284494262076328, "grad_norm": 0.9573169731204293, "learning_rate": 3.6246945362961882e-06, "loss": 1.5585, "step": 5459 }, { "epoch": 0.7285828662930345, "grad_norm": 1.0013874444620199, "learning_rate": 3.62136542791212e-06, "loss": 1.5126, "step": 5460 }, { "epoch": 0.7287163063784361, "grad_norm": 0.9187983530032887, "learning_rate": 3.6180375109843703e-06, "loss": 1.5512, "step": 5461 }, { "epoch": 0.7288497464638377, "grad_norm": 1.0234386688020545, "learning_rate": 3.614710786134552e-06, "loss": 1.5312, "step": 5462 }, { "epoch": 0.7289831865492394, "grad_norm": 0.9315553183193896, "learning_rate": 3.6113852539840567e-06, "loss": 1.5634, "step": 5463 }, { "epoch": 0.729116626634641, "grad_norm": 0.925019388625177, "learning_rate": 3.6080609151540613e-06, "loss": 1.5594, "step": 5464 }, { "epoch": 0.7292500667200427, "grad_norm": 0.935710933008227, "learning_rate": 3.604737770265513e-06, "loss": 1.4976, "step": 5465 }, { "epoch": 0.7293835068054444, "grad_norm": 1.4420915957678724, "learning_rate": 3.6014158199391358e-06, "loss": 1.5603, "step": 5466 }, { "epoch": 0.729516946890846, "grad_norm": 1.0497135426493642, "learning_rate": 3.5980950647954294e-06, "loss": 1.5248, "step": 5467 }, { "epoch": 0.7296503869762476, "grad_norm": 0.9269061560231096, "learning_rate": 3.5947755054546795e-06, "loss": 1.5422, "step": 5468 }, { "epoch": 0.7297838270616493, "grad_norm": 0.9527020311342057, "learning_rate": 3.5914571425369393e-06, "loss": 1.5373, "step": 5469 }, { "epoch": 0.729917267147051, "grad_norm": 0.9372367174715784, "learning_rate": 3.5881399766620373e-06, "loss": 1.5657, "step": 5470 }, { "epoch": 0.7300507072324526, "grad_norm": 0.9581054862273867, "learning_rate": 3.58482400844959e-06, "loss": 1.5739, "step": 5471 }, { "epoch": 0.7301841473178543, "grad_norm": 0.947441074687153, "learning_rate": 3.5815092385189777e-06, "loss": 1.5728, "step": 5472 }, { "epoch": 0.7303175874032559, "grad_norm": 1.0228746400085942, "learning_rate": 3.5781956674893626e-06, "loss": 1.5334, "step": 5473 }, { "epoch": 0.7304510274886576, "grad_norm": 0.9536334081802301, "learning_rate": 3.574883295979682e-06, "loss": 1.562, "step": 5474 }, { "epoch": 0.7305844675740593, "grad_norm": 0.9606672650917066, "learning_rate": 3.5715721246086486e-06, "loss": 1.5526, "step": 5475 }, { "epoch": 0.7307179076594609, "grad_norm": 0.9577088048584481, "learning_rate": 3.568262153994749e-06, "loss": 1.5837, "step": 5476 }, { "epoch": 0.7308513477448626, "grad_norm": 0.9719144515399968, "learning_rate": 3.5649533847562544e-06, "loss": 1.5316, "step": 5477 }, { "epoch": 0.7309847878302642, "grad_norm": 1.004032695088233, "learning_rate": 3.5616458175112013e-06, "loss": 1.593, "step": 5478 }, { "epoch": 0.7311182279156658, "grad_norm": 0.9494207677824381, "learning_rate": 3.558339452877406e-06, "loss": 1.5683, "step": 5479 }, { "epoch": 0.7312516680010676, "grad_norm": 0.9698864247819369, "learning_rate": 3.5550342914724613e-06, "loss": 1.585, "step": 5480 }, { "epoch": 0.7313851080864692, "grad_norm": 0.9508423495123989, "learning_rate": 3.5517303339137267e-06, "loss": 1.5207, "step": 5481 }, { "epoch": 0.7315185481718708, "grad_norm": 1.2166047965486122, "learning_rate": 3.5484275808183544e-06, "loss": 1.5507, "step": 5482 }, { "epoch": 0.7316519882572725, "grad_norm": 0.9436409445639349, "learning_rate": 3.5451260328032555e-06, "loss": 1.579, "step": 5483 }, { "epoch": 0.7317854283426741, "grad_norm": 1.2067580184607811, "learning_rate": 3.541825690485119e-06, "loss": 1.5399, "step": 5484 }, { "epoch": 0.7319188684280757, "grad_norm": 0.9128886571978432, "learning_rate": 3.5385265544804172e-06, "loss": 1.5099, "step": 5485 }, { "epoch": 0.7320523085134775, "grad_norm": 0.9232766943936563, "learning_rate": 3.535228625405389e-06, "loss": 1.5688, "step": 5486 }, { "epoch": 0.7321857485988791, "grad_norm": 0.9443761629982292, "learning_rate": 3.531931903876049e-06, "loss": 1.5515, "step": 5487 }, { "epoch": 0.7323191886842808, "grad_norm": 0.9281491684957837, "learning_rate": 3.5286363905081843e-06, "loss": 1.5491, "step": 5488 }, { "epoch": 0.7324526287696824, "grad_norm": 1.0926741045880084, "learning_rate": 3.525342085917366e-06, "loss": 1.5447, "step": 5489 }, { "epoch": 0.732586068855084, "grad_norm": 0.9386932615205383, "learning_rate": 3.5220489907189293e-06, "loss": 1.5314, "step": 5490 }, { "epoch": 0.7327195089404858, "grad_norm": 0.9166276573659027, "learning_rate": 3.518757105527988e-06, "loss": 1.5458, "step": 5491 }, { "epoch": 0.7328529490258874, "grad_norm": 0.9123097547816297, "learning_rate": 3.5154664309594224e-06, "loss": 1.5524, "step": 5492 }, { "epoch": 0.732986389111289, "grad_norm": 1.1139458388619161, "learning_rate": 3.5121769676279026e-06, "loss": 1.5188, "step": 5493 }, { "epoch": 0.7331198291966907, "grad_norm": 0.958270690060531, "learning_rate": 3.508888716147859e-06, "loss": 1.5596, "step": 5494 }, { "epoch": 0.7332532692820923, "grad_norm": 0.9709805252564494, "learning_rate": 3.5056016771334954e-06, "loss": 1.5756, "step": 5495 }, { "epoch": 0.733386709367494, "grad_norm": 1.2247215933789883, "learning_rate": 3.5023158511988e-06, "loss": 1.597, "step": 5496 }, { "epoch": 0.7335201494528957, "grad_norm": 0.9334813075768208, "learning_rate": 3.4990312389575253e-06, "loss": 1.5761, "step": 5497 }, { "epoch": 0.7336535895382973, "grad_norm": 0.9468394042300734, "learning_rate": 3.495747841023198e-06, "loss": 1.547, "step": 5498 }, { "epoch": 0.7337870296236989, "grad_norm": 0.9478146984584698, "learning_rate": 3.4924656580091176e-06, "loss": 1.6384, "step": 5499 }, { "epoch": 0.7339204697091006, "grad_norm": 0.967091384617016, "learning_rate": 3.4891846905283645e-06, "loss": 1.5693, "step": 5500 }, { "epoch": 0.7340539097945022, "grad_norm": 0.9249763240908678, "learning_rate": 3.4859049391937827e-06, "loss": 1.5253, "step": 5501 }, { "epoch": 0.734187349879904, "grad_norm": 0.96990837542742, "learning_rate": 3.48262640461799e-06, "loss": 1.5783, "step": 5502 }, { "epoch": 0.7343207899653056, "grad_norm": 0.9655104010410549, "learning_rate": 3.479349087413384e-06, "loss": 1.5925, "step": 5503 }, { "epoch": 0.7344542300507072, "grad_norm": 0.9165403211065593, "learning_rate": 3.4760729881921286e-06, "loss": 1.4963, "step": 5504 }, { "epoch": 0.7345876701361089, "grad_norm": 0.9683466520946085, "learning_rate": 3.472798107566161e-06, "loss": 1.6019, "step": 5505 }, { "epoch": 0.7347211102215105, "grad_norm": 0.9552291626544579, "learning_rate": 3.469524446147189e-06, "loss": 1.5571, "step": 5506 }, { "epoch": 0.7348545503069122, "grad_norm": 0.9311445516993911, "learning_rate": 3.466252004546702e-06, "loss": 1.5499, "step": 5507 }, { "epoch": 0.7349879903923139, "grad_norm": 1.1146712718754999, "learning_rate": 3.46298078337595e-06, "loss": 1.5284, "step": 5508 }, { "epoch": 0.7351214304777155, "grad_norm": 0.9571872321016519, "learning_rate": 3.4597107832459575e-06, "loss": 1.6109, "step": 5509 }, { "epoch": 0.7352548705631171, "grad_norm": 0.9544626733275546, "learning_rate": 3.456442004767531e-06, "loss": 1.584, "step": 5510 }, { "epoch": 0.7353883106485188, "grad_norm": 0.9436856603892109, "learning_rate": 3.4531744485512362e-06, "loss": 1.6014, "step": 5511 }, { "epoch": 0.7355217507339205, "grad_norm": 0.9393458177163648, "learning_rate": 3.4499081152074156e-06, "loss": 1.5539, "step": 5512 }, { "epoch": 0.7356551908193221, "grad_norm": 0.9422174761137512, "learning_rate": 3.446643005346181e-06, "loss": 1.569, "step": 5513 }, { "epoch": 0.7357886309047238, "grad_norm": 0.9392923260054774, "learning_rate": 3.4433791195774237e-06, "loss": 1.545, "step": 5514 }, { "epoch": 0.7359220709901254, "grad_norm": 0.9545864141748973, "learning_rate": 3.440116458510796e-06, "loss": 1.5286, "step": 5515 }, { "epoch": 0.7360555110755271, "grad_norm": 0.9852651403635309, "learning_rate": 3.4368550227557272e-06, "loss": 1.5654, "step": 5516 }, { "epoch": 0.7361889511609288, "grad_norm": 0.9339962134756217, "learning_rate": 3.433594812921416e-06, "loss": 1.507, "step": 5517 }, { "epoch": 0.7363223912463304, "grad_norm": 0.9437597981521707, "learning_rate": 3.4303358296168287e-06, "loss": 1.5623, "step": 5518 }, { "epoch": 0.7364558313317321, "grad_norm": 0.9428139658289024, "learning_rate": 3.4270780734507136e-06, "loss": 1.514, "step": 5519 }, { "epoch": 0.7365892714171337, "grad_norm": 0.9476341999117343, "learning_rate": 3.4238215450315794e-06, "loss": 1.5945, "step": 5520 }, { "epoch": 0.7367227115025353, "grad_norm": 0.9394390246936388, "learning_rate": 3.420566244967707e-06, "loss": 1.5188, "step": 5521 }, { "epoch": 0.736856151587937, "grad_norm": 0.9506593690055114, "learning_rate": 3.417312173867151e-06, "loss": 1.5694, "step": 5522 }, { "epoch": 0.7369895916733387, "grad_norm": 1.0129257670360825, "learning_rate": 3.4140593323377336e-06, "loss": 1.6033, "step": 5523 }, { "epoch": 0.7371230317587403, "grad_norm": 0.9480996004589485, "learning_rate": 3.410807720987046e-06, "loss": 1.5339, "step": 5524 }, { "epoch": 0.737256471844142, "grad_norm": 0.927903797277357, "learning_rate": 3.4075573404224594e-06, "loss": 1.5274, "step": 5525 }, { "epoch": 0.7373899119295436, "grad_norm": 0.9379989494588817, "learning_rate": 3.4043081912511033e-06, "loss": 1.5647, "step": 5526 }, { "epoch": 0.7375233520149453, "grad_norm": 0.9218438261963459, "learning_rate": 3.4010602740798795e-06, "loss": 1.5587, "step": 5527 }, { "epoch": 0.737656792100347, "grad_norm": 0.9400943430420226, "learning_rate": 3.3978135895154674e-06, "loss": 1.5383, "step": 5528 }, { "epoch": 0.7377902321857486, "grad_norm": 0.9268874491253772, "learning_rate": 3.394568138164308e-06, "loss": 1.5851, "step": 5529 }, { "epoch": 0.7379236722711503, "grad_norm": 0.9115511290678305, "learning_rate": 3.3913239206326154e-06, "loss": 1.5734, "step": 5530 }, { "epoch": 0.7380571123565519, "grad_norm": 0.9222620027598017, "learning_rate": 3.388080937526368e-06, "loss": 1.5531, "step": 5531 }, { "epoch": 0.7381905524419535, "grad_norm": 0.9826304238549518, "learning_rate": 3.3848391894513253e-06, "loss": 1.5357, "step": 5532 }, { "epoch": 0.7383239925273553, "grad_norm": 0.9812896742852667, "learning_rate": 3.381598677013006e-06, "loss": 1.6455, "step": 5533 }, { "epoch": 0.7384574326127569, "grad_norm": 1.1940903464741186, "learning_rate": 3.3783594008166963e-06, "loss": 1.5608, "step": 5534 }, { "epoch": 0.7385908726981585, "grad_norm": 1.0051527729116323, "learning_rate": 3.3751213614674647e-06, "loss": 1.4906, "step": 5535 }, { "epoch": 0.7387243127835602, "grad_norm": 0.9448310950987612, "learning_rate": 3.3718845595701356e-06, "loss": 1.5768, "step": 5536 }, { "epoch": 0.7388577528689618, "grad_norm": 0.9253739341701587, "learning_rate": 3.3686489957293057e-06, "loss": 1.5335, "step": 5537 }, { "epoch": 0.7389911929543634, "grad_norm": 0.9565771682432299, "learning_rate": 3.3654146705493407e-06, "loss": 1.5332, "step": 5538 }, { "epoch": 0.7391246330397652, "grad_norm": 0.9417608192865259, "learning_rate": 3.3621815846343797e-06, "loss": 1.5608, "step": 5539 }, { "epoch": 0.7392580731251668, "grad_norm": 0.9655822978265454, "learning_rate": 3.3589497385883253e-06, "loss": 1.5471, "step": 5540 }, { "epoch": 0.7393915132105685, "grad_norm": 0.9690529415186309, "learning_rate": 3.355719133014844e-06, "loss": 1.5502, "step": 5541 }, { "epoch": 0.7395249532959701, "grad_norm": 1.2161957457480974, "learning_rate": 3.3524897685173853e-06, "loss": 1.5819, "step": 5542 }, { "epoch": 0.7396583933813717, "grad_norm": 0.9441515276869841, "learning_rate": 3.3492616456991524e-06, "loss": 1.5041, "step": 5543 }, { "epoch": 0.7397918334667735, "grad_norm": 0.960027360606955, "learning_rate": 3.346034765163123e-06, "loss": 1.5315, "step": 5544 }, { "epoch": 0.7399252735521751, "grad_norm": 0.9330330601144551, "learning_rate": 3.342809127512038e-06, "loss": 1.5264, "step": 5545 }, { "epoch": 0.7400587136375767, "grad_norm": 1.1030181822089198, "learning_rate": 3.3395847333484153e-06, "loss": 1.5608, "step": 5546 }, { "epoch": 0.7401921537229784, "grad_norm": 0.9682087080598382, "learning_rate": 3.3363615832745344e-06, "loss": 1.5755, "step": 5547 }, { "epoch": 0.74032559380838, "grad_norm": 0.9159493952844268, "learning_rate": 3.333139677892436e-06, "loss": 1.555, "step": 5548 }, { "epoch": 0.7404590338937816, "grad_norm": 0.9659206359188842, "learning_rate": 3.3299190178039464e-06, "loss": 1.6404, "step": 5549 }, { "epoch": 0.7405924739791834, "grad_norm": 1.1568043350228006, "learning_rate": 3.3266996036106415e-06, "loss": 1.5864, "step": 5550 }, { "epoch": 0.740725914064585, "grad_norm": 0.923838794736863, "learning_rate": 3.323481435913871e-06, "loss": 1.5286, "step": 5551 }, { "epoch": 0.7408593541499866, "grad_norm": 0.9701142939931843, "learning_rate": 3.320264515314752e-06, "loss": 1.515, "step": 5552 }, { "epoch": 0.7409927942353883, "grad_norm": 0.9266738295815183, "learning_rate": 3.3170488424141713e-06, "loss": 1.5394, "step": 5553 }, { "epoch": 0.74112623432079, "grad_norm": 0.9206286448502721, "learning_rate": 3.31383441781278e-06, "loss": 1.5438, "step": 5554 }, { "epoch": 0.7412596744061917, "grad_norm": 0.931353809690202, "learning_rate": 3.310621242110994e-06, "loss": 1.5102, "step": 5555 }, { "epoch": 0.7413931144915933, "grad_norm": 0.9508289819339473, "learning_rate": 3.307409315908995e-06, "loss": 1.5811, "step": 5556 }, { "epoch": 0.7415265545769949, "grad_norm": 0.959176272650246, "learning_rate": 3.3041986398067416e-06, "loss": 1.599, "step": 5557 }, { "epoch": 0.7416599946623966, "grad_norm": 0.9375548404744494, "learning_rate": 3.3009892144039478e-06, "loss": 1.5679, "step": 5558 }, { "epoch": 0.7417934347477982, "grad_norm": 0.9217050733117328, "learning_rate": 3.2977810403000977e-06, "loss": 1.5366, "step": 5559 }, { "epoch": 0.7419268748331999, "grad_norm": 0.9481402337066958, "learning_rate": 3.29457411809444e-06, "loss": 1.5539, "step": 5560 }, { "epoch": 0.7420603149186016, "grad_norm": 0.9269200198823844, "learning_rate": 3.291368448385992e-06, "loss": 1.511, "step": 5561 }, { "epoch": 0.7421937550040032, "grad_norm": 0.9484784210641377, "learning_rate": 3.2881640317735386e-06, "loss": 1.5821, "step": 5562 }, { "epoch": 0.7423271950894048, "grad_norm": 0.9728501168391839, "learning_rate": 3.2849608688556276e-06, "loss": 1.5608, "step": 5563 }, { "epoch": 0.7424606351748065, "grad_norm": 0.9582363784642031, "learning_rate": 3.2817589602305732e-06, "loss": 1.5794, "step": 5564 }, { "epoch": 0.7425940752602082, "grad_norm": 0.9398306842093537, "learning_rate": 3.2785583064964545e-06, "loss": 1.5571, "step": 5565 }, { "epoch": 0.7427275153456098, "grad_norm": 0.9467894900781275, "learning_rate": 3.2753589082511152e-06, "loss": 1.5512, "step": 5566 }, { "epoch": 0.7428609554310115, "grad_norm": 0.9367465516942596, "learning_rate": 3.2721607660921716e-06, "loss": 1.5603, "step": 5567 }, { "epoch": 0.7429943955164131, "grad_norm": 0.9499101812547797, "learning_rate": 3.2689638806169977e-06, "loss": 1.5349, "step": 5568 }, { "epoch": 0.7431278356018148, "grad_norm": 0.9191430811722798, "learning_rate": 3.265768252422734e-06, "loss": 1.4816, "step": 5569 }, { "epoch": 0.7432612756872164, "grad_norm": 1.0165076519034235, "learning_rate": 3.2625738821062868e-06, "loss": 1.5599, "step": 5570 }, { "epoch": 0.7433947157726181, "grad_norm": 0.9387670444451917, "learning_rate": 3.259380770264332e-06, "loss": 1.5309, "step": 5571 }, { "epoch": 0.7435281558580198, "grad_norm": 0.970178143993145, "learning_rate": 3.256188917493306e-06, "loss": 1.5265, "step": 5572 }, { "epoch": 0.7436615959434214, "grad_norm": 0.9464180115309929, "learning_rate": 3.2529983243894046e-06, "loss": 1.5373, "step": 5573 }, { "epoch": 0.743795036028823, "grad_norm": 0.9513946770758279, "learning_rate": 3.2498089915486032e-06, "loss": 1.5375, "step": 5574 }, { "epoch": 0.7439284761142247, "grad_norm": 0.9395784172257111, "learning_rate": 3.2466209195666266e-06, "loss": 1.5607, "step": 5575 }, { "epoch": 0.7440619161996264, "grad_norm": 1.081019248269522, "learning_rate": 3.2434341090389734e-06, "loss": 1.5749, "step": 5576 }, { "epoch": 0.744195356285028, "grad_norm": 0.9327852617129883, "learning_rate": 3.240248560560899e-06, "loss": 1.5236, "step": 5577 }, { "epoch": 0.7443287963704297, "grad_norm": 0.9878773231924048, "learning_rate": 3.237064274727433e-06, "loss": 1.5515, "step": 5578 }, { "epoch": 0.7444622364558313, "grad_norm": 1.010601298560983, "learning_rate": 3.233881252133363e-06, "loss": 1.5761, "step": 5579 }, { "epoch": 0.7445956765412329, "grad_norm": 0.9550690993331595, "learning_rate": 3.230699493373236e-06, "loss": 1.5799, "step": 5580 }, { "epoch": 0.7447291166266347, "grad_norm": 0.979629622187405, "learning_rate": 3.2275189990413746e-06, "loss": 1.5323, "step": 5581 }, { "epoch": 0.7448625567120363, "grad_norm": 1.0433957484563827, "learning_rate": 3.224339769731858e-06, "loss": 1.4937, "step": 5582 }, { "epoch": 0.744995996797438, "grad_norm": 0.9320393784593036, "learning_rate": 3.2211618060385285e-06, "loss": 1.5419, "step": 5583 }, { "epoch": 0.7451294368828396, "grad_norm": 0.9316119615350978, "learning_rate": 3.2179851085549897e-06, "loss": 1.5457, "step": 5584 }, { "epoch": 0.7452628769682412, "grad_norm": 0.9283803212880422, "learning_rate": 3.2148096778746195e-06, "loss": 1.5546, "step": 5585 }, { "epoch": 0.745396317053643, "grad_norm": 0.9313780199697975, "learning_rate": 3.21163551459055e-06, "loss": 1.5485, "step": 5586 }, { "epoch": 0.7455297571390446, "grad_norm": 1.001529063138306, "learning_rate": 3.2084626192956745e-06, "loss": 1.5779, "step": 5587 }, { "epoch": 0.7456631972244462, "grad_norm": 1.0662137147201587, "learning_rate": 3.2052909925826604e-06, "loss": 1.5484, "step": 5588 }, { "epoch": 0.7457966373098479, "grad_norm": 0.9485702607614251, "learning_rate": 3.202120635043928e-06, "loss": 1.5674, "step": 5589 }, { "epoch": 0.7459300773952495, "grad_norm": 0.9472743543057982, "learning_rate": 3.198951547271665e-06, "loss": 1.5625, "step": 5590 }, { "epoch": 0.7460635174806511, "grad_norm": 0.9051518915153008, "learning_rate": 3.1957837298578154e-06, "loss": 1.5522, "step": 5591 }, { "epoch": 0.7461969575660529, "grad_norm": 0.9130917382566023, "learning_rate": 3.1926171833941e-06, "loss": 1.5459, "step": 5592 }, { "epoch": 0.7463303976514545, "grad_norm": 1.149337316223058, "learning_rate": 3.18945190847199e-06, "loss": 1.5367, "step": 5593 }, { "epoch": 0.7464638377368562, "grad_norm": 1.0675304023468861, "learning_rate": 3.1862879056827225e-06, "loss": 1.5151, "step": 5594 }, { "epoch": 0.7465972778222578, "grad_norm": 0.9568872060715193, "learning_rate": 3.1831251756172943e-06, "loss": 1.5452, "step": 5595 }, { "epoch": 0.7467307179076594, "grad_norm": 1.0314564637633439, "learning_rate": 3.1799637188664736e-06, "loss": 1.5902, "step": 5596 }, { "epoch": 0.7468641579930612, "grad_norm": 0.9252768296530237, "learning_rate": 3.1768035360207806e-06, "loss": 1.5499, "step": 5597 }, { "epoch": 0.7469975980784628, "grad_norm": 1.2637363917190654, "learning_rate": 3.173644627670499e-06, "loss": 1.505, "step": 5598 }, { "epoch": 0.7471310381638644, "grad_norm": 1.0200228137515415, "learning_rate": 3.1704869944056826e-06, "loss": 1.583, "step": 5599 }, { "epoch": 0.7472644782492661, "grad_norm": 1.2623537137006278, "learning_rate": 3.1673306368161394e-06, "loss": 1.5582, "step": 5600 }, { "epoch": 0.7473979183346677, "grad_norm": 0.9942148402882126, "learning_rate": 3.16417555549144e-06, "loss": 1.5138, "step": 5601 }, { "epoch": 0.7475313584200693, "grad_norm": 0.9769665826208432, "learning_rate": 3.1610217510209184e-06, "loss": 1.5111, "step": 5602 }, { "epoch": 0.7476647985054711, "grad_norm": 1.0229850987939395, "learning_rate": 3.1578692239936693e-06, "loss": 1.5134, "step": 5603 }, { "epoch": 0.7477982385908727, "grad_norm": 0.9351520451791383, "learning_rate": 3.1547179749985445e-06, "loss": 1.6311, "step": 5604 }, { "epoch": 0.7479316786762743, "grad_norm": 0.9204017069129004, "learning_rate": 3.1515680046241693e-06, "loss": 1.5303, "step": 5605 }, { "epoch": 0.748065118761676, "grad_norm": 0.9599046278620328, "learning_rate": 3.148419313458918e-06, "loss": 1.5719, "step": 5606 }, { "epoch": 0.7481985588470776, "grad_norm": 0.948039514652451, "learning_rate": 3.1452719020909317e-06, "loss": 1.5452, "step": 5607 }, { "epoch": 0.7483319989324794, "grad_norm": 0.9481417279723116, "learning_rate": 3.1421257711081097e-06, "loss": 1.5983, "step": 5608 }, { "epoch": 0.748465439017881, "grad_norm": 0.9485239077024988, "learning_rate": 3.13898092109811e-06, "loss": 1.531, "step": 5609 }, { "epoch": 0.7485988791032826, "grad_norm": 1.1358614513622873, "learning_rate": 3.135837352648362e-06, "loss": 1.5357, "step": 5610 }, { "epoch": 0.7487323191886843, "grad_norm": 0.970237163017964, "learning_rate": 3.1326950663460466e-06, "loss": 1.5521, "step": 5611 }, { "epoch": 0.7488657592740859, "grad_norm": 0.9433944906141754, "learning_rate": 3.1295540627781006e-06, "loss": 1.6221, "step": 5612 }, { "epoch": 0.7489991993594876, "grad_norm": 1.0020127389963633, "learning_rate": 3.1264143425312366e-06, "loss": 1.5551, "step": 5613 }, { "epoch": 0.7491326394448893, "grad_norm": 0.9670401814605709, "learning_rate": 3.123275906191915e-06, "loss": 1.5776, "step": 5614 }, { "epoch": 0.7492660795302909, "grad_norm": 0.9361899965589122, "learning_rate": 3.120138754346359e-06, "loss": 1.5674, "step": 5615 }, { "epoch": 0.7493995196156925, "grad_norm": 0.976493101276714, "learning_rate": 3.1170028875805504e-06, "loss": 1.5411, "step": 5616 }, { "epoch": 0.7495329597010942, "grad_norm": 0.9634598582821797, "learning_rate": 3.11386830648024e-06, "loss": 1.5704, "step": 5617 }, { "epoch": 0.7496663997864959, "grad_norm": 0.9640441671842809, "learning_rate": 3.1107350116309275e-06, "loss": 1.5383, "step": 5618 }, { "epoch": 0.7497998398718975, "grad_norm": 0.9555181724485184, "learning_rate": 3.1076030036178763e-06, "loss": 1.6026, "step": 5619 }, { "epoch": 0.7499332799572992, "grad_norm": 1.2807705278680843, "learning_rate": 3.104472283026113e-06, "loss": 1.5881, "step": 5620 }, { "epoch": 0.7500667200427008, "grad_norm": 0.9356127813233456, "learning_rate": 3.1013428504404187e-06, "loss": 1.561, "step": 5621 }, { "epoch": 0.7502001601281025, "grad_norm": 0.9571532107573171, "learning_rate": 3.098214706445336e-06, "loss": 1.5119, "step": 5622 }, { "epoch": 0.7503336002135041, "grad_norm": 0.9491196174548282, "learning_rate": 3.0950878516251636e-06, "loss": 1.5096, "step": 5623 }, { "epoch": 0.7504670402989058, "grad_norm": 0.9417652753789462, "learning_rate": 3.0919622865639677e-06, "loss": 1.5102, "step": 5624 }, { "epoch": 0.7506004803843075, "grad_norm": 0.9630569879115026, "learning_rate": 3.088838011845566e-06, "loss": 1.5818, "step": 5625 }, { "epoch": 0.7507339204697091, "grad_norm": 0.9368704080314552, "learning_rate": 3.0857150280535365e-06, "loss": 1.5337, "step": 5626 }, { "epoch": 0.7508673605551107, "grad_norm": 0.9571714147311596, "learning_rate": 3.082593335771216e-06, "loss": 1.5982, "step": 5627 }, { "epoch": 0.7510008006405124, "grad_norm": 0.9254871682343206, "learning_rate": 3.079472935581704e-06, "loss": 1.5988, "step": 5628 }, { "epoch": 0.7511342407259141, "grad_norm": 0.9612391153764122, "learning_rate": 3.0763538280678563e-06, "loss": 1.5205, "step": 5629 }, { "epoch": 0.7512676808113157, "grad_norm": 0.952348233253775, "learning_rate": 3.0732360138122806e-06, "loss": 1.5686, "step": 5630 }, { "epoch": 0.7514011208967174, "grad_norm": 0.9494761120662082, "learning_rate": 3.0701194933973566e-06, "loss": 1.5929, "step": 5631 }, { "epoch": 0.751534560982119, "grad_norm": 0.917699574591178, "learning_rate": 3.0670042674052116e-06, "loss": 1.524, "step": 5632 }, { "epoch": 0.7516680010675206, "grad_norm": 1.0333026625817825, "learning_rate": 3.0638903364177343e-06, "loss": 1.5439, "step": 5633 }, { "epoch": 0.7518014411529224, "grad_norm": 0.9417912177877552, "learning_rate": 3.0607777010165683e-06, "loss": 1.6148, "step": 5634 }, { "epoch": 0.751934881238324, "grad_norm": 0.9406321738897914, "learning_rate": 3.057666361783126e-06, "loss": 1.583, "step": 5635 }, { "epoch": 0.7520683213237257, "grad_norm": 1.1578117225090305, "learning_rate": 3.054556319298565e-06, "loss": 1.5209, "step": 5636 }, { "epoch": 0.7522017614091273, "grad_norm": 0.9600519785804953, "learning_rate": 3.051447574143803e-06, "loss": 1.5374, "step": 5637 }, { "epoch": 0.7523352014945289, "grad_norm": 0.9270313809638928, "learning_rate": 3.048340126899526e-06, "loss": 1.5212, "step": 5638 }, { "epoch": 0.7524686415799307, "grad_norm": 0.9114207753606299, "learning_rate": 3.045233978146166e-06, "loss": 1.5157, "step": 5639 }, { "epoch": 0.7526020816653323, "grad_norm": 0.981499892052798, "learning_rate": 3.0421291284639143e-06, "loss": 1.5459, "step": 5640 }, { "epoch": 0.7527355217507339, "grad_norm": 0.9458653564470572, "learning_rate": 3.0390255784327215e-06, "loss": 1.5687, "step": 5641 }, { "epoch": 0.7528689618361356, "grad_norm": 1.2124512830726673, "learning_rate": 3.0359233286322986e-06, "loss": 1.5388, "step": 5642 }, { "epoch": 0.7530024019215372, "grad_norm": 0.9508117630773106, "learning_rate": 3.0328223796421107e-06, "loss": 1.5721, "step": 5643 }, { "epoch": 0.7531358420069388, "grad_norm": 0.9592182453362529, "learning_rate": 3.029722732041377e-06, "loss": 1.5769, "step": 5644 }, { "epoch": 0.7532692820923406, "grad_norm": 1.08965444186456, "learning_rate": 3.0266243864090772e-06, "loss": 1.5829, "step": 5645 }, { "epoch": 0.7534027221777422, "grad_norm": 2.9774916226895147, "learning_rate": 3.0235273433239475e-06, "loss": 1.5859, "step": 5646 }, { "epoch": 0.7535361622631438, "grad_norm": 1.1074468709821748, "learning_rate": 3.0204316033644765e-06, "loss": 1.5437, "step": 5647 }, { "epoch": 0.7536696023485455, "grad_norm": 0.9565752685671445, "learning_rate": 3.01733716710892e-06, "loss": 1.5924, "step": 5648 }, { "epoch": 0.7538030424339471, "grad_norm": 0.9585562387187216, "learning_rate": 3.0142440351352797e-06, "loss": 1.5454, "step": 5649 }, { "epoch": 0.7539364825193489, "grad_norm": 1.056117358661078, "learning_rate": 3.0111522080213185e-06, "loss": 1.5113, "step": 5650 }, { "epoch": 0.7540699226047505, "grad_norm": 1.2766508430903627, "learning_rate": 3.008061686344551e-06, "loss": 1.5445, "step": 5651 }, { "epoch": 0.7542033626901521, "grad_norm": 0.9812676587742001, "learning_rate": 3.0049724706822557e-06, "loss": 1.5591, "step": 5652 }, { "epoch": 0.7543368027755538, "grad_norm": 0.9349340551684687, "learning_rate": 3.001884561611463e-06, "loss": 1.5658, "step": 5653 }, { "epoch": 0.7544702428609554, "grad_norm": 1.2097788395210352, "learning_rate": 2.998797959708958e-06, "loss": 1.4703, "step": 5654 }, { "epoch": 0.754603682946357, "grad_norm": 0.9150765980891424, "learning_rate": 2.995712665551278e-06, "loss": 1.5338, "step": 5655 }, { "epoch": 0.7547371230317588, "grad_norm": 1.1493932341594215, "learning_rate": 2.9926286797147284e-06, "loss": 1.5119, "step": 5656 }, { "epoch": 0.7548705631171604, "grad_norm": 0.9295681174421622, "learning_rate": 2.989546002775361e-06, "loss": 1.5785, "step": 5657 }, { "epoch": 0.755004003202562, "grad_norm": 0.9844513532497011, "learning_rate": 2.9864646353089822e-06, "loss": 1.513, "step": 5658 }, { "epoch": 0.7551374432879637, "grad_norm": 0.9691752292557538, "learning_rate": 2.983384577891154e-06, "loss": 1.5196, "step": 5659 }, { "epoch": 0.7552708833733653, "grad_norm": 1.0348255829580888, "learning_rate": 2.980305831097203e-06, "loss": 1.5305, "step": 5660 }, { "epoch": 0.7554043234587671, "grad_norm": 0.9795461493134596, "learning_rate": 2.9772283955022006e-06, "loss": 1.5619, "step": 5661 }, { "epoch": 0.7555377635441687, "grad_norm": 0.9480708314986227, "learning_rate": 2.9741522716809734e-06, "loss": 1.5426, "step": 5662 }, { "epoch": 0.7556712036295703, "grad_norm": 0.9365703829056337, "learning_rate": 2.971077460208113e-06, "loss": 1.5243, "step": 5663 }, { "epoch": 0.755804643714972, "grad_norm": 0.9664797943870821, "learning_rate": 2.9680039616579558e-06, "loss": 1.5866, "step": 5664 }, { "epoch": 0.7559380838003736, "grad_norm": 0.9326504176989024, "learning_rate": 2.964931776604596e-06, "loss": 1.5703, "step": 5665 }, { "epoch": 0.7560715238857753, "grad_norm": 0.9662933536347439, "learning_rate": 2.9618609056218797e-06, "loss": 1.5854, "step": 5666 }, { "epoch": 0.756204963971177, "grad_norm": 1.0951318602049294, "learning_rate": 2.9587913492834174e-06, "loss": 1.5744, "step": 5667 }, { "epoch": 0.7563384040565786, "grad_norm": 0.9387753384543569, "learning_rate": 2.9557231081625637e-06, "loss": 1.591, "step": 5668 }, { "epoch": 0.7564718441419802, "grad_norm": 0.9885761349123516, "learning_rate": 2.9526561828324286e-06, "loss": 1.5488, "step": 5669 }, { "epoch": 0.7566052842273819, "grad_norm": 0.9272808115972163, "learning_rate": 2.9495905738658846e-06, "loss": 1.5201, "step": 5670 }, { "epoch": 0.7567387243127836, "grad_norm": 0.9268527670988117, "learning_rate": 2.946526281835549e-06, "loss": 1.571, "step": 5671 }, { "epoch": 0.7568721643981852, "grad_norm": 1.0147610911957214, "learning_rate": 2.9434633073137976e-06, "loss": 1.586, "step": 5672 }, { "epoch": 0.7570056044835869, "grad_norm": 0.9383855121743576, "learning_rate": 2.940401650872755e-06, "loss": 1.5405, "step": 5673 }, { "epoch": 0.7571390445689885, "grad_norm": 1.0683692922351982, "learning_rate": 2.9373413130843108e-06, "loss": 1.5319, "step": 5674 }, { "epoch": 0.7572724846543902, "grad_norm": 0.9501481416038345, "learning_rate": 2.934282294520099e-06, "loss": 1.5894, "step": 5675 }, { "epoch": 0.7574059247397918, "grad_norm": 0.9176546375459858, "learning_rate": 2.931224595751504e-06, "loss": 1.4954, "step": 5676 }, { "epoch": 0.7575393648251935, "grad_norm": 0.947992780591222, "learning_rate": 2.9281682173496764e-06, "loss": 1.5466, "step": 5677 }, { "epoch": 0.7576728049105952, "grad_norm": 0.9790622488674117, "learning_rate": 2.925113159885511e-06, "loss": 1.5469, "step": 5678 }, { "epoch": 0.7578062449959968, "grad_norm": 0.9636768377878497, "learning_rate": 2.922059423929656e-06, "loss": 1.5419, "step": 5679 }, { "epoch": 0.7579396850813984, "grad_norm": 0.9219842776449305, "learning_rate": 2.9190070100525124e-06, "loss": 1.4979, "step": 5680 }, { "epoch": 0.7580731251668001, "grad_norm": 0.9863967160238304, "learning_rate": 2.9159559188242428e-06, "loss": 1.5115, "step": 5681 }, { "epoch": 0.7582065652522018, "grad_norm": 0.9540589917238507, "learning_rate": 2.9129061508147514e-06, "loss": 1.5829, "step": 5682 }, { "epoch": 0.7583400053376034, "grad_norm": 0.9415496333259665, "learning_rate": 2.9098577065937027e-06, "loss": 1.5155, "step": 5683 }, { "epoch": 0.7584734454230051, "grad_norm": 1.0264214386562416, "learning_rate": 2.906810586730506e-06, "loss": 1.5762, "step": 5684 }, { "epoch": 0.7586068855084067, "grad_norm": 1.0085987174197029, "learning_rate": 2.903764791794337e-06, "loss": 1.6053, "step": 5685 }, { "epoch": 0.7587403255938083, "grad_norm": 0.9746022220580336, "learning_rate": 2.9007203223541104e-06, "loss": 1.5505, "step": 5686 }, { "epoch": 0.75887376567921, "grad_norm": 1.1436325712704063, "learning_rate": 2.8976771789784986e-06, "loss": 1.5532, "step": 5687 }, { "epoch": 0.7590072057646117, "grad_norm": 0.9458780762382005, "learning_rate": 2.894635362235928e-06, "loss": 1.5335, "step": 5688 }, { "epoch": 0.7591406458500134, "grad_norm": 0.9586718389826383, "learning_rate": 2.8915948726945696e-06, "loss": 1.5686, "step": 5689 }, { "epoch": 0.759274085935415, "grad_norm": 0.9374091977052081, "learning_rate": 2.8885557109223615e-06, "loss": 1.5533, "step": 5690 }, { "epoch": 0.7594075260208166, "grad_norm": 0.9210874426208345, "learning_rate": 2.8855178774869784e-06, "loss": 1.6033, "step": 5691 }, { "epoch": 0.7595409661062184, "grad_norm": 0.9425967721286689, "learning_rate": 2.882481372955855e-06, "loss": 1.5192, "step": 5692 }, { "epoch": 0.75967440619162, "grad_norm": 0.9409853280626861, "learning_rate": 2.879446197896176e-06, "loss": 1.5309, "step": 5693 }, { "epoch": 0.7598078462770216, "grad_norm": 0.9175409560809977, "learning_rate": 2.8764123528748724e-06, "loss": 1.5253, "step": 5694 }, { "epoch": 0.7599412863624233, "grad_norm": 1.0633079652522899, "learning_rate": 2.8733798384586398e-06, "loss": 1.5564, "step": 5695 }, { "epoch": 0.7600747264478249, "grad_norm": 1.0083618844062685, "learning_rate": 2.870348655213914e-06, "loss": 1.5223, "step": 5696 }, { "epoch": 0.7602081665332265, "grad_norm": 1.1484670895236269, "learning_rate": 2.8673188037068843e-06, "loss": 1.5488, "step": 5697 }, { "epoch": 0.7603416066186283, "grad_norm": 0.9648612510439487, "learning_rate": 2.8642902845034914e-06, "loss": 1.62, "step": 5698 }, { "epoch": 0.7604750467040299, "grad_norm": 0.9409488162926377, "learning_rate": 2.8612630981694322e-06, "loss": 1.5146, "step": 5699 }, { "epoch": 0.7606084867894315, "grad_norm": 1.0185521319833666, "learning_rate": 2.85823724527015e-06, "loss": 1.5524, "step": 5700 }, { "epoch": 0.7607419268748332, "grad_norm": 0.9261998030680365, "learning_rate": 2.855212726370834e-06, "loss": 1.5362, "step": 5701 }, { "epoch": 0.7608753669602348, "grad_norm": 0.9131576322837639, "learning_rate": 2.852189542036439e-06, "loss": 1.5431, "step": 5702 }, { "epoch": 0.7610088070456366, "grad_norm": 0.9672674722537751, "learning_rate": 2.849167692831655e-06, "loss": 1.5413, "step": 5703 }, { "epoch": 0.7611422471310382, "grad_norm": 0.9672341780718317, "learning_rate": 2.8461471793209318e-06, "loss": 1.5816, "step": 5704 }, { "epoch": 0.7612756872164398, "grad_norm": 1.024093269678312, "learning_rate": 2.8431280020684615e-06, "loss": 1.5202, "step": 5705 }, { "epoch": 0.7614091273018415, "grad_norm": 0.9730376429468712, "learning_rate": 2.840110161638201e-06, "loss": 1.5619, "step": 5706 }, { "epoch": 0.7615425673872431, "grad_norm": 0.9277118027287636, "learning_rate": 2.8370936585938425e-06, "loss": 1.5741, "step": 5707 }, { "epoch": 0.7616760074726447, "grad_norm": 0.9385522263679275, "learning_rate": 2.834078493498833e-06, "loss": 1.5243, "step": 5708 }, { "epoch": 0.7618094475580465, "grad_norm": 0.921781169116257, "learning_rate": 2.8310646669163777e-06, "loss": 1.5539, "step": 5709 }, { "epoch": 0.7619428876434481, "grad_norm": 0.9708668936556057, "learning_rate": 2.8280521794094205e-06, "loss": 1.5721, "step": 5710 }, { "epoch": 0.7620763277288497, "grad_norm": 0.9753523420806991, "learning_rate": 2.8250410315406606e-06, "loss": 1.5386, "step": 5711 }, { "epoch": 0.7622097678142514, "grad_norm": 1.1177484636132677, "learning_rate": 2.822031223872543e-06, "loss": 1.5526, "step": 5712 }, { "epoch": 0.762343207899653, "grad_norm": 0.9770528135249951, "learning_rate": 2.8190227569672722e-06, "loss": 1.5853, "step": 5713 }, { "epoch": 0.7624766479850547, "grad_norm": 1.3032469298982818, "learning_rate": 2.8160156313867915e-06, "loss": 1.6291, "step": 5714 }, { "epoch": 0.7626100880704564, "grad_norm": 0.9638843608824444, "learning_rate": 2.813009847692795e-06, "loss": 1.5735, "step": 5715 }, { "epoch": 0.762743528155858, "grad_norm": 1.1791110749699807, "learning_rate": 2.8100054064467355e-06, "loss": 1.5285, "step": 5716 }, { "epoch": 0.7628769682412597, "grad_norm": 16.86319433750556, "learning_rate": 2.807002308209804e-06, "loss": 1.5693, "step": 5717 }, { "epoch": 0.7630104083266613, "grad_norm": 0.9439903919518662, "learning_rate": 2.8040005535429472e-06, "loss": 1.5474, "step": 5718 }, { "epoch": 0.763143848412063, "grad_norm": 0.961074528153183, "learning_rate": 2.8010001430068535e-06, "loss": 1.5005, "step": 5719 }, { "epoch": 0.7632772884974647, "grad_norm": 0.9524749124010052, "learning_rate": 2.7980010771619727e-06, "loss": 1.5212, "step": 5720 }, { "epoch": 0.7634107285828663, "grad_norm": 0.9533980355725618, "learning_rate": 2.795003356568492e-06, "loss": 1.5894, "step": 5721 }, { "epoch": 0.7635441686682679, "grad_norm": 0.9213712133256243, "learning_rate": 2.792006981786354e-06, "loss": 1.5208, "step": 5722 }, { "epoch": 0.7636776087536696, "grad_norm": 0.9294323574854917, "learning_rate": 2.7890119533752415e-06, "loss": 1.5441, "step": 5723 }, { "epoch": 0.7638110488390712, "grad_norm": 0.9784510201172563, "learning_rate": 2.7860182718945993e-06, "loss": 1.581, "step": 5724 }, { "epoch": 0.7639444889244729, "grad_norm": 0.9588683754336024, "learning_rate": 2.7830259379036095e-06, "loss": 1.5307, "step": 5725 }, { "epoch": 0.7640779290098746, "grad_norm": 0.9143625087310671, "learning_rate": 2.7800349519612023e-06, "loss": 1.5518, "step": 5726 }, { "epoch": 0.7642113690952762, "grad_norm": 0.9792840961883873, "learning_rate": 2.777045314626068e-06, "loss": 1.4893, "step": 5727 }, { "epoch": 0.7643448091806779, "grad_norm": 0.9591101744910538, "learning_rate": 2.7740570264566325e-06, "loss": 1.576, "step": 5728 }, { "epoch": 0.7644782492660795, "grad_norm": 1.0493610713948585, "learning_rate": 2.771070088011073e-06, "loss": 1.5944, "step": 5729 }, { "epoch": 0.7646116893514812, "grad_norm": 0.940470963878361, "learning_rate": 2.7680844998473176e-06, "loss": 1.6017, "step": 5730 }, { "epoch": 0.7647451294368829, "grad_norm": 0.9114372371817887, "learning_rate": 2.7651002625230394e-06, "loss": 1.5043, "step": 5731 }, { "epoch": 0.7648785695222845, "grad_norm": 0.9514804602724689, "learning_rate": 2.7621173765956553e-06, "loss": 1.556, "step": 5732 }, { "epoch": 0.7650120096076861, "grad_norm": 0.931967453227579, "learning_rate": 2.7591358426223437e-06, "loss": 1.5529, "step": 5733 }, { "epoch": 0.7651454496930878, "grad_norm": 0.9641034664380269, "learning_rate": 2.756155661160015e-06, "loss": 1.5584, "step": 5734 }, { "epoch": 0.7652788897784895, "grad_norm": 1.0406330527700243, "learning_rate": 2.753176832765334e-06, "loss": 1.5685, "step": 5735 }, { "epoch": 0.7654123298638911, "grad_norm": 1.1171214620153784, "learning_rate": 2.750199357994714e-06, "loss": 1.5532, "step": 5736 }, { "epoch": 0.7655457699492928, "grad_norm": 0.9866430386535575, "learning_rate": 2.7472232374043084e-06, "loss": 1.5261, "step": 5737 }, { "epoch": 0.7656792100346944, "grad_norm": 0.9361087966486119, "learning_rate": 2.7442484715500286e-06, "loss": 1.5359, "step": 5738 }, { "epoch": 0.765812650120096, "grad_norm": 0.9609366943677766, "learning_rate": 2.741275060987525e-06, "loss": 1.544, "step": 5739 }, { "epoch": 0.7659460902054978, "grad_norm": 1.0456283664316062, "learning_rate": 2.7383030062721926e-06, "loss": 1.5098, "step": 5740 }, { "epoch": 0.7660795302908994, "grad_norm": 0.9071979960109646, "learning_rate": 2.7353323079591842e-06, "loss": 1.5621, "step": 5741 }, { "epoch": 0.7662129703763011, "grad_norm": 0.9422303587878669, "learning_rate": 2.7323629666033903e-06, "loss": 1.5661, "step": 5742 }, { "epoch": 0.7663464104617027, "grad_norm": 1.0122385866589452, "learning_rate": 2.729394982759449e-06, "loss": 1.5319, "step": 5743 }, { "epoch": 0.7664798505471043, "grad_norm": 1.1141139885458986, "learning_rate": 2.726428356981742e-06, "loss": 1.5295, "step": 5744 }, { "epoch": 0.766613290632506, "grad_norm": 0.982862912585683, "learning_rate": 2.7234630898244084e-06, "loss": 1.5591, "step": 5745 }, { "epoch": 0.7667467307179077, "grad_norm": 1.075501803864991, "learning_rate": 2.720499181841324e-06, "loss": 1.5615, "step": 5746 }, { "epoch": 0.7668801708033093, "grad_norm": 1.028976839916406, "learning_rate": 2.7175366335861087e-06, "loss": 1.5477, "step": 5747 }, { "epoch": 0.767013610888711, "grad_norm": 1.0645308095468675, "learning_rate": 2.714575445612139e-06, "loss": 1.5542, "step": 5748 }, { "epoch": 0.7671470509741126, "grad_norm": 0.9392883647391629, "learning_rate": 2.7116156184725285e-06, "loss": 1.5699, "step": 5749 }, { "epoch": 0.7672804910595142, "grad_norm": 0.965446906684568, "learning_rate": 2.708657152720139e-06, "loss": 1.5796, "step": 5750 }, { "epoch": 0.767413931144916, "grad_norm": 1.0070562909813836, "learning_rate": 2.705700048907576e-06, "loss": 1.5419, "step": 5751 }, { "epoch": 0.7675473712303176, "grad_norm": 0.9415702130765551, "learning_rate": 2.7027443075871974e-06, "loss": 1.5344, "step": 5752 }, { "epoch": 0.7676808113157192, "grad_norm": 1.007240476174913, "learning_rate": 2.6997899293110997e-06, "loss": 1.5223, "step": 5753 }, { "epoch": 0.7678142514011209, "grad_norm": 0.9583583605966772, "learning_rate": 2.696836914631127e-06, "loss": 1.4856, "step": 5754 }, { "epoch": 0.7679476914865225, "grad_norm": 0.9286247656808373, "learning_rate": 2.6938852640988666e-06, "loss": 1.5845, "step": 5755 }, { "epoch": 0.7680811315719243, "grad_norm": 1.0066591400064955, "learning_rate": 2.690934978265659e-06, "loss": 1.5767, "step": 5756 }, { "epoch": 0.7682145716573259, "grad_norm": 1.052076839954032, "learning_rate": 2.68798605768258e-06, "loss": 1.5927, "step": 5757 }, { "epoch": 0.7683480117427275, "grad_norm": 1.0098347870196225, "learning_rate": 2.685038502900452e-06, "loss": 1.5831, "step": 5758 }, { "epoch": 0.7684814518281292, "grad_norm": 1.0546240360318806, "learning_rate": 2.682092314469851e-06, "loss": 1.5924, "step": 5759 }, { "epoch": 0.7686148919135308, "grad_norm": 0.985560507673923, "learning_rate": 2.6791474929410877e-06, "loss": 1.5819, "step": 5760 }, { "epoch": 0.7687483319989324, "grad_norm": 0.9490509821938993, "learning_rate": 2.6762040388642217e-06, "loss": 1.5278, "step": 5761 }, { "epoch": 0.7688817720843342, "grad_norm": 0.9668194942766439, "learning_rate": 2.6732619527890547e-06, "loss": 1.5719, "step": 5762 }, { "epoch": 0.7690152121697358, "grad_norm": 1.0376817559211673, "learning_rate": 2.670321235265139e-06, "loss": 1.5694, "step": 5763 }, { "epoch": 0.7691486522551374, "grad_norm": 1.1459513474130882, "learning_rate": 2.6673818868417646e-06, "loss": 1.5727, "step": 5764 }, { "epoch": 0.7692820923405391, "grad_norm": 0.9389863744334982, "learning_rate": 2.6644439080679662e-06, "loss": 1.5679, "step": 5765 }, { "epoch": 0.7694155324259407, "grad_norm": 0.9437950047313849, "learning_rate": 2.6615072994925308e-06, "loss": 1.5559, "step": 5766 }, { "epoch": 0.7695489725113424, "grad_norm": 0.955667607095649, "learning_rate": 2.65857206166398e-06, "loss": 1.526, "step": 5767 }, { "epoch": 0.7696824125967441, "grad_norm": 0.8988996567956318, "learning_rate": 2.655638195130582e-06, "loss": 1.4886, "step": 5768 }, { "epoch": 0.7698158526821457, "grad_norm": 1.0552144404288448, "learning_rate": 2.652705700440348e-06, "loss": 1.5902, "step": 5769 }, { "epoch": 0.7699492927675474, "grad_norm": 0.9570299871105503, "learning_rate": 2.64977457814104e-06, "loss": 1.5706, "step": 5770 }, { "epoch": 0.770082732852949, "grad_norm": 0.9664283679747206, "learning_rate": 2.6468448287801552e-06, "loss": 1.5595, "step": 5771 }, { "epoch": 0.7702161729383507, "grad_norm": 0.929178580968516, "learning_rate": 2.6439164529049376e-06, "loss": 1.5301, "step": 5772 }, { "epoch": 0.7703496130237524, "grad_norm": 0.9305117830654621, "learning_rate": 2.6409894510623744e-06, "loss": 1.5678, "step": 5773 }, { "epoch": 0.770483053109154, "grad_norm": 1.0322434008439814, "learning_rate": 2.6380638237991963e-06, "loss": 1.5298, "step": 5774 }, { "epoch": 0.7706164931945556, "grad_norm": 0.9590848759112884, "learning_rate": 2.6351395716618746e-06, "loss": 1.5437, "step": 5775 }, { "epoch": 0.7707499332799573, "grad_norm": 0.9231804585207396, "learning_rate": 2.632216695196631e-06, "loss": 1.4736, "step": 5776 }, { "epoch": 0.770883373365359, "grad_norm": 0.9786231108386787, "learning_rate": 2.6292951949494237e-06, "loss": 1.5593, "step": 5777 }, { "epoch": 0.7710168134507606, "grad_norm": 0.9603881195608965, "learning_rate": 2.626375071465955e-06, "loss": 1.5821, "step": 5778 }, { "epoch": 0.7711502535361623, "grad_norm": 0.9430362885685091, "learning_rate": 2.623456325291669e-06, "loss": 1.5404, "step": 5779 }, { "epoch": 0.7712836936215639, "grad_norm": 0.9446912811828378, "learning_rate": 2.6205389569717586e-06, "loss": 1.5394, "step": 5780 }, { "epoch": 0.7714171337069655, "grad_norm": 0.9412298808949228, "learning_rate": 2.6176229670511533e-06, "loss": 1.5538, "step": 5781 }, { "epoch": 0.7715505737923672, "grad_norm": 1.3095768335252589, "learning_rate": 2.6147083560745257e-06, "loss": 1.5564, "step": 5782 }, { "epoch": 0.7716840138777689, "grad_norm": 0.9918252751196945, "learning_rate": 2.6117951245862893e-06, "loss": 1.5451, "step": 5783 }, { "epoch": 0.7718174539631706, "grad_norm": 0.996958048566631, "learning_rate": 2.608883273130609e-06, "loss": 1.5229, "step": 5784 }, { "epoch": 0.7719508940485722, "grad_norm": 0.957110530564296, "learning_rate": 2.6059728022513832e-06, "loss": 1.55, "step": 5785 }, { "epoch": 0.7720843341339738, "grad_norm": 1.0399836848407493, "learning_rate": 2.603063712492252e-06, "loss": 1.579, "step": 5786 }, { "epoch": 0.7722177742193755, "grad_norm": 0.9408064311310185, "learning_rate": 2.6001560043966e-06, "loss": 1.5572, "step": 5787 }, { "epoch": 0.7723512143047772, "grad_norm": 0.9412201777682861, "learning_rate": 2.5972496785075594e-06, "loss": 1.544, "step": 5788 }, { "epoch": 0.7724846543901788, "grad_norm": 1.0115310422207648, "learning_rate": 2.594344735367995e-06, "loss": 1.5008, "step": 5789 }, { "epoch": 0.7726180944755805, "grad_norm": 0.9227438226603408, "learning_rate": 2.591441175520514e-06, "loss": 1.5712, "step": 5790 }, { "epoch": 0.7727515345609821, "grad_norm": 0.9594213056108911, "learning_rate": 2.5885389995074752e-06, "loss": 1.5903, "step": 5791 }, { "epoch": 0.7728849746463837, "grad_norm": 1.086165595831062, "learning_rate": 2.5856382078709685e-06, "loss": 1.5691, "step": 5792 }, { "epoch": 0.7730184147317855, "grad_norm": 0.9505020761449048, "learning_rate": 2.5827388011528296e-06, "loss": 1.5793, "step": 5793 }, { "epoch": 0.7731518548171871, "grad_norm": 3.5110780857670107, "learning_rate": 2.57984077989463e-06, "loss": 1.5527, "step": 5794 }, { "epoch": 0.7732852949025887, "grad_norm": 0.9455488863259419, "learning_rate": 2.576944144637694e-06, "loss": 1.5232, "step": 5795 }, { "epoch": 0.7734187349879904, "grad_norm": 0.9672606415809805, "learning_rate": 2.574048895923078e-06, "loss": 1.5559, "step": 5796 }, { "epoch": 0.773552175073392, "grad_norm": 0.9385093821290592, "learning_rate": 2.571155034291577e-06, "loss": 1.5565, "step": 5797 }, { "epoch": 0.7736856151587937, "grad_norm": 0.9520678330195554, "learning_rate": 2.5682625602837384e-06, "loss": 1.5558, "step": 5798 }, { "epoch": 0.7738190552441954, "grad_norm": 0.99051318050388, "learning_rate": 2.5653714744398393e-06, "loss": 1.5109, "step": 5799 }, { "epoch": 0.773952495329597, "grad_norm": 0.984714941570732, "learning_rate": 2.5624817772999033e-06, "loss": 1.5115, "step": 5800 }, { "epoch": 0.7740859354149987, "grad_norm": 0.9194604559966288, "learning_rate": 2.5595934694036882e-06, "loss": 1.4929, "step": 5801 }, { "epoch": 0.7742193755004003, "grad_norm": 0.9247157704040789, "learning_rate": 2.556706551290704e-06, "loss": 1.5369, "step": 5802 }, { "epoch": 0.7743528155858019, "grad_norm": 0.9360390213094082, "learning_rate": 2.5538210235001913e-06, "loss": 1.5576, "step": 5803 }, { "epoch": 0.7744862556712037, "grad_norm": 0.9064471735808803, "learning_rate": 2.5509368865711304e-06, "loss": 1.5415, "step": 5804 }, { "epoch": 0.7746196957566053, "grad_norm": 0.9513137240894838, "learning_rate": 2.548054141042251e-06, "loss": 1.5708, "step": 5805 }, { "epoch": 0.7747531358420069, "grad_norm": 1.2175807518906996, "learning_rate": 2.5451727874520148e-06, "loss": 1.514, "step": 5806 }, { "epoch": 0.7748865759274086, "grad_norm": 0.9892844938681642, "learning_rate": 2.542292826338626e-06, "loss": 1.5524, "step": 5807 }, { "epoch": 0.7750200160128102, "grad_norm": 1.0937981716259788, "learning_rate": 2.5394142582400238e-06, "loss": 1.4936, "step": 5808 }, { "epoch": 0.775153456098212, "grad_norm": 1.169626597958446, "learning_rate": 2.5365370836938997e-06, "loss": 1.5551, "step": 5809 }, { "epoch": 0.7752868961836136, "grad_norm": 0.9516314581228, "learning_rate": 2.5336613032376744e-06, "loss": 1.5305, "step": 5810 }, { "epoch": 0.7754203362690152, "grad_norm": 0.9404267765234818, "learning_rate": 2.5307869174085086e-06, "loss": 1.5901, "step": 5811 }, { "epoch": 0.7755537763544169, "grad_norm": 0.9359769036295512, "learning_rate": 2.527913926743305e-06, "loss": 1.5633, "step": 5812 }, { "epoch": 0.7756872164398185, "grad_norm": 0.9494830051173815, "learning_rate": 2.5250423317787086e-06, "loss": 1.5296, "step": 5813 }, { "epoch": 0.7758206565252201, "grad_norm": 0.9336095498356144, "learning_rate": 2.522172133051101e-06, "loss": 1.5527, "step": 5814 }, { "epoch": 0.7759540966106219, "grad_norm": 0.9206894715103576, "learning_rate": 2.5193033310965987e-06, "loss": 1.5765, "step": 5815 }, { "epoch": 0.7760875366960235, "grad_norm": 0.9285054392212316, "learning_rate": 2.516435926451064e-06, "loss": 1.5186, "step": 5816 }, { "epoch": 0.7762209767814251, "grad_norm": 0.9469454273921929, "learning_rate": 2.513569919650095e-06, "loss": 1.5748, "step": 5817 }, { "epoch": 0.7763544168668268, "grad_norm": 1.0422340407226904, "learning_rate": 2.510705311229025e-06, "loss": 1.5367, "step": 5818 }, { "epoch": 0.7764878569522284, "grad_norm": 0.9341799059535522, "learning_rate": 2.507842101722938e-06, "loss": 1.5679, "step": 5819 }, { "epoch": 0.77662129703763, "grad_norm": 0.9183397021685292, "learning_rate": 2.5049802916666445e-06, "loss": 1.5037, "step": 5820 }, { "epoch": 0.7767547371230318, "grad_norm": 0.932957030523527, "learning_rate": 2.5021198815946978e-06, "loss": 1.5195, "step": 5821 }, { "epoch": 0.7768881772084334, "grad_norm": 0.9241883977653392, "learning_rate": 2.4992608720413893e-06, "loss": 1.5227, "step": 5822 }, { "epoch": 0.7770216172938351, "grad_norm": 1.2172435346645842, "learning_rate": 2.496403263540752e-06, "loss": 1.5402, "step": 5823 }, { "epoch": 0.7771550573792367, "grad_norm": 0.9449596951384638, "learning_rate": 2.493547056626554e-06, "loss": 1.5355, "step": 5824 }, { "epoch": 0.7772884974646384, "grad_norm": 1.0524048742416596, "learning_rate": 2.490692251832302e-06, "loss": 1.5999, "step": 5825 }, { "epoch": 0.7774219375500401, "grad_norm": 0.984791837305669, "learning_rate": 2.4878388496912377e-06, "loss": 1.5482, "step": 5826 }, { "epoch": 0.7775553776354417, "grad_norm": 0.9575185903107368, "learning_rate": 2.4849868507363506e-06, "loss": 1.5152, "step": 5827 }, { "epoch": 0.7776888177208433, "grad_norm": 0.9431849505627886, "learning_rate": 2.482136255500357e-06, "loss": 1.5557, "step": 5828 }, { "epoch": 0.777822257806245, "grad_norm": 0.9027138718964596, "learning_rate": 2.479287064515714e-06, "loss": 1.5192, "step": 5829 }, { "epoch": 0.7779556978916466, "grad_norm": 0.9769963398115894, "learning_rate": 2.476439278314624e-06, "loss": 1.5003, "step": 5830 }, { "epoch": 0.7780891379770483, "grad_norm": 1.0871003730614877, "learning_rate": 2.473592897429018e-06, "loss": 1.5801, "step": 5831 }, { "epoch": 0.77822257806245, "grad_norm": 0.9763548093718147, "learning_rate": 2.470747922390567e-06, "loss": 1.5494, "step": 5832 }, { "epoch": 0.7783560181478516, "grad_norm": 0.984785198847868, "learning_rate": 2.467904353730678e-06, "loss": 1.5478, "step": 5833 }, { "epoch": 0.7784894582332532, "grad_norm": 0.9601413586909818, "learning_rate": 2.465062191980503e-06, "loss": 1.5631, "step": 5834 }, { "epoch": 0.7786228983186549, "grad_norm": 1.0051540823117064, "learning_rate": 2.462221437670921e-06, "loss": 1.5519, "step": 5835 }, { "epoch": 0.7787563384040566, "grad_norm": 1.0448166746922907, "learning_rate": 2.4593820913325507e-06, "loss": 1.5673, "step": 5836 }, { "epoch": 0.7788897784894583, "grad_norm": 1.0626411844360724, "learning_rate": 2.4565441534957558e-06, "loss": 1.5583, "step": 5837 }, { "epoch": 0.7790232185748599, "grad_norm": 1.0838658214565313, "learning_rate": 2.453707624690628e-06, "loss": 1.5399, "step": 5838 }, { "epoch": 0.7791566586602615, "grad_norm": 1.0694003456730499, "learning_rate": 2.4508725054469973e-06, "loss": 1.5874, "step": 5839 }, { "epoch": 0.7792900987456632, "grad_norm": 1.0215015403096692, "learning_rate": 2.4480387962944297e-06, "loss": 1.545, "step": 5840 }, { "epoch": 0.7794235388310649, "grad_norm": 0.947422869064629, "learning_rate": 2.4452064977622357e-06, "loss": 1.541, "step": 5841 }, { "epoch": 0.7795569789164665, "grad_norm": 0.9555266023407392, "learning_rate": 2.4423756103794538e-06, "loss": 1.5314, "step": 5842 }, { "epoch": 0.7796904190018682, "grad_norm": 1.012624888514248, "learning_rate": 2.439546134674858e-06, "loss": 1.5672, "step": 5843 }, { "epoch": 0.7798238590872698, "grad_norm": 1.2038001979285369, "learning_rate": 2.436718071176969e-06, "loss": 1.5503, "step": 5844 }, { "epoch": 0.7799572991726714, "grad_norm": 0.9750829570129861, "learning_rate": 2.4338914204140317e-06, "loss": 1.5958, "step": 5845 }, { "epoch": 0.7800907392580732, "grad_norm": 1.0711410399768173, "learning_rate": 2.4310661829140335e-06, "loss": 1.5094, "step": 5846 }, { "epoch": 0.7802241793434748, "grad_norm": 0.960236661793123, "learning_rate": 2.4282423592046956e-06, "loss": 1.5084, "step": 5847 }, { "epoch": 0.7803576194288764, "grad_norm": 0.9702210353839494, "learning_rate": 2.425419949813479e-06, "loss": 1.5571, "step": 5848 }, { "epoch": 0.7804910595142781, "grad_norm": 0.9253780960251425, "learning_rate": 2.4225989552675756e-06, "loss": 1.579, "step": 5849 }, { "epoch": 0.7806244995996797, "grad_norm": 0.9233717044203158, "learning_rate": 2.419779376093916e-06, "loss": 1.5668, "step": 5850 }, { "epoch": 0.7807579396850814, "grad_norm": 0.9348350250160402, "learning_rate": 2.416961212819162e-06, "loss": 1.536, "step": 5851 }, { "epoch": 0.7808913797704831, "grad_norm": 0.9548401099327721, "learning_rate": 2.41414446596972e-06, "loss": 1.5628, "step": 5852 }, { "epoch": 0.7810248198558847, "grad_norm": 0.956552292282773, "learning_rate": 2.411329136071724e-06, "loss": 1.6029, "step": 5853 }, { "epoch": 0.7811582599412864, "grad_norm": 0.9985446373657281, "learning_rate": 2.4085152236510445e-06, "loss": 1.5279, "step": 5854 }, { "epoch": 0.781291700026688, "grad_norm": 0.9286530398266631, "learning_rate": 2.4057027292332857e-06, "loss": 1.5534, "step": 5855 }, { "epoch": 0.7814251401120896, "grad_norm": 0.9838982529058777, "learning_rate": 2.4028916533437963e-06, "loss": 1.5773, "step": 5856 }, { "epoch": 0.7815585801974914, "grad_norm": 0.9685070922452822, "learning_rate": 2.4000819965076506e-06, "loss": 1.5407, "step": 5857 }, { "epoch": 0.781692020282893, "grad_norm": 0.9289471086756744, "learning_rate": 2.3972737592496576e-06, "loss": 1.5773, "step": 5858 }, { "epoch": 0.7818254603682946, "grad_norm": 0.9607754330554751, "learning_rate": 2.3944669420943678e-06, "loss": 1.4774, "step": 5859 }, { "epoch": 0.7819589004536963, "grad_norm": 1.0361591280939995, "learning_rate": 2.3916615455660574e-06, "loss": 1.5427, "step": 5860 }, { "epoch": 0.7820923405390979, "grad_norm": 0.9200656629737055, "learning_rate": 2.3888575701887483e-06, "loss": 1.5319, "step": 5861 }, { "epoch": 0.7822257806244995, "grad_norm": 1.2418620489944565, "learning_rate": 2.3860550164861908e-06, "loss": 1.5005, "step": 5862 }, { "epoch": 0.7823592207099013, "grad_norm": 0.9438642577089267, "learning_rate": 2.3832538849818663e-06, "loss": 1.5266, "step": 5863 }, { "epoch": 0.7824926607953029, "grad_norm": 0.9488414068156699, "learning_rate": 2.3804541761989974e-06, "loss": 1.568, "step": 5864 }, { "epoch": 0.7826261008807046, "grad_norm": 0.9217470739999812, "learning_rate": 2.377655890660533e-06, "loss": 1.4917, "step": 5865 }, { "epoch": 0.7827595409661062, "grad_norm": 0.9403617005912336, "learning_rate": 2.374859028889168e-06, "loss": 1.5904, "step": 5866 }, { "epoch": 0.7828929810515078, "grad_norm": 0.9377360220679098, "learning_rate": 2.372063591407321e-06, "loss": 1.529, "step": 5867 }, { "epoch": 0.7830264211369096, "grad_norm": 0.9289834664828129, "learning_rate": 2.3692695787371443e-06, "loss": 1.5424, "step": 5868 }, { "epoch": 0.7831598612223112, "grad_norm": 0.9284928064699219, "learning_rate": 2.366476991400535e-06, "loss": 1.5683, "step": 5869 }, { "epoch": 0.7832933013077128, "grad_norm": 0.9414971322161758, "learning_rate": 2.3636858299191113e-06, "loss": 1.5869, "step": 5870 }, { "epoch": 0.7834267413931145, "grad_norm": 1.049554007951963, "learning_rate": 2.3608960948142334e-06, "loss": 1.581, "step": 5871 }, { "epoch": 0.7835601814785161, "grad_norm": 0.9261270821311429, "learning_rate": 2.3581077866069868e-06, "loss": 1.5777, "step": 5872 }, { "epoch": 0.7836936215639178, "grad_norm": 0.9228475363078417, "learning_rate": 2.3553209058182025e-06, "loss": 1.5916, "step": 5873 }, { "epoch": 0.7838270616493195, "grad_norm": 0.9372723449214597, "learning_rate": 2.3525354529684354e-06, "loss": 1.4841, "step": 5874 }, { "epoch": 0.7839605017347211, "grad_norm": 1.0271631020714844, "learning_rate": 2.349751428577972e-06, "loss": 1.5588, "step": 5875 }, { "epoch": 0.7840939418201228, "grad_norm": 0.9083784084217819, "learning_rate": 2.346968833166844e-06, "loss": 1.5394, "step": 5876 }, { "epoch": 0.7842273819055244, "grad_norm": 0.950058509885086, "learning_rate": 2.3441876672548046e-06, "loss": 1.5469, "step": 5877 }, { "epoch": 0.784360821990926, "grad_norm": 0.9649329509240152, "learning_rate": 2.341407931361345e-06, "loss": 1.6095, "step": 5878 }, { "epoch": 0.7844942620763278, "grad_norm": 0.9865778802161118, "learning_rate": 2.338629626005684e-06, "loss": 1.5117, "step": 5879 }, { "epoch": 0.7846277021617294, "grad_norm": 0.9479757386340179, "learning_rate": 2.335852751706784e-06, "loss": 1.5221, "step": 5880 }, { "epoch": 0.784761142247131, "grad_norm": 1.0079567810813972, "learning_rate": 2.3330773089833303e-06, "loss": 1.5785, "step": 5881 }, { "epoch": 0.7848945823325327, "grad_norm": 0.9604536184535951, "learning_rate": 2.3303032983537445e-06, "loss": 1.5498, "step": 5882 }, { "epoch": 0.7850280224179343, "grad_norm": 0.9554406788386689, "learning_rate": 2.327530720336176e-06, "loss": 1.4614, "step": 5883 }, { "epoch": 0.785161462503336, "grad_norm": 0.9339548764005035, "learning_rate": 2.3247595754485185e-06, "loss": 1.5512, "step": 5884 }, { "epoch": 0.7852949025887377, "grad_norm": 0.9499142303394195, "learning_rate": 2.321989864208386e-06, "loss": 1.5241, "step": 5885 }, { "epoch": 0.7854283426741393, "grad_norm": 0.976577687622226, "learning_rate": 2.319221587133127e-06, "loss": 1.5734, "step": 5886 }, { "epoch": 0.7855617827595409, "grad_norm": 1.0810206584465782, "learning_rate": 2.316454744739829e-06, "loss": 1.5224, "step": 5887 }, { "epoch": 0.7856952228449426, "grad_norm": 0.9179787586115475, "learning_rate": 2.313689337545304e-06, "loss": 1.472, "step": 5888 }, { "epoch": 0.7858286629303443, "grad_norm": 0.9423241752395348, "learning_rate": 2.310925366066099e-06, "loss": 1.5131, "step": 5889 }, { "epoch": 0.785962103015746, "grad_norm": 1.0074056359823236, "learning_rate": 2.308162830818489e-06, "loss": 1.5842, "step": 5890 }, { "epoch": 0.7860955431011476, "grad_norm": 1.140943510869096, "learning_rate": 2.3054017323184907e-06, "loss": 1.5383, "step": 5891 }, { "epoch": 0.7862289831865492, "grad_norm": 0.9686268722021224, "learning_rate": 2.3026420710818433e-06, "loss": 1.569, "step": 5892 }, { "epoch": 0.7863624232719509, "grad_norm": 1.0903976628772232, "learning_rate": 2.299883847624015e-06, "loss": 1.5378, "step": 5893 }, { "epoch": 0.7864958633573526, "grad_norm": 1.0249636474092916, "learning_rate": 2.297127062460218e-06, "loss": 1.4946, "step": 5894 }, { "epoch": 0.7866293034427542, "grad_norm": 0.9160337674447874, "learning_rate": 2.2943717161053856e-06, "loss": 1.563, "step": 5895 }, { "epoch": 0.7867627435281559, "grad_norm": 0.992905443952185, "learning_rate": 2.2916178090741848e-06, "loss": 1.5425, "step": 5896 }, { "epoch": 0.7868961836135575, "grad_norm": 1.0099924911665865, "learning_rate": 2.2888653418810114e-06, "loss": 1.5827, "step": 5897 }, { "epoch": 0.7870296236989591, "grad_norm": 0.9732471135416901, "learning_rate": 2.2861143150400002e-06, "loss": 1.5571, "step": 5898 }, { "epoch": 0.7871630637843609, "grad_norm": 0.9352056643017831, "learning_rate": 2.2833647290650084e-06, "loss": 1.526, "step": 5899 }, { "epoch": 0.7872965038697625, "grad_norm": 0.9444113116845154, "learning_rate": 2.2806165844696284e-06, "loss": 1.5729, "step": 5900 }, { "epoch": 0.7874299439551641, "grad_norm": 0.9816140583735885, "learning_rate": 2.277869881767182e-06, "loss": 1.5734, "step": 5901 }, { "epoch": 0.7875633840405658, "grad_norm": 1.2377672190640088, "learning_rate": 2.2751246214707223e-06, "loss": 1.5564, "step": 5902 }, { "epoch": 0.7876968241259674, "grad_norm": 0.9171027423735342, "learning_rate": 2.2723808040930282e-06, "loss": 1.4997, "step": 5903 }, { "epoch": 0.7878302642113691, "grad_norm": 1.0105329476647573, "learning_rate": 2.2696384301466224e-06, "loss": 1.5311, "step": 5904 }, { "epoch": 0.7879637042967708, "grad_norm": 0.9299980163921587, "learning_rate": 2.2668975001437433e-06, "loss": 1.5344, "step": 5905 }, { "epoch": 0.7880971443821724, "grad_norm": 0.9457043818410217, "learning_rate": 2.264158014596366e-06, "loss": 1.5521, "step": 5906 }, { "epoch": 0.7882305844675741, "grad_norm": 1.2092222254834237, "learning_rate": 2.2614199740161935e-06, "loss": 1.5422, "step": 5907 }, { "epoch": 0.7883640245529757, "grad_norm": 1.0681054745909435, "learning_rate": 2.2586833789146657e-06, "loss": 1.5889, "step": 5908 }, { "epoch": 0.7884974646383773, "grad_norm": 0.9368681322416695, "learning_rate": 2.2559482298029447e-06, "loss": 1.5816, "step": 5909 }, { "epoch": 0.7886309047237791, "grad_norm": 0.9228201723152973, "learning_rate": 2.2532145271919247e-06, "loss": 1.5449, "step": 5910 }, { "epoch": 0.7887643448091807, "grad_norm": 0.943660701996512, "learning_rate": 2.250482271592228e-06, "loss": 1.5683, "step": 5911 }, { "epoch": 0.7888977848945823, "grad_norm": 0.929628779619005, "learning_rate": 2.247751463514214e-06, "loss": 1.5122, "step": 5912 }, { "epoch": 0.789031224979984, "grad_norm": 0.9387161790518015, "learning_rate": 2.245022103467964e-06, "loss": 1.5627, "step": 5913 }, { "epoch": 0.7891646650653856, "grad_norm": 1.0924957479088928, "learning_rate": 2.242294191963291e-06, "loss": 1.5348, "step": 5914 }, { "epoch": 0.7892981051507872, "grad_norm": 0.9504151391255395, "learning_rate": 2.2395677295097362e-06, "loss": 1.523, "step": 5915 }, { "epoch": 0.789431545236189, "grad_norm": 0.9629607665332615, "learning_rate": 2.236842716616575e-06, "loss": 1.549, "step": 5916 }, { "epoch": 0.7895649853215906, "grad_norm": 1.0660833347570207, "learning_rate": 2.2341191537928087e-06, "loss": 1.5227, "step": 5917 }, { "epoch": 0.7896984254069923, "grad_norm": 0.953518244934922, "learning_rate": 2.2313970415471643e-06, "loss": 1.5568, "step": 5918 }, { "epoch": 0.7898318654923939, "grad_norm": 1.0025411935106783, "learning_rate": 2.2286763803881052e-06, "loss": 1.5277, "step": 5919 }, { "epoch": 0.7899653055777955, "grad_norm": 0.9253963712749954, "learning_rate": 2.2259571708238194e-06, "loss": 1.506, "step": 5920 }, { "epoch": 0.7900987456631973, "grad_norm": 0.9896713961958903, "learning_rate": 2.2232394133622225e-06, "loss": 1.5484, "step": 5921 }, { "epoch": 0.7902321857485989, "grad_norm": 1.1168051427517722, "learning_rate": 2.220523108510959e-06, "loss": 1.5591, "step": 5922 }, { "epoch": 0.7903656258340005, "grad_norm": 0.9692435749687704, "learning_rate": 2.2178082567774086e-06, "loss": 1.5639, "step": 5923 }, { "epoch": 0.7904990659194022, "grad_norm": 1.0227885427358632, "learning_rate": 2.2150948586686728e-06, "loss": 1.5236, "step": 5924 }, { "epoch": 0.7906325060048038, "grad_norm": 0.9422956336171604, "learning_rate": 2.21238291469158e-06, "loss": 1.5855, "step": 5925 }, { "epoch": 0.7907659460902055, "grad_norm": 1.3638108258879078, "learning_rate": 2.2096724253526956e-06, "loss": 1.5657, "step": 5926 }, { "epoch": 0.7908993861756072, "grad_norm": 0.8997297461212911, "learning_rate": 2.2069633911583067e-06, "loss": 1.5429, "step": 5927 }, { "epoch": 0.7910328262610088, "grad_norm": 0.964122119886273, "learning_rate": 2.204255812614429e-06, "loss": 1.5413, "step": 5928 }, { "epoch": 0.7911662663464104, "grad_norm": 0.9482056082342423, "learning_rate": 2.201549690226804e-06, "loss": 1.5358, "step": 5929 }, { "epoch": 0.7912997064318121, "grad_norm": 1.0861037743666542, "learning_rate": 2.198845024500912e-06, "loss": 1.5515, "step": 5930 }, { "epoch": 0.7914331465172137, "grad_norm": 0.9464827990376123, "learning_rate": 2.196141815941949e-06, "loss": 1.5723, "step": 5931 }, { "epoch": 0.7915665866026155, "grad_norm": 0.934897802356449, "learning_rate": 2.193440065054843e-06, "loss": 1.5334, "step": 5932 }, { "epoch": 0.7917000266880171, "grad_norm": 0.9891163916956548, "learning_rate": 2.1907397723442536e-06, "loss": 1.5606, "step": 5933 }, { "epoch": 0.7918334667734187, "grad_norm": 0.9667703054129473, "learning_rate": 2.188040938314564e-06, "loss": 1.5315, "step": 5934 }, { "epoch": 0.7919669068588204, "grad_norm": 0.9296819166809209, "learning_rate": 2.1853435634698837e-06, "loss": 1.5643, "step": 5935 }, { "epoch": 0.792100346944222, "grad_norm": 0.9552094294250992, "learning_rate": 2.1826476483140503e-06, "loss": 1.5044, "step": 5936 }, { "epoch": 0.7922337870296237, "grad_norm": 1.0811395195378388, "learning_rate": 2.1799531933506346e-06, "loss": 1.5888, "step": 5937 }, { "epoch": 0.7923672271150254, "grad_norm": 1.0395532411915318, "learning_rate": 2.177260199082928e-06, "loss": 1.5514, "step": 5938 }, { "epoch": 0.792500667200427, "grad_norm": 0.9561248759072963, "learning_rate": 2.174568666013951e-06, "loss": 1.5294, "step": 5939 }, { "epoch": 0.7926341072858286, "grad_norm": 0.9242513920785822, "learning_rate": 2.1718785946464483e-06, "loss": 1.5342, "step": 5940 }, { "epoch": 0.7927675473712303, "grad_norm": 0.9534837808168115, "learning_rate": 2.1691899854829014e-06, "loss": 1.5325, "step": 5941 }, { "epoch": 0.792900987456632, "grad_norm": 0.9322289028522723, "learning_rate": 2.1665028390255073e-06, "loss": 1.5002, "step": 5942 }, { "epoch": 0.7930344275420337, "grad_norm": 1.1723532471213491, "learning_rate": 2.1638171557761943e-06, "loss": 1.5653, "step": 5943 }, { "epoch": 0.7931678676274353, "grad_norm": 1.1432508740043539, "learning_rate": 2.1611329362366195e-06, "loss": 1.5889, "step": 5944 }, { "epoch": 0.7933013077128369, "grad_norm": 0.9622094585479455, "learning_rate": 2.1584501809081628e-06, "loss": 1.604, "step": 5945 }, { "epoch": 0.7934347477982386, "grad_norm": 0.9865589871665827, "learning_rate": 2.1557688902919305e-06, "loss": 1.5216, "step": 5946 }, { "epoch": 0.7935681878836403, "grad_norm": 1.0241441606366657, "learning_rate": 2.1530890648887628e-06, "loss": 1.5134, "step": 5947 }, { "epoch": 0.7937016279690419, "grad_norm": 0.9301015988878937, "learning_rate": 2.150410705199216e-06, "loss": 1.5294, "step": 5948 }, { "epoch": 0.7938350680544436, "grad_norm": 0.9621334353445851, "learning_rate": 2.147733811723579e-06, "loss": 1.5781, "step": 5949 }, { "epoch": 0.7939685081398452, "grad_norm": 0.9655006569053254, "learning_rate": 2.145058384961862e-06, "loss": 1.5538, "step": 5950 }, { "epoch": 0.7941019482252468, "grad_norm": 1.0251786646717194, "learning_rate": 2.1423844254138105e-06, "loss": 1.5959, "step": 5951 }, { "epoch": 0.7942353883106485, "grad_norm": 0.9424616777571333, "learning_rate": 2.139711933578885e-06, "loss": 1.5048, "step": 5952 }, { "epoch": 0.7943688283960502, "grad_norm": 0.9412547582328011, "learning_rate": 2.1370409099562774e-06, "loss": 1.5633, "step": 5953 }, { "epoch": 0.7945022684814518, "grad_norm": 1.016228232058081, "learning_rate": 2.134371355044902e-06, "loss": 1.5567, "step": 5954 }, { "epoch": 0.7946357085668535, "grad_norm": 0.9500451975852726, "learning_rate": 2.131703269343407e-06, "loss": 1.5571, "step": 5955 }, { "epoch": 0.7947691486522551, "grad_norm": 0.9985865820172933, "learning_rate": 2.1290366533501572e-06, "loss": 1.5439, "step": 5956 }, { "epoch": 0.7949025887376568, "grad_norm": 1.0616401273574188, "learning_rate": 2.126371507563244e-06, "loss": 1.5213, "step": 5957 }, { "epoch": 0.7950360288230585, "grad_norm": 0.9523800662623815, "learning_rate": 2.1237078324804906e-06, "loss": 1.5169, "step": 5958 }, { "epoch": 0.7951694689084601, "grad_norm": 0.9444862343077112, "learning_rate": 2.1210456285994397e-06, "loss": 1.5903, "step": 5959 }, { "epoch": 0.7953029089938618, "grad_norm": 1.037122868292527, "learning_rate": 2.1183848964173604e-06, "loss": 1.5931, "step": 5960 }, { "epoch": 0.7954363490792634, "grad_norm": 0.9387631641570781, "learning_rate": 2.115725636431243e-06, "loss": 1.5488, "step": 5961 }, { "epoch": 0.795569789164665, "grad_norm": 1.1749210941871775, "learning_rate": 2.1130678491378143e-06, "loss": 1.544, "step": 5962 }, { "epoch": 0.7957032292500668, "grad_norm": 0.9411833484581066, "learning_rate": 2.110411535033515e-06, "loss": 1.5385, "step": 5963 }, { "epoch": 0.7958366693354684, "grad_norm": 0.9404179032536638, "learning_rate": 2.1077566946145124e-06, "loss": 1.5881, "step": 5964 }, { "epoch": 0.79597010942087, "grad_norm": 0.9781497662602053, "learning_rate": 2.1051033283767054e-06, "loss": 1.5534, "step": 5965 }, { "epoch": 0.7961035495062717, "grad_norm": 0.9400404759624681, "learning_rate": 2.102451436815709e-06, "loss": 1.5625, "step": 5966 }, { "epoch": 0.7962369895916733, "grad_norm": 0.91952723034791, "learning_rate": 2.0998010204268683e-06, "loss": 1.5542, "step": 5967 }, { "epoch": 0.7963704296770749, "grad_norm": 0.9219414927078943, "learning_rate": 2.0971520797052468e-06, "loss": 1.5401, "step": 5968 }, { "epoch": 0.7965038697624767, "grad_norm": 0.9602262059129528, "learning_rate": 2.0945046151456417e-06, "loss": 1.5379, "step": 5969 }, { "epoch": 0.7966373098478783, "grad_norm": 0.9345892591110488, "learning_rate": 2.091858627242568e-06, "loss": 1.5246, "step": 5970 }, { "epoch": 0.79677074993328, "grad_norm": 0.9407983789526347, "learning_rate": 2.089214116490261e-06, "loss": 1.5373, "step": 5971 }, { "epoch": 0.7969041900186816, "grad_norm": 0.9833161312725881, "learning_rate": 2.0865710833826936e-06, "loss": 1.547, "step": 5972 }, { "epoch": 0.7970376301040832, "grad_norm": 1.0593932491674578, "learning_rate": 2.0839295284135486e-06, "loss": 1.5509, "step": 5973 }, { "epoch": 0.797171070189485, "grad_norm": 1.046152778746016, "learning_rate": 2.0812894520762416e-06, "loss": 1.6058, "step": 5974 }, { "epoch": 0.7973045102748866, "grad_norm": 1.0912168422367754, "learning_rate": 2.078650854863903e-06, "loss": 1.5468, "step": 5975 }, { "epoch": 0.7974379503602882, "grad_norm": 0.9464811881645303, "learning_rate": 2.0760137372694e-06, "loss": 1.551, "step": 5976 }, { "epoch": 0.7975713904456899, "grad_norm": 0.9185730065970497, "learning_rate": 2.0733780997853124e-06, "loss": 1.4854, "step": 5977 }, { "epoch": 0.7977048305310915, "grad_norm": 0.9526089149618532, "learning_rate": 2.0707439429039454e-06, "loss": 1.6068, "step": 5978 }, { "epoch": 0.7978382706164932, "grad_norm": 0.9587953235425581, "learning_rate": 2.0681112671173344e-06, "loss": 1.6037, "step": 5979 }, { "epoch": 0.7979717107018949, "grad_norm": 0.9361644772074595, "learning_rate": 2.065480072917231e-06, "loss": 1.5868, "step": 5980 }, { "epoch": 0.7981051507872965, "grad_norm": 0.9534441446175175, "learning_rate": 2.062850360795112e-06, "loss": 1.5701, "step": 5981 }, { "epoch": 0.7982385908726981, "grad_norm": 0.9946923698582439, "learning_rate": 2.060222131242178e-06, "loss": 1.5967, "step": 5982 }, { "epoch": 0.7983720309580998, "grad_norm": 0.9626053858910402, "learning_rate": 2.057595384749349e-06, "loss": 1.578, "step": 5983 }, { "epoch": 0.7985054710435014, "grad_norm": 1.1359335916882283, "learning_rate": 2.054970121807278e-06, "loss": 1.5129, "step": 5984 }, { "epoch": 0.7986389111289032, "grad_norm": 1.1202578382909025, "learning_rate": 2.0523463429063295e-06, "loss": 1.5608, "step": 5985 }, { "epoch": 0.7987723512143048, "grad_norm": 1.043896917536381, "learning_rate": 2.0497240485365975e-06, "loss": 1.5364, "step": 5986 }, { "epoch": 0.7989057912997064, "grad_norm": 1.0753022531810645, "learning_rate": 2.0471032391878963e-06, "loss": 1.5591, "step": 5987 }, { "epoch": 0.7990392313851081, "grad_norm": 1.2319921435864252, "learning_rate": 2.0444839153497618e-06, "loss": 1.5409, "step": 5988 }, { "epoch": 0.7991726714705097, "grad_norm": 1.00815621813523, "learning_rate": 2.041866077511453e-06, "loss": 1.5381, "step": 5989 }, { "epoch": 0.7993061115559114, "grad_norm": 0.9256965403165084, "learning_rate": 2.039249726161957e-06, "loss": 1.5051, "step": 5990 }, { "epoch": 0.7994395516413131, "grad_norm": 0.9415742466538367, "learning_rate": 2.0366348617899745e-06, "loss": 1.4909, "step": 5991 }, { "epoch": 0.7995729917267147, "grad_norm": 1.204722600477448, "learning_rate": 2.0340214848839347e-06, "loss": 1.5813, "step": 5992 }, { "epoch": 0.7997064318121163, "grad_norm": 0.9223199394194576, "learning_rate": 2.0314095959319822e-06, "loss": 1.5269, "step": 5993 }, { "epoch": 0.799839871897518, "grad_norm": 0.9377131565914819, "learning_rate": 2.0287991954219945e-06, "loss": 1.4804, "step": 5994 }, { "epoch": 0.7999733119829197, "grad_norm": 0.9766250429689536, "learning_rate": 2.0261902838415605e-06, "loss": 1.5481, "step": 5995 }, { "epoch": 0.8001067520683213, "grad_norm": 0.9549561111606707, "learning_rate": 2.023582861677995e-06, "loss": 1.5329, "step": 5996 }, { "epoch": 0.800240192153723, "grad_norm": 0.9789891454720687, "learning_rate": 2.020976929418338e-06, "loss": 1.5963, "step": 5997 }, { "epoch": 0.8003736322391246, "grad_norm": 0.9398469925192877, "learning_rate": 2.018372487549346e-06, "loss": 1.4924, "step": 5998 }, { "epoch": 0.8005070723245263, "grad_norm": 0.9346327961340182, "learning_rate": 2.015769536557499e-06, "loss": 1.5465, "step": 5999 }, { "epoch": 0.800640512409928, "grad_norm": 0.9516873692804836, "learning_rate": 2.0131680769289964e-06, "loss": 1.6011, "step": 6000 }, { "epoch": 0.8007739524953296, "grad_norm": 1.0067538076216085, "learning_rate": 2.0105681091497653e-06, "loss": 1.5703, "step": 6001 }, { "epoch": 0.8009073925807313, "grad_norm": 0.9585095677361529, "learning_rate": 2.007969633705449e-06, "loss": 1.5311, "step": 6002 }, { "epoch": 0.8010408326661329, "grad_norm": 0.9360385818007098, "learning_rate": 2.00537265108141e-06, "loss": 1.5563, "step": 6003 }, { "epoch": 0.8011742727515345, "grad_norm": 0.9620281711842354, "learning_rate": 2.002777161762739e-06, "loss": 1.5605, "step": 6004 }, { "epoch": 0.8013077128369362, "grad_norm": 0.9717123448452647, "learning_rate": 2.000183166234244e-06, "loss": 1.5998, "step": 6005 }, { "epoch": 0.8014411529223379, "grad_norm": 1.2665547727823556, "learning_rate": 1.997590664980451e-06, "loss": 1.5412, "step": 6006 }, { "epoch": 0.8015745930077395, "grad_norm": 0.9436274926419813, "learning_rate": 1.9949996584856093e-06, "loss": 1.5404, "step": 6007 }, { "epoch": 0.8017080330931412, "grad_norm": 0.9485403732848344, "learning_rate": 1.992410147233692e-06, "loss": 1.4864, "step": 6008 }, { "epoch": 0.8018414731785428, "grad_norm": 0.9397520552993349, "learning_rate": 1.989822131708391e-06, "loss": 1.5356, "step": 6009 }, { "epoch": 0.8019749132639445, "grad_norm": 0.9678083881364095, "learning_rate": 1.9872356123931137e-06, "loss": 1.5437, "step": 6010 }, { "epoch": 0.8021083533493462, "grad_norm": 8.488638953717834, "learning_rate": 1.984650589770998e-06, "loss": 1.5579, "step": 6011 }, { "epoch": 0.8022417934347478, "grad_norm": 0.9477151839816731, "learning_rate": 1.982067064324893e-06, "loss": 1.5687, "step": 6012 }, { "epoch": 0.8023752335201495, "grad_norm": 0.9561015940175384, "learning_rate": 1.979485036537373e-06, "loss": 1.5424, "step": 6013 }, { "epoch": 0.8025086736055511, "grad_norm": 0.940578131890771, "learning_rate": 1.976904506890729e-06, "loss": 1.5604, "step": 6014 }, { "epoch": 0.8026421136909527, "grad_norm": 0.971107847461901, "learning_rate": 1.9743254758669794e-06, "loss": 1.5516, "step": 6015 }, { "epoch": 0.8027755537763545, "grad_norm": 1.151724003936446, "learning_rate": 1.971747943947855e-06, "loss": 1.5418, "step": 6016 }, { "epoch": 0.8029089938617561, "grad_norm": 0.9436979624078382, "learning_rate": 1.969171911614809e-06, "loss": 1.5554, "step": 6017 }, { "epoch": 0.8030424339471577, "grad_norm": 1.088649547266759, "learning_rate": 1.9665973793490134e-06, "loss": 1.546, "step": 6018 }, { "epoch": 0.8031758740325594, "grad_norm": 1.1775693108073233, "learning_rate": 1.964024347631367e-06, "loss": 1.5331, "step": 6019 }, { "epoch": 0.803309314117961, "grad_norm": 0.9338043323421088, "learning_rate": 1.9614528169424784e-06, "loss": 1.5825, "step": 6020 }, { "epoch": 0.8034427542033626, "grad_norm": 0.9745864204912411, "learning_rate": 1.958882787762678e-06, "loss": 1.5838, "step": 6021 }, { "epoch": 0.8035761942887644, "grad_norm": 1.5277909690793727, "learning_rate": 1.9563142605720254e-06, "loss": 1.5049, "step": 6022 }, { "epoch": 0.803709634374166, "grad_norm": 0.9821884824130115, "learning_rate": 1.953747235850287e-06, "loss": 1.5272, "step": 6023 }, { "epoch": 0.8038430744595677, "grad_norm": 0.9310206254375682, "learning_rate": 1.9511817140769563e-06, "loss": 1.5323, "step": 6024 }, { "epoch": 0.8039765145449693, "grad_norm": 1.0213597847479, "learning_rate": 1.948617695731242e-06, "loss": 1.5192, "step": 6025 }, { "epoch": 0.8041099546303709, "grad_norm": 0.9562151575602164, "learning_rate": 1.9460551812920703e-06, "loss": 1.5758, "step": 6026 }, { "epoch": 0.8042433947157727, "grad_norm": 0.9387251801749865, "learning_rate": 1.943494171238095e-06, "loss": 1.5662, "step": 6027 }, { "epoch": 0.8043768348011743, "grad_norm": 0.9762010066502882, "learning_rate": 1.9409346660476834e-06, "loss": 1.573, "step": 6028 }, { "epoch": 0.8045102748865759, "grad_norm": 1.087852765493359, "learning_rate": 1.938376666198919e-06, "loss": 1.5465, "step": 6029 }, { "epoch": 0.8046437149719776, "grad_norm": 0.938575775358334, "learning_rate": 1.935820172169609e-06, "loss": 1.5516, "step": 6030 }, { "epoch": 0.8047771550573792, "grad_norm": 0.965176200493375, "learning_rate": 1.933265184437274e-06, "loss": 1.5584, "step": 6031 }, { "epoch": 0.8049105951427808, "grad_norm": 1.2915982271448758, "learning_rate": 1.930711703479162e-06, "loss": 1.594, "step": 6032 }, { "epoch": 0.8050440352281826, "grad_norm": 0.9273187149756227, "learning_rate": 1.928159729772231e-06, "loss": 1.5509, "step": 6033 }, { "epoch": 0.8051774753135842, "grad_norm": 0.9188671634655157, "learning_rate": 1.925609263793162e-06, "loss": 1.5233, "step": 6034 }, { "epoch": 0.8053109153989858, "grad_norm": 1.009702927758053, "learning_rate": 1.9230603060183493e-06, "loss": 1.5312, "step": 6035 }, { "epoch": 0.8054443554843875, "grad_norm": 1.100233587871606, "learning_rate": 1.920512856923914e-06, "loss": 1.5386, "step": 6036 }, { "epoch": 0.8055777955697891, "grad_norm": 0.9308659676568186, "learning_rate": 1.9179669169856896e-06, "loss": 1.537, "step": 6037 }, { "epoch": 0.8057112356551909, "grad_norm": 0.9407626383495175, "learning_rate": 1.915422486679227e-06, "loss": 1.5401, "step": 6038 }, { "epoch": 0.8058446757405925, "grad_norm": 0.9436636824443787, "learning_rate": 1.912879566479795e-06, "loss": 1.5742, "step": 6039 }, { "epoch": 0.8059781158259941, "grad_norm": 0.9418856484629331, "learning_rate": 1.9103381568623868e-06, "loss": 1.5231, "step": 6040 }, { "epoch": 0.8061115559113958, "grad_norm": 0.9938636042780336, "learning_rate": 1.907798258301707e-06, "loss": 1.5326, "step": 6041 }, { "epoch": 0.8062449959967974, "grad_norm": 0.9167069778310091, "learning_rate": 1.9052598712721771e-06, "loss": 1.5517, "step": 6042 }, { "epoch": 0.8063784360821991, "grad_norm": 0.9396867142428339, "learning_rate": 1.9027229962479433e-06, "loss": 1.5704, "step": 6043 }, { "epoch": 0.8065118761676008, "grad_norm": 0.9421359789583224, "learning_rate": 1.9001876337028635e-06, "loss": 1.5256, "step": 6044 }, { "epoch": 0.8066453162530024, "grad_norm": 0.973198022271884, "learning_rate": 1.8976537841105136e-06, "loss": 1.528, "step": 6045 }, { "epoch": 0.806778756338404, "grad_norm": 0.9243015337796414, "learning_rate": 1.895121447944185e-06, "loss": 1.5705, "step": 6046 }, { "epoch": 0.8069121964238057, "grad_norm": 0.9373269634820086, "learning_rate": 1.8925906256768957e-06, "loss": 1.5274, "step": 6047 }, { "epoch": 0.8070456365092074, "grad_norm": 1.108272451485646, "learning_rate": 1.8900613177813708e-06, "loss": 1.5485, "step": 6048 }, { "epoch": 0.807179076594609, "grad_norm": 0.9455270267638988, "learning_rate": 1.8875335247300564e-06, "loss": 1.5525, "step": 6049 }, { "epoch": 0.8073125166800107, "grad_norm": 0.948942726258177, "learning_rate": 1.8850072469951142e-06, "loss": 1.5362, "step": 6050 }, { "epoch": 0.8074459567654123, "grad_norm": 1.0549675256800648, "learning_rate": 1.8824824850484269e-06, "loss": 1.5568, "step": 6051 }, { "epoch": 0.807579396850814, "grad_norm": 0.9693613341191636, "learning_rate": 1.8799592393615906e-06, "loss": 1.5386, "step": 6052 }, { "epoch": 0.8077128369362157, "grad_norm": 1.002495730710032, "learning_rate": 1.877437510405915e-06, "loss": 1.5567, "step": 6053 }, { "epoch": 0.8078462770216173, "grad_norm": 0.9399879061201342, "learning_rate": 1.8749172986524378e-06, "loss": 1.5198, "step": 6054 }, { "epoch": 0.807979717107019, "grad_norm": 0.916955889393747, "learning_rate": 1.8723986045719e-06, "loss": 1.5238, "step": 6055 }, { "epoch": 0.8081131571924206, "grad_norm": 0.9305269696315253, "learning_rate": 1.8698814286347678e-06, "loss": 1.5412, "step": 6056 }, { "epoch": 0.8082465972778222, "grad_norm": 0.9234504269114264, "learning_rate": 1.867365771311216e-06, "loss": 1.4926, "step": 6057 }, { "epoch": 0.808380037363224, "grad_norm": 0.9598446073788602, "learning_rate": 1.8648516330711486e-06, "loss": 1.5225, "step": 6058 }, { "epoch": 0.8085134774486256, "grad_norm": 1.006044534644564, "learning_rate": 1.8623390143841735e-06, "loss": 1.5641, "step": 6059 }, { "epoch": 0.8086469175340272, "grad_norm": 0.9891625291510784, "learning_rate": 1.8598279157196164e-06, "loss": 1.5121, "step": 6060 }, { "epoch": 0.8087803576194289, "grad_norm": 0.9313511828058488, "learning_rate": 1.8573183375465286e-06, "loss": 1.5156, "step": 6061 }, { "epoch": 0.8089137977048305, "grad_norm": 0.9339926309721247, "learning_rate": 1.8548102803336677e-06, "loss": 1.5326, "step": 6062 }, { "epoch": 0.8090472377902321, "grad_norm": 0.9664081912314659, "learning_rate": 1.8523037445495095e-06, "loss": 1.5461, "step": 6063 }, { "epoch": 0.8091806778756339, "grad_norm": 1.12857579777555, "learning_rate": 1.8497987306622446e-06, "loss": 1.5084, "step": 6064 }, { "epoch": 0.8093141179610355, "grad_norm": 1.0205222142779857, "learning_rate": 1.8472952391397846e-06, "loss": 1.5501, "step": 6065 }, { "epoch": 0.8094475580464372, "grad_norm": 0.9269906202615147, "learning_rate": 1.844793270449753e-06, "loss": 1.5647, "step": 6066 }, { "epoch": 0.8095809981318388, "grad_norm": 0.9111441101773377, "learning_rate": 1.8422928250594884e-06, "loss": 1.532, "step": 6067 }, { "epoch": 0.8097144382172404, "grad_norm": 1.0423582302266032, "learning_rate": 1.8397939034360424e-06, "loss": 1.5852, "step": 6068 }, { "epoch": 0.8098478783026422, "grad_norm": 0.9495371122585425, "learning_rate": 1.837296506046189e-06, "loss": 1.5211, "step": 6069 }, { "epoch": 0.8099813183880438, "grad_norm": 0.9361902779942389, "learning_rate": 1.834800633356414e-06, "loss": 1.5148, "step": 6070 }, { "epoch": 0.8101147584734454, "grad_norm": 1.234996354576864, "learning_rate": 1.8323062858329155e-06, "loss": 1.6077, "step": 6071 }, { "epoch": 0.8102481985588471, "grad_norm": 0.9278663038573935, "learning_rate": 1.82981346394161e-06, "loss": 1.5558, "step": 6072 }, { "epoch": 0.8103816386442487, "grad_norm": 0.9476924229244504, "learning_rate": 1.827322168148129e-06, "loss": 1.5618, "step": 6073 }, { "epoch": 0.8105150787296503, "grad_norm": 0.9319818911496207, "learning_rate": 1.8248323989178151e-06, "loss": 1.5607, "step": 6074 }, { "epoch": 0.8106485188150521, "grad_norm": 0.9145783092870352, "learning_rate": 1.8223441567157329e-06, "loss": 1.5446, "step": 6075 }, { "epoch": 0.8107819589004537, "grad_norm": 0.9381651247628452, "learning_rate": 1.8198574420066572e-06, "loss": 1.5931, "step": 6076 }, { "epoch": 0.8109153989858553, "grad_norm": 0.9248267670752973, "learning_rate": 1.8173722552550766e-06, "loss": 1.5598, "step": 6077 }, { "epoch": 0.811048839071257, "grad_norm": 0.933228793651718, "learning_rate": 1.8148885969251928e-06, "loss": 1.5767, "step": 6078 }, { "epoch": 0.8111822791566586, "grad_norm": 0.907509359626052, "learning_rate": 1.8124064674809316e-06, "loss": 1.5065, "step": 6079 }, { "epoch": 0.8113157192420604, "grad_norm": 0.9228783070319467, "learning_rate": 1.8099258673859221e-06, "loss": 1.4855, "step": 6080 }, { "epoch": 0.811449159327462, "grad_norm": 1.0177477019581869, "learning_rate": 1.8074467971035136e-06, "loss": 1.4958, "step": 6081 }, { "epoch": 0.8115825994128636, "grad_norm": 0.9588231215311086, "learning_rate": 1.8049692570967647e-06, "loss": 1.5646, "step": 6082 }, { "epoch": 0.8117160394982653, "grad_norm": 0.9155594177985604, "learning_rate": 1.8024932478284584e-06, "loss": 1.5231, "step": 6083 }, { "epoch": 0.8118494795836669, "grad_norm": 0.9565317839445175, "learning_rate": 1.8000187697610804e-06, "loss": 1.6076, "step": 6084 }, { "epoch": 0.8119829196690685, "grad_norm": 0.9745756544326605, "learning_rate": 1.7975458233568332e-06, "loss": 1.5622, "step": 6085 }, { "epoch": 0.8121163597544703, "grad_norm": 1.0317394403946698, "learning_rate": 1.7950744090776407e-06, "loss": 1.5838, "step": 6086 }, { "epoch": 0.8122497998398719, "grad_norm": 0.9537967660036222, "learning_rate": 1.792604527385131e-06, "loss": 1.524, "step": 6087 }, { "epoch": 0.8123832399252735, "grad_norm": 1.1140810286258387, "learning_rate": 1.7901361787406524e-06, "loss": 1.5551, "step": 6088 }, { "epoch": 0.8125166800106752, "grad_norm": 0.9970484456453595, "learning_rate": 1.7876693636052588e-06, "loss": 1.563, "step": 6089 }, { "epoch": 0.8126501200960768, "grad_norm": 0.975647861270303, "learning_rate": 1.785204082439731e-06, "loss": 1.5874, "step": 6090 }, { "epoch": 0.8127835601814786, "grad_norm": 0.9369203293774957, "learning_rate": 1.7827403357045514e-06, "loss": 1.5696, "step": 6091 }, { "epoch": 0.8129170002668802, "grad_norm": 1.0641811139775401, "learning_rate": 1.7802781238599164e-06, "loss": 1.5656, "step": 6092 }, { "epoch": 0.8130504403522818, "grad_norm": 0.9475062980136262, "learning_rate": 1.777817447365746e-06, "loss": 1.5263, "step": 6093 }, { "epoch": 0.8131838804376835, "grad_norm": 0.9652813026259098, "learning_rate": 1.7753583066816637e-06, "loss": 1.5176, "step": 6094 }, { "epoch": 0.8133173205230851, "grad_norm": 1.1089594490573167, "learning_rate": 1.7729007022670085e-06, "loss": 1.5231, "step": 6095 }, { "epoch": 0.8134507606084868, "grad_norm": 0.9258367136985601, "learning_rate": 1.7704446345808312e-06, "loss": 1.5387, "step": 6096 }, { "epoch": 0.8135842006938885, "grad_norm": 1.0577847232171296, "learning_rate": 1.7679901040819004e-06, "loss": 1.6006, "step": 6097 }, { "epoch": 0.8137176407792901, "grad_norm": 1.1127882285883361, "learning_rate": 1.7655371112286946e-06, "loss": 1.5717, "step": 6098 }, { "epoch": 0.8138510808646917, "grad_norm": 1.0673924071414274, "learning_rate": 1.7630856564793996e-06, "loss": 1.5309, "step": 6099 }, { "epoch": 0.8139845209500934, "grad_norm": 0.9464251733577566, "learning_rate": 1.760635740291926e-06, "loss": 1.5303, "step": 6100 }, { "epoch": 0.814117961035495, "grad_norm": 0.9319478908646294, "learning_rate": 1.758187363123889e-06, "loss": 1.5401, "step": 6101 }, { "epoch": 0.8142514011208967, "grad_norm": 0.9287040927493834, "learning_rate": 1.7557405254326144e-06, "loss": 1.5454, "step": 6102 }, { "epoch": 0.8143848412062984, "grad_norm": 0.9409713573767424, "learning_rate": 1.7532952276751424e-06, "loss": 1.5564, "step": 6103 }, { "epoch": 0.8145182812917, "grad_norm": 1.1149430849686321, "learning_rate": 1.7508514703082336e-06, "loss": 1.5285, "step": 6104 }, { "epoch": 0.8146517213771017, "grad_norm": 0.942572754627203, "learning_rate": 1.748409253788349e-06, "loss": 1.5839, "step": 6105 }, { "epoch": 0.8147851614625033, "grad_norm": 1.0681724159465906, "learning_rate": 1.7459685785716651e-06, "loss": 1.5143, "step": 6106 }, { "epoch": 0.814918601547905, "grad_norm": 0.9013869560225362, "learning_rate": 1.7435294451140783e-06, "loss": 1.5133, "step": 6107 }, { "epoch": 0.8150520416333067, "grad_norm": 0.9616884493441265, "learning_rate": 1.7410918538711875e-06, "loss": 1.6119, "step": 6108 }, { "epoch": 0.8151854817187083, "grad_norm": 0.9680636039329384, "learning_rate": 1.738655805298307e-06, "loss": 1.5284, "step": 6109 }, { "epoch": 0.8153189218041099, "grad_norm": 0.9024891296925446, "learning_rate": 1.7362212998504635e-06, "loss": 1.5047, "step": 6110 }, { "epoch": 0.8154523618895116, "grad_norm": 0.9523905191421255, "learning_rate": 1.7337883379823917e-06, "loss": 1.5871, "step": 6111 }, { "epoch": 0.8155858019749133, "grad_norm": 0.9263795793949324, "learning_rate": 1.731356920148547e-06, "loss": 1.5285, "step": 6112 }, { "epoch": 0.8157192420603149, "grad_norm": 0.9647766098487651, "learning_rate": 1.7289270468030871e-06, "loss": 1.5834, "step": 6113 }, { "epoch": 0.8158526821457166, "grad_norm": 0.9197615859806225, "learning_rate": 1.726498718399886e-06, "loss": 1.5364, "step": 6114 }, { "epoch": 0.8159861222311182, "grad_norm": 0.9626572376096142, "learning_rate": 1.7240719353925262e-06, "loss": 1.5519, "step": 6115 }, { "epoch": 0.8161195623165198, "grad_norm": 0.9396787834195818, "learning_rate": 1.7216466982343039e-06, "loss": 1.5418, "step": 6116 }, { "epoch": 0.8162530024019216, "grad_norm": 0.91235670010565, "learning_rate": 1.7192230073782234e-06, "loss": 1.477, "step": 6117 }, { "epoch": 0.8163864424873232, "grad_norm": 0.9262321266478081, "learning_rate": 1.7168008632770072e-06, "loss": 1.5578, "step": 6118 }, { "epoch": 0.8165198825727249, "grad_norm": 0.9400952842702311, "learning_rate": 1.7143802663830821e-06, "loss": 1.4842, "step": 6119 }, { "epoch": 0.8166533226581265, "grad_norm": 0.9299282662667695, "learning_rate": 1.7119612171485877e-06, "loss": 1.5302, "step": 6120 }, { "epoch": 0.8167867627435281, "grad_norm": 0.9932044315186542, "learning_rate": 1.7095437160253725e-06, "loss": 1.5168, "step": 6121 }, { "epoch": 0.8169202028289299, "grad_norm": 0.9580569276859812, "learning_rate": 1.707127763465004e-06, "loss": 1.5769, "step": 6122 }, { "epoch": 0.8170536429143315, "grad_norm": 1.102986018038682, "learning_rate": 1.7047133599187515e-06, "loss": 1.531, "step": 6123 }, { "epoch": 0.8171870829997331, "grad_norm": 0.9193541580129797, "learning_rate": 1.702300505837594e-06, "loss": 1.5448, "step": 6124 }, { "epoch": 0.8173205230851348, "grad_norm": 0.9456674504166714, "learning_rate": 1.6998892016722334e-06, "loss": 1.5154, "step": 6125 }, { "epoch": 0.8174539631705364, "grad_norm": 1.3332667316333, "learning_rate": 1.6974794478730683e-06, "loss": 1.5605, "step": 6126 }, { "epoch": 0.817587403255938, "grad_norm": 0.9558729747564527, "learning_rate": 1.695071244890215e-06, "loss": 1.5821, "step": 6127 }, { "epoch": 0.8177208433413398, "grad_norm": 1.2620823710527826, "learning_rate": 1.6926645931734964e-06, "loss": 1.5201, "step": 6128 }, { "epoch": 0.8178542834267414, "grad_norm": 0.9179178473951229, "learning_rate": 1.6902594931724503e-06, "loss": 1.5354, "step": 6129 }, { "epoch": 0.817987723512143, "grad_norm": 0.9450627126995589, "learning_rate": 1.687855945336322e-06, "loss": 1.5345, "step": 6130 }, { "epoch": 0.8181211635975447, "grad_norm": 0.9178343790581889, "learning_rate": 1.6854539501140632e-06, "loss": 1.55, "step": 6131 }, { "epoch": 0.8182546036829463, "grad_norm": 0.9964264932180854, "learning_rate": 1.6830535079543432e-06, "loss": 1.5697, "step": 6132 }, { "epoch": 0.8183880437683481, "grad_norm": 0.9890708742005667, "learning_rate": 1.6806546193055374e-06, "loss": 1.5219, "step": 6133 }, { "epoch": 0.8185214838537497, "grad_norm": 0.9229456288469, "learning_rate": 1.6782572846157285e-06, "loss": 1.5209, "step": 6134 }, { "epoch": 0.8186549239391513, "grad_norm": 0.9130464886045846, "learning_rate": 1.6758615043327097e-06, "loss": 1.4973, "step": 6135 }, { "epoch": 0.818788364024553, "grad_norm": 0.953168022773084, "learning_rate": 1.6734672789039907e-06, "loss": 1.5345, "step": 6136 }, { "epoch": 0.8189218041099546, "grad_norm": 0.9178014077533045, "learning_rate": 1.6710746087767826e-06, "loss": 1.5069, "step": 6137 }, { "epoch": 0.8190552441953562, "grad_norm": 0.9433818817785761, "learning_rate": 1.668683494398008e-06, "loss": 1.5628, "step": 6138 }, { "epoch": 0.819188684280758, "grad_norm": 0.955567446026117, "learning_rate": 1.6662939362143028e-06, "loss": 1.5812, "step": 6139 }, { "epoch": 0.8193221243661596, "grad_norm": 1.0316675276310674, "learning_rate": 1.6639059346720065e-06, "loss": 1.5408, "step": 6140 }, { "epoch": 0.8194555644515612, "grad_norm": 1.0994368830180041, "learning_rate": 1.6615194902171728e-06, "loss": 1.5808, "step": 6141 }, { "epoch": 0.8195890045369629, "grad_norm": 0.9318559913000551, "learning_rate": 1.659134603295558e-06, "loss": 1.5227, "step": 6142 }, { "epoch": 0.8197224446223645, "grad_norm": 0.9700753022715083, "learning_rate": 1.6567512743526383e-06, "loss": 1.5809, "step": 6143 }, { "epoch": 0.8198558847077662, "grad_norm": 0.9487897185182752, "learning_rate": 1.6543695038335882e-06, "loss": 1.5474, "step": 6144 }, { "epoch": 0.8199893247931679, "grad_norm": 0.9629831813661353, "learning_rate": 1.651989292183297e-06, "loss": 1.5777, "step": 6145 }, { "epoch": 0.8201227648785695, "grad_norm": 1.057479149875002, "learning_rate": 1.6496106398463574e-06, "loss": 1.5347, "step": 6146 }, { "epoch": 0.8202562049639712, "grad_norm": 0.933470698014775, "learning_rate": 1.6472335472670798e-06, "loss": 1.5632, "step": 6147 }, { "epoch": 0.8203896450493728, "grad_norm": 1.5917968282853159, "learning_rate": 1.6448580148894755e-06, "loss": 1.5685, "step": 6148 }, { "epoch": 0.8205230851347745, "grad_norm": 0.9576309077077291, "learning_rate": 1.6424840431572652e-06, "loss": 1.5568, "step": 6149 }, { "epoch": 0.8206565252201762, "grad_norm": 1.0240140958776593, "learning_rate": 1.6401116325138843e-06, "loss": 1.5005, "step": 6150 }, { "epoch": 0.8207899653055778, "grad_norm": 1.042576719135628, "learning_rate": 1.6377407834024694e-06, "loss": 1.5537, "step": 6151 }, { "epoch": 0.8209234053909794, "grad_norm": 0.9365442898192005, "learning_rate": 1.6353714962658684e-06, "loss": 1.5258, "step": 6152 }, { "epoch": 0.8210568454763811, "grad_norm": 6.035336114616692, "learning_rate": 1.6330037715466373e-06, "loss": 1.5402, "step": 6153 }, { "epoch": 0.8211902855617828, "grad_norm": 1.2568273540776527, "learning_rate": 1.630637609687037e-06, "loss": 1.5612, "step": 6154 }, { "epoch": 0.8213237256471844, "grad_norm": 0.9408521308120555, "learning_rate": 1.6282730111290446e-06, "loss": 1.5436, "step": 6155 }, { "epoch": 0.8214571657325861, "grad_norm": 0.9116594461537203, "learning_rate": 1.6259099763143383e-06, "loss": 1.532, "step": 6156 }, { "epoch": 0.8215906058179877, "grad_norm": 0.9640703464570622, "learning_rate": 1.623548505684306e-06, "loss": 1.5475, "step": 6157 }, { "epoch": 0.8217240459033894, "grad_norm": 0.9628540017226783, "learning_rate": 1.6211885996800426e-06, "loss": 1.5435, "step": 6158 }, { "epoch": 0.821857485988791, "grad_norm": 1.0175932870044642, "learning_rate": 1.6188302587423532e-06, "loss": 1.5133, "step": 6159 }, { "epoch": 0.8219909260741927, "grad_norm": 0.9328929841123439, "learning_rate": 1.6164734833117458e-06, "loss": 1.4553, "step": 6160 }, { "epoch": 0.8221243661595944, "grad_norm": 0.9326921991444765, "learning_rate": 1.6141182738284444e-06, "loss": 1.58, "step": 6161 }, { "epoch": 0.822257806244996, "grad_norm": 0.9198207496240384, "learning_rate": 1.611764630732372e-06, "loss": 1.5411, "step": 6162 }, { "epoch": 0.8223912463303976, "grad_norm": 0.9240771169421748, "learning_rate": 1.60941255446316e-06, "loss": 1.5323, "step": 6163 }, { "epoch": 0.8225246864157993, "grad_norm": 0.9435650366109184, "learning_rate": 1.607062045460156e-06, "loss": 1.5653, "step": 6164 }, { "epoch": 0.822658126501201, "grad_norm": 1.0152052475294857, "learning_rate": 1.6047131041624041e-06, "loss": 1.5638, "step": 6165 }, { "epoch": 0.8227915665866026, "grad_norm": 1.0524123141586792, "learning_rate": 1.6023657310086605e-06, "loss": 1.5782, "step": 6166 }, { "epoch": 0.8229250066720043, "grad_norm": 0.9311840684459297, "learning_rate": 1.600019926437385e-06, "loss": 1.5414, "step": 6167 }, { "epoch": 0.8230584467574059, "grad_norm": 0.9019457165743004, "learning_rate": 1.597675690886753e-06, "loss": 1.5351, "step": 6168 }, { "epoch": 0.8231918868428075, "grad_norm": 0.923450760055443, "learning_rate": 1.5953330247946375e-06, "loss": 1.5313, "step": 6169 }, { "epoch": 0.8233253269282093, "grad_norm": 1.2831560841203558, "learning_rate": 1.5929919285986195e-06, "loss": 1.5338, "step": 6170 }, { "epoch": 0.8234587670136109, "grad_norm": 0.9339060300388902, "learning_rate": 1.5906524027359948e-06, "loss": 1.5511, "step": 6171 }, { "epoch": 0.8235922070990126, "grad_norm": 0.935595003361667, "learning_rate": 1.5883144476437572e-06, "loss": 1.5072, "step": 6172 }, { "epoch": 0.8237256471844142, "grad_norm": 0.9158207412122499, "learning_rate": 1.5859780637586098e-06, "loss": 1.5835, "step": 6173 }, { "epoch": 0.8238590872698158, "grad_norm": 1.050328096524351, "learning_rate": 1.5836432515169608e-06, "loss": 1.5829, "step": 6174 }, { "epoch": 0.8239925273552176, "grad_norm": 1.0263704705140466, "learning_rate": 1.5813100113549307e-06, "loss": 1.5493, "step": 6175 }, { "epoch": 0.8241259674406192, "grad_norm": 0.9214587200849176, "learning_rate": 1.5789783437083406e-06, "loss": 1.5124, "step": 6176 }, { "epoch": 0.8242594075260208, "grad_norm": 0.9624753786839196, "learning_rate": 1.5766482490127176e-06, "loss": 1.5424, "step": 6177 }, { "epoch": 0.8243928476114225, "grad_norm": 0.9397643380771115, "learning_rate": 1.5743197277032974e-06, "loss": 1.5551, "step": 6178 }, { "epoch": 0.8245262876968241, "grad_norm": 1.0598447563742128, "learning_rate": 1.5719927802150236e-06, "loss": 1.5502, "step": 6179 }, { "epoch": 0.8246597277822257, "grad_norm": 1.3413789438510157, "learning_rate": 1.5696674069825425e-06, "loss": 1.5785, "step": 6180 }, { "epoch": 0.8247931678676275, "grad_norm": 1.0362130690683422, "learning_rate": 1.5673436084402039e-06, "loss": 1.5472, "step": 6181 }, { "epoch": 0.8249266079530291, "grad_norm": 1.190451521816014, "learning_rate": 1.565021385022073e-06, "loss": 1.4974, "step": 6182 }, { "epoch": 0.8250600480384307, "grad_norm": 1.0064225238687168, "learning_rate": 1.5627007371619107e-06, "loss": 1.5868, "step": 6183 }, { "epoch": 0.8251934881238324, "grad_norm": 0.9183532599911997, "learning_rate": 1.560381665293189e-06, "loss": 1.5492, "step": 6184 }, { "epoch": 0.825326928209234, "grad_norm": 0.9451229117253995, "learning_rate": 1.5580641698490805e-06, "loss": 1.5507, "step": 6185 }, { "epoch": 0.8254603682946358, "grad_norm": 0.9256676754994859, "learning_rate": 1.5557482512624733e-06, "loss": 1.5438, "step": 6186 }, { "epoch": 0.8255938083800374, "grad_norm": 1.0241913570296761, "learning_rate": 1.5534339099659512e-06, "loss": 1.5336, "step": 6187 }, { "epoch": 0.825727248465439, "grad_norm": 0.9648855141551541, "learning_rate": 1.551121146391804e-06, "loss": 1.5565, "step": 6188 }, { "epoch": 0.8258606885508407, "grad_norm": 0.9417547056164751, "learning_rate": 1.5488099609720353e-06, "loss": 1.5198, "step": 6189 }, { "epoch": 0.8259941286362423, "grad_norm": 1.019075853789808, "learning_rate": 1.546500354138346e-06, "loss": 1.5174, "step": 6190 }, { "epoch": 0.826127568721644, "grad_norm": 0.950534403894064, "learning_rate": 1.5441923263221426e-06, "loss": 1.5386, "step": 6191 }, { "epoch": 0.8262610088070457, "grad_norm": 0.9522247872246755, "learning_rate": 1.5418858779545387e-06, "loss": 1.5558, "step": 6192 }, { "epoch": 0.8263944488924473, "grad_norm": 1.14220578778169, "learning_rate": 1.5395810094663544e-06, "loss": 1.5794, "step": 6193 }, { "epoch": 0.8265278889778489, "grad_norm": 0.9516184349282555, "learning_rate": 1.537277721288113e-06, "loss": 1.5279, "step": 6194 }, { "epoch": 0.8266613290632506, "grad_norm": 0.9428708938228508, "learning_rate": 1.5349760138500414e-06, "loss": 1.5303, "step": 6195 }, { "epoch": 0.8267947691486522, "grad_norm": 0.906052569291839, "learning_rate": 1.5326758875820724e-06, "loss": 1.535, "step": 6196 }, { "epoch": 0.8269282092340539, "grad_norm": 0.9243034140621643, "learning_rate": 1.5303773429138414e-06, "loss": 1.5628, "step": 6197 }, { "epoch": 0.8270616493194556, "grad_norm": 0.9422165125262247, "learning_rate": 1.5280803802746947e-06, "loss": 1.5517, "step": 6198 }, { "epoch": 0.8271950894048572, "grad_norm": 1.0325744245704787, "learning_rate": 1.5257850000936768e-06, "loss": 1.5963, "step": 6199 }, { "epoch": 0.8273285294902589, "grad_norm": 1.1261838747600312, "learning_rate": 1.5234912027995374e-06, "loss": 1.5573, "step": 6200 }, { "epoch": 0.8274619695756605, "grad_norm": 1.0318114988538098, "learning_rate": 1.5211989888207335e-06, "loss": 1.5389, "step": 6201 }, { "epoch": 0.8275954096610622, "grad_norm": 0.9575726653864882, "learning_rate": 1.5189083585854213e-06, "loss": 1.5731, "step": 6202 }, { "epoch": 0.8277288497464639, "grad_norm": 0.9495250274991178, "learning_rate": 1.516619312521469e-06, "loss": 1.5819, "step": 6203 }, { "epoch": 0.8278622898318655, "grad_norm": 0.9001039404937866, "learning_rate": 1.514331851056442e-06, "loss": 1.5126, "step": 6204 }, { "epoch": 0.8279957299172671, "grad_norm": 0.9596548897156311, "learning_rate": 1.512045974617612e-06, "loss": 1.5236, "step": 6205 }, { "epoch": 0.8281291700026688, "grad_norm": 0.948592313867705, "learning_rate": 1.5097616836319528e-06, "loss": 1.5963, "step": 6206 }, { "epoch": 0.8282626100880705, "grad_norm": 0.9678395783216994, "learning_rate": 1.507478978526149e-06, "loss": 1.5601, "step": 6207 }, { "epoch": 0.8283960501734721, "grad_norm": 0.9282502916058483, "learning_rate": 1.5051978597265814e-06, "loss": 1.5221, "step": 6208 }, { "epoch": 0.8285294902588738, "grad_norm": 1.1106582385865, "learning_rate": 1.502918327659335e-06, "loss": 1.5289, "step": 6209 }, { "epoch": 0.8286629303442754, "grad_norm": 1.1265979346871835, "learning_rate": 1.5006403827502014e-06, "loss": 1.5341, "step": 6210 }, { "epoch": 0.828796370429677, "grad_norm": 1.035765628780522, "learning_rate": 1.4983640254246767e-06, "loss": 1.5652, "step": 6211 }, { "epoch": 0.8289298105150787, "grad_norm": 0.9611613457634671, "learning_rate": 1.4960892561079577e-06, "loss": 1.5555, "step": 6212 }, { "epoch": 0.8290632506004804, "grad_norm": 0.9101115150391339, "learning_rate": 1.4938160752249431e-06, "loss": 1.5279, "step": 6213 }, { "epoch": 0.8291966906858821, "grad_norm": 0.9444030552766006, "learning_rate": 1.4915444832002413e-06, "loss": 1.6253, "step": 6214 }, { "epoch": 0.8293301307712837, "grad_norm": 0.9679370076321139, "learning_rate": 1.4892744804581572e-06, "loss": 1.5131, "step": 6215 }, { "epoch": 0.8294635708566853, "grad_norm": 0.9687392610701684, "learning_rate": 1.4870060674227016e-06, "loss": 1.5411, "step": 6216 }, { "epoch": 0.829597010942087, "grad_norm": 0.9324868882302184, "learning_rate": 1.4847392445175867e-06, "loss": 1.5546, "step": 6217 }, { "epoch": 0.8297304510274887, "grad_norm": 0.9659785539366846, "learning_rate": 1.482474012166234e-06, "loss": 1.5354, "step": 6218 }, { "epoch": 0.8298638911128903, "grad_norm": 1.0403468201365482, "learning_rate": 1.4802103707917591e-06, "loss": 1.5384, "step": 6219 }, { "epoch": 0.829997331198292, "grad_norm": 0.9235060298849219, "learning_rate": 1.4779483208169832e-06, "loss": 1.4958, "step": 6220 }, { "epoch": 0.8301307712836936, "grad_norm": 0.9948663890358413, "learning_rate": 1.4756878626644367e-06, "loss": 1.5092, "step": 6221 }, { "epoch": 0.8302642113690952, "grad_norm": 1.2438225582984503, "learning_rate": 1.4734289967563442e-06, "loss": 1.5525, "step": 6222 }, { "epoch": 0.830397651454497, "grad_norm": 0.9394237729404794, "learning_rate": 1.471171723514636e-06, "loss": 1.5704, "step": 6223 }, { "epoch": 0.8305310915398986, "grad_norm": 0.9220696013800785, "learning_rate": 1.468916043360944e-06, "loss": 1.5649, "step": 6224 }, { "epoch": 0.8306645316253003, "grad_norm": 0.9661997734554147, "learning_rate": 1.4666619567166074e-06, "loss": 1.5551, "step": 6225 }, { "epoch": 0.8307979717107019, "grad_norm": 0.9713988388879101, "learning_rate": 1.4644094640026607e-06, "loss": 1.4893, "step": 6226 }, { "epoch": 0.8309314117961035, "grad_norm": 0.9362262546367968, "learning_rate": 1.4621585656398429e-06, "loss": 1.5044, "step": 6227 }, { "epoch": 0.8310648518815053, "grad_norm": 1.070256981646457, "learning_rate": 1.459909262048601e-06, "loss": 1.5317, "step": 6228 }, { "epoch": 0.8311982919669069, "grad_norm": 0.9309802597099771, "learning_rate": 1.4576615536490756e-06, "loss": 1.5314, "step": 6229 }, { "epoch": 0.8313317320523085, "grad_norm": 0.9534061214073728, "learning_rate": 1.4554154408611142e-06, "loss": 1.5432, "step": 6230 }, { "epoch": 0.8314651721377102, "grad_norm": 0.9260408668974777, "learning_rate": 1.4531709241042624e-06, "loss": 1.4923, "step": 6231 }, { "epoch": 0.8315986122231118, "grad_norm": 1.0984291256861187, "learning_rate": 1.4509280037977746e-06, "loss": 1.5121, "step": 6232 }, { "epoch": 0.8317320523085134, "grad_norm": 1.080816725382994, "learning_rate": 1.4486866803606003e-06, "loss": 1.5719, "step": 6233 }, { "epoch": 0.8318654923939152, "grad_norm": 0.9403182913647171, "learning_rate": 1.4464469542113924e-06, "loss": 1.5661, "step": 6234 }, { "epoch": 0.8319989324793168, "grad_norm": 0.9315844916031217, "learning_rate": 1.4442088257685105e-06, "loss": 1.5355, "step": 6235 }, { "epoch": 0.8321323725647184, "grad_norm": 0.9523132017213204, "learning_rate": 1.4419722954500071e-06, "loss": 1.6324, "step": 6236 }, { "epoch": 0.8322658126501201, "grad_norm": 0.9122663743215488, "learning_rate": 1.4397373636736435e-06, "loss": 1.51, "step": 6237 }, { "epoch": 0.8323992527355217, "grad_norm": 1.2517381014691573, "learning_rate": 1.4375040308568765e-06, "loss": 1.6326, "step": 6238 }, { "epoch": 0.8325326928209235, "grad_norm": 1.0826573600314662, "learning_rate": 1.4352722974168675e-06, "loss": 1.5585, "step": 6239 }, { "epoch": 0.8326661329063251, "grad_norm": 0.9348054260521418, "learning_rate": 1.4330421637704828e-06, "loss": 1.4989, "step": 6240 }, { "epoch": 0.8327995729917267, "grad_norm": 0.9451331002738911, "learning_rate": 1.4308136303342835e-06, "loss": 1.5498, "step": 6241 }, { "epoch": 0.8329330130771284, "grad_norm": 0.9455134123416408, "learning_rate": 1.4285866975245333e-06, "loss": 1.4996, "step": 6242 }, { "epoch": 0.83306645316253, "grad_norm": 0.9642660018609069, "learning_rate": 1.4263613657571995e-06, "loss": 1.5439, "step": 6243 }, { "epoch": 0.8331998932479316, "grad_norm": 1.2808082607240958, "learning_rate": 1.4241376354479475e-06, "loss": 1.5403, "step": 6244 }, { "epoch": 0.8333333333333334, "grad_norm": 1.8043818463976273, "learning_rate": 1.4219155070121438e-06, "loss": 1.5046, "step": 6245 }, { "epoch": 0.833466773418735, "grad_norm": 0.949954061324863, "learning_rate": 1.4196949808648597e-06, "loss": 1.5568, "step": 6246 }, { "epoch": 0.8336002135041366, "grad_norm": 0.9241430802845367, "learning_rate": 1.4174760574208634e-06, "loss": 1.5384, "step": 6247 }, { "epoch": 0.8337336535895383, "grad_norm": 0.9247044084518099, "learning_rate": 1.4152587370946235e-06, "loss": 1.5395, "step": 6248 }, { "epoch": 0.8338670936749399, "grad_norm": 0.9470481656442147, "learning_rate": 1.4130430203003088e-06, "loss": 1.5524, "step": 6249 }, { "epoch": 0.8340005337603416, "grad_norm": 0.95349918298535, "learning_rate": 1.4108289074517934e-06, "loss": 1.592, "step": 6250 }, { "epoch": 0.8341339738457433, "grad_norm": 0.930859710005307, "learning_rate": 1.4086163989626467e-06, "loss": 1.5467, "step": 6251 }, { "epoch": 0.8342674139311449, "grad_norm": 0.9827202874058659, "learning_rate": 1.4064054952461382e-06, "loss": 1.5415, "step": 6252 }, { "epoch": 0.8344008540165466, "grad_norm": 1.0609242939930341, "learning_rate": 1.404196196715244e-06, "loss": 1.5581, "step": 6253 }, { "epoch": 0.8345342941019482, "grad_norm": 0.9879789904417466, "learning_rate": 1.401988503782633e-06, "loss": 1.541, "step": 6254 }, { "epoch": 0.8346677341873499, "grad_norm": 0.9589864610692524, "learning_rate": 1.3997824168606777e-06, "loss": 1.5305, "step": 6255 }, { "epoch": 0.8348011742727516, "grad_norm": 0.929209062077639, "learning_rate": 1.397577936361446e-06, "loss": 1.5345, "step": 6256 }, { "epoch": 0.8349346143581532, "grad_norm": 0.9515351783274919, "learning_rate": 1.3953750626967178e-06, "loss": 1.5299, "step": 6257 }, { "epoch": 0.8350680544435548, "grad_norm": 1.000540782983064, "learning_rate": 1.39317379627796e-06, "loss": 1.5766, "step": 6258 }, { "epoch": 0.8352014945289565, "grad_norm": 1.0567054179998463, "learning_rate": 1.3909741375163422e-06, "loss": 1.5723, "step": 6259 }, { "epoch": 0.8353349346143581, "grad_norm": 0.9549905329939615, "learning_rate": 1.3887760868227396e-06, "loss": 1.5789, "step": 6260 }, { "epoch": 0.8354683746997598, "grad_norm": 0.9872576585804954, "learning_rate": 1.386579644607723e-06, "loss": 1.5734, "step": 6261 }, { "epoch": 0.8356018147851615, "grad_norm": 0.9367144021734644, "learning_rate": 1.3843848112815594e-06, "loss": 1.5708, "step": 6262 }, { "epoch": 0.8357352548705631, "grad_norm": 0.9513673423313426, "learning_rate": 1.3821915872542202e-06, "loss": 1.5313, "step": 6263 }, { "epoch": 0.8358686949559647, "grad_norm": 0.9567825688256574, "learning_rate": 1.3799999729353764e-06, "loss": 1.5118, "step": 6264 }, { "epoch": 0.8360021350413664, "grad_norm": 1.0863657842635757, "learning_rate": 1.3778099687343948e-06, "loss": 1.5669, "step": 6265 }, { "epoch": 0.8361355751267681, "grad_norm": 0.9751182715087741, "learning_rate": 1.375621575060343e-06, "loss": 1.5848, "step": 6266 }, { "epoch": 0.8362690152121698, "grad_norm": 0.9552963864700986, "learning_rate": 1.3734347923219893e-06, "loss": 1.5594, "step": 6267 }, { "epoch": 0.8364024552975714, "grad_norm": 0.9334563882233021, "learning_rate": 1.3712496209278004e-06, "loss": 1.5085, "step": 6268 }, { "epoch": 0.836535895382973, "grad_norm": 0.9418447706556196, "learning_rate": 1.3690660612859397e-06, "loss": 1.484, "step": 6269 }, { "epoch": 0.8366693354683747, "grad_norm": 1.2452469016859455, "learning_rate": 1.36688411380427e-06, "loss": 1.5939, "step": 6270 }, { "epoch": 0.8368027755537764, "grad_norm": 0.9630484251295978, "learning_rate": 1.3647037788903582e-06, "loss": 1.5588, "step": 6271 }, { "epoch": 0.836936215639178, "grad_norm": 0.9163101746800517, "learning_rate": 1.3625250569514636e-06, "loss": 1.543, "step": 6272 }, { "epoch": 0.8370696557245797, "grad_norm": 1.0159540192644183, "learning_rate": 1.3603479483945482e-06, "loss": 1.5317, "step": 6273 }, { "epoch": 0.8372030958099813, "grad_norm": 0.9495771661136421, "learning_rate": 1.3581724536262664e-06, "loss": 1.5721, "step": 6274 }, { "epoch": 0.8373365358953829, "grad_norm": 0.9259908173056178, "learning_rate": 1.3559985730529824e-06, "loss": 1.5375, "step": 6275 }, { "epoch": 0.8374699759807847, "grad_norm": 1.0746035612986713, "learning_rate": 1.353826307080749e-06, "loss": 1.545, "step": 6276 }, { "epoch": 0.8376034160661863, "grad_norm": 1.206161587602088, "learning_rate": 1.3516556561153182e-06, "loss": 1.4896, "step": 6277 }, { "epoch": 0.8377368561515879, "grad_norm": 0.9394100024042479, "learning_rate": 1.3494866205621492e-06, "loss": 1.569, "step": 6278 }, { "epoch": 0.8378702962369896, "grad_norm": 0.9298549718077799, "learning_rate": 1.347319200826389e-06, "loss": 1.5184, "step": 6279 }, { "epoch": 0.8380037363223912, "grad_norm": 0.9364031759470136, "learning_rate": 1.3451533973128873e-06, "loss": 1.5611, "step": 6280 }, { "epoch": 0.838137176407793, "grad_norm": 0.9438099018882347, "learning_rate": 1.3429892104261922e-06, "loss": 1.5884, "step": 6281 }, { "epoch": 0.8382706164931946, "grad_norm": 1.0963915983521988, "learning_rate": 1.3408266405705462e-06, "loss": 1.5056, "step": 6282 }, { "epoch": 0.8384040565785962, "grad_norm": 1.0553961263791714, "learning_rate": 1.3386656881498982e-06, "loss": 1.5072, "step": 6283 }, { "epoch": 0.8385374966639979, "grad_norm": 0.9313093194946619, "learning_rate": 1.3365063535678868e-06, "loss": 1.5504, "step": 6284 }, { "epoch": 0.8386709367493995, "grad_norm": 0.9472337824960516, "learning_rate": 1.3343486372278502e-06, "loss": 1.5925, "step": 6285 }, { "epoch": 0.8388043768348011, "grad_norm": 0.9563239409348798, "learning_rate": 1.3321925395328261e-06, "loss": 1.4905, "step": 6286 }, { "epoch": 0.8389378169202029, "grad_norm": 0.9193497426197157, "learning_rate": 1.33003806088555e-06, "loss": 1.5251, "step": 6287 }, { "epoch": 0.8390712570056045, "grad_norm": 0.9463362147811565, "learning_rate": 1.3278852016884491e-06, "loss": 1.5755, "step": 6288 }, { "epoch": 0.8392046970910061, "grad_norm": 1.0465969350369784, "learning_rate": 1.3257339623436606e-06, "loss": 1.558, "step": 6289 }, { "epoch": 0.8393381371764078, "grad_norm": 0.9078117750928425, "learning_rate": 1.323584343253007e-06, "loss": 1.5181, "step": 6290 }, { "epoch": 0.8394715772618094, "grad_norm": 0.9471706272387931, "learning_rate": 1.3214363448180111e-06, "loss": 1.5852, "step": 6291 }, { "epoch": 0.839605017347211, "grad_norm": 0.9438263601613096, "learning_rate": 1.3192899674398985e-06, "loss": 1.5328, "step": 6292 }, { "epoch": 0.8397384574326128, "grad_norm": 12.634716581956164, "learning_rate": 1.317145211519587e-06, "loss": 1.551, "step": 6293 }, { "epoch": 0.8398718975180144, "grad_norm": 0.947237397048608, "learning_rate": 1.315002077457692e-06, "loss": 1.523, "step": 6294 }, { "epoch": 0.8400053376034161, "grad_norm": 0.9268524675831373, "learning_rate": 1.3128605656545245e-06, "loss": 1.5293, "step": 6295 }, { "epoch": 0.8401387776888177, "grad_norm": 0.9206635334147892, "learning_rate": 1.3107206765100987e-06, "loss": 1.5504, "step": 6296 }, { "epoch": 0.8402722177742193, "grad_norm": 0.9437804573296683, "learning_rate": 1.3085824104241185e-06, "loss": 1.5366, "step": 6297 }, { "epoch": 0.8404056578596211, "grad_norm": 0.9532930752867279, "learning_rate": 1.3064457677959874e-06, "loss": 1.5794, "step": 6298 }, { "epoch": 0.8405390979450227, "grad_norm": 0.9331423826329348, "learning_rate": 1.3043107490248086e-06, "loss": 1.5141, "step": 6299 }, { "epoch": 0.8406725380304243, "grad_norm": 0.9831369699762285, "learning_rate": 1.3021773545093775e-06, "loss": 1.5939, "step": 6300 }, { "epoch": 0.840805978115826, "grad_norm": 0.9671730377179253, "learning_rate": 1.3000455846481886e-06, "loss": 1.5556, "step": 6301 }, { "epoch": 0.8409394182012276, "grad_norm": 0.9400385680698677, "learning_rate": 1.29791543983943e-06, "loss": 1.5407, "step": 6302 }, { "epoch": 0.8410728582866293, "grad_norm": 0.9409729894045126, "learning_rate": 1.2957869204809925e-06, "loss": 1.5392, "step": 6303 }, { "epoch": 0.841206298372031, "grad_norm": 0.94097206563248, "learning_rate": 1.2936600269704559e-06, "loss": 1.5605, "step": 6304 }, { "epoch": 0.8413397384574326, "grad_norm": 0.9949943289521407, "learning_rate": 1.291534759705102e-06, "loss": 1.6005, "step": 6305 }, { "epoch": 0.8414731785428343, "grad_norm": 0.9355636029437794, "learning_rate": 1.2894111190819025e-06, "loss": 1.5574, "step": 6306 }, { "epoch": 0.8416066186282359, "grad_norm": 0.9725584670762065, "learning_rate": 1.2872891054975346e-06, "loss": 1.5632, "step": 6307 }, { "epoch": 0.8417400587136376, "grad_norm": 0.9010547164650611, "learning_rate": 1.2851687193483642e-06, "loss": 1.4956, "step": 6308 }, { "epoch": 0.8418734987990393, "grad_norm": 0.975052957442709, "learning_rate": 1.2830499610304526e-06, "loss": 1.6136, "step": 6309 }, { "epoch": 0.8420069388844409, "grad_norm": 0.9766365640836961, "learning_rate": 1.280932830939564e-06, "loss": 1.5266, "step": 6310 }, { "epoch": 0.8421403789698425, "grad_norm": 0.9456532975205986, "learning_rate": 1.2788173294711526e-06, "loss": 1.5745, "step": 6311 }, { "epoch": 0.8422738190552442, "grad_norm": 0.9410865383508794, "learning_rate": 1.2767034570203685e-06, "loss": 1.5397, "step": 6312 }, { "epoch": 0.8424072591406458, "grad_norm": 0.9832507755872817, "learning_rate": 1.2745912139820594e-06, "loss": 1.589, "step": 6313 }, { "epoch": 0.8425406992260475, "grad_norm": 0.9510871209768055, "learning_rate": 1.2724806007507706e-06, "loss": 1.5622, "step": 6314 }, { "epoch": 0.8426741393114492, "grad_norm": 0.9418455606671381, "learning_rate": 1.2703716177207393e-06, "loss": 1.5386, "step": 6315 }, { "epoch": 0.8428075793968508, "grad_norm": 0.9267882316614677, "learning_rate": 1.2682642652858968e-06, "loss": 1.5246, "step": 6316 }, { "epoch": 0.8429410194822524, "grad_norm": 0.9241066105531126, "learning_rate": 1.2661585438398771e-06, "loss": 1.5255, "step": 6317 }, { "epoch": 0.8430744595676541, "grad_norm": 0.9299217006327872, "learning_rate": 1.2640544537760035e-06, "loss": 1.5331, "step": 6318 }, { "epoch": 0.8432078996530558, "grad_norm": 0.9460477011701305, "learning_rate": 1.261951995487295e-06, "loss": 1.5356, "step": 6319 }, { "epoch": 0.8433413397384575, "grad_norm": 0.99568183474739, "learning_rate": 1.259851169366465e-06, "loss": 1.5396, "step": 6320 }, { "epoch": 0.8434747798238591, "grad_norm": 0.9531319408928621, "learning_rate": 1.2577519758059286e-06, "loss": 1.5645, "step": 6321 }, { "epoch": 0.8436082199092607, "grad_norm": 1.4786773551886767, "learning_rate": 1.255654415197789e-06, "loss": 1.5033, "step": 6322 }, { "epoch": 0.8437416599946624, "grad_norm": 0.9837349009581106, "learning_rate": 1.2535584879338469e-06, "loss": 1.5878, "step": 6323 }, { "epoch": 0.8438751000800641, "grad_norm": 0.9380148129468954, "learning_rate": 1.2514641944055961e-06, "loss": 1.5127, "step": 6324 }, { "epoch": 0.8440085401654657, "grad_norm": 0.9800681388655729, "learning_rate": 1.2493715350042267e-06, "loss": 1.5177, "step": 6325 }, { "epoch": 0.8441419802508674, "grad_norm": 0.9363139844260511, "learning_rate": 1.2472805101206265e-06, "loss": 1.5309, "step": 6326 }, { "epoch": 0.844275420336269, "grad_norm": 0.9716477758023239, "learning_rate": 1.2451911201453747e-06, "loss": 1.5331, "step": 6327 }, { "epoch": 0.8444088604216706, "grad_norm": 1.037355252448747, "learning_rate": 1.243103365468743e-06, "loss": 1.5556, "step": 6328 }, { "epoch": 0.8445423005070724, "grad_norm": 1.127301529246472, "learning_rate": 1.2410172464807024e-06, "loss": 1.5456, "step": 6329 }, { "epoch": 0.844675740592474, "grad_norm": 0.9421048807023837, "learning_rate": 1.2389327635709136e-06, "loss": 1.4901, "step": 6330 }, { "epoch": 0.8448091806778756, "grad_norm": 1.1002964846418235, "learning_rate": 1.2368499171287374e-06, "loss": 1.6021, "step": 6331 }, { "epoch": 0.8449426207632773, "grad_norm": 0.9387328752601601, "learning_rate": 1.2347687075432246e-06, "loss": 1.5047, "step": 6332 }, { "epoch": 0.8450760608486789, "grad_norm": 0.9123192171247185, "learning_rate": 1.2326891352031223e-06, "loss": 1.5134, "step": 6333 }, { "epoch": 0.8452095009340806, "grad_norm": 1.1014224330465954, "learning_rate": 1.2306112004968662e-06, "loss": 1.5769, "step": 6334 }, { "epoch": 0.8453429410194823, "grad_norm": 0.9516250282510863, "learning_rate": 1.2285349038125981e-06, "loss": 1.585, "step": 6335 }, { "epoch": 0.8454763811048839, "grad_norm": 0.9572784838289615, "learning_rate": 1.2264602455381424e-06, "loss": 1.5436, "step": 6336 }, { "epoch": 0.8456098211902856, "grad_norm": 0.927276956609323, "learning_rate": 1.2243872260610223e-06, "loss": 1.4966, "step": 6337 }, { "epoch": 0.8457432612756872, "grad_norm": 0.9439895084993872, "learning_rate": 1.2223158457684526e-06, "loss": 1.5114, "step": 6338 }, { "epoch": 0.8458767013610888, "grad_norm": 1.0329741808696042, "learning_rate": 1.2202461050473469e-06, "loss": 1.5154, "step": 6339 }, { "epoch": 0.8460101414464906, "grad_norm": 0.9806884542561266, "learning_rate": 1.2181780042843071e-06, "loss": 1.4914, "step": 6340 }, { "epoch": 0.8461435815318922, "grad_norm": 1.0466476181426838, "learning_rate": 1.2161115438656301e-06, "loss": 1.5529, "step": 6341 }, { "epoch": 0.8462770216172938, "grad_norm": 0.9141443517568483, "learning_rate": 1.2140467241773103e-06, "loss": 1.513, "step": 6342 }, { "epoch": 0.8464104617026955, "grad_norm": 0.9944934691896574, "learning_rate": 1.2119835456050311e-06, "loss": 1.5567, "step": 6343 }, { "epoch": 0.8465439017880971, "grad_norm": 1.0618279777174555, "learning_rate": 1.2099220085341689e-06, "loss": 1.5453, "step": 6344 }, { "epoch": 0.8466773418734987, "grad_norm": 0.9826611891622069, "learning_rate": 1.2078621133497958e-06, "loss": 1.5373, "step": 6345 }, { "epoch": 0.8468107819589005, "grad_norm": 0.9429543528239474, "learning_rate": 1.2058038604366796e-06, "loss": 1.5587, "step": 6346 }, { "epoch": 0.8469442220443021, "grad_norm": 0.9780300370698021, "learning_rate": 1.2037472501792757e-06, "loss": 1.5841, "step": 6347 }, { "epoch": 0.8470776621297038, "grad_norm": 0.9485414768814703, "learning_rate": 1.201692282961735e-06, "loss": 1.532, "step": 6348 }, { "epoch": 0.8472111022151054, "grad_norm": 0.9528273449657707, "learning_rate": 1.1996389591679047e-06, "loss": 1.5611, "step": 6349 }, { "epoch": 0.847344542300507, "grad_norm": 0.9369740412100928, "learning_rate": 1.1975872791813225e-06, "loss": 1.5032, "step": 6350 }, { "epoch": 0.8474779823859088, "grad_norm": 0.9223426425400875, "learning_rate": 1.1955372433852163e-06, "loss": 1.5512, "step": 6351 }, { "epoch": 0.8476114224713104, "grad_norm": 0.9430681441104803, "learning_rate": 1.1934888521625076e-06, "loss": 1.561, "step": 6352 }, { "epoch": 0.847744862556712, "grad_norm": 0.969082370915136, "learning_rate": 1.1914421058958192e-06, "loss": 1.5353, "step": 6353 }, { "epoch": 0.8478783026421137, "grad_norm": 1.0413763556214914, "learning_rate": 1.1893970049674553e-06, "loss": 1.5652, "step": 6354 }, { "epoch": 0.8480117427275153, "grad_norm": 0.9204143577512675, "learning_rate": 1.1873535497594157e-06, "loss": 1.5406, "step": 6355 }, { "epoch": 0.848145182812917, "grad_norm": 1.1508462678819407, "learning_rate": 1.185311740653401e-06, "loss": 1.5223, "step": 6356 }, { "epoch": 0.8482786228983187, "grad_norm": 1.0061378039252045, "learning_rate": 1.1832715780307924e-06, "loss": 1.5438, "step": 6357 }, { "epoch": 0.8484120629837203, "grad_norm": 1.0156019695357563, "learning_rate": 1.181233062272672e-06, "loss": 1.5484, "step": 6358 }, { "epoch": 0.8485455030691219, "grad_norm": 0.9583797003702161, "learning_rate": 1.1791961937598073e-06, "loss": 1.5511, "step": 6359 }, { "epoch": 0.8486789431545236, "grad_norm": 1.77815502053269, "learning_rate": 1.1771609728726674e-06, "loss": 1.5944, "step": 6360 }, { "epoch": 0.8488123832399252, "grad_norm": 0.9601898228216447, "learning_rate": 1.1751273999914059e-06, "loss": 1.5584, "step": 6361 }, { "epoch": 0.848945823325327, "grad_norm": 1.0337878007106784, "learning_rate": 1.173095475495869e-06, "loss": 1.5255, "step": 6362 }, { "epoch": 0.8490792634107286, "grad_norm": 1.069797310519057, "learning_rate": 1.171065199765602e-06, "loss": 1.5929, "step": 6363 }, { "epoch": 0.8492127034961302, "grad_norm": 1.2157650247272633, "learning_rate": 1.1690365731798337e-06, "loss": 1.5466, "step": 6364 }, { "epoch": 0.8493461435815319, "grad_norm": 1.0078551254584747, "learning_rate": 1.1670095961174889e-06, "loss": 1.5735, "step": 6365 }, { "epoch": 0.8494795836669335, "grad_norm": 0.9609917847295425, "learning_rate": 1.1649842689571855e-06, "loss": 1.5789, "step": 6366 }, { "epoch": 0.8496130237523352, "grad_norm": 0.9255278292579112, "learning_rate": 1.1629605920772292e-06, "loss": 1.5726, "step": 6367 }, { "epoch": 0.8497464638377369, "grad_norm": 0.9461037345263595, "learning_rate": 1.1609385658556183e-06, "loss": 1.572, "step": 6368 }, { "epoch": 0.8498799039231385, "grad_norm": 0.9622357864602324, "learning_rate": 1.1589181906700498e-06, "loss": 1.5018, "step": 6369 }, { "epoch": 0.8500133440085401, "grad_norm": 0.9486160725025564, "learning_rate": 1.156899466897904e-06, "loss": 1.5379, "step": 6370 }, { "epoch": 0.8501467840939418, "grad_norm": 1.0361906044731701, "learning_rate": 1.1548823949162546e-06, "loss": 1.5514, "step": 6371 }, { "epoch": 0.8502802241793435, "grad_norm": 0.9657512138409541, "learning_rate": 1.1528669751018684e-06, "loss": 1.508, "step": 6372 }, { "epoch": 0.8504136642647452, "grad_norm": 0.9981316716898595, "learning_rate": 1.1508532078312007e-06, "loss": 1.5569, "step": 6373 }, { "epoch": 0.8505471043501468, "grad_norm": 0.9476506985506088, "learning_rate": 1.1488410934804051e-06, "loss": 1.539, "step": 6374 }, { "epoch": 0.8506805444355484, "grad_norm": 1.026783945248529, "learning_rate": 1.1468306324253187e-06, "loss": 1.55, "step": 6375 }, { "epoch": 0.8508139845209501, "grad_norm": 1.0018298913831105, "learning_rate": 1.1448218250414734e-06, "loss": 1.5283, "step": 6376 }, { "epoch": 0.8509474246063518, "grad_norm": 0.9541809577080766, "learning_rate": 1.1428146717040888e-06, "loss": 1.4845, "step": 6377 }, { "epoch": 0.8510808646917534, "grad_norm": 0.94168687520855, "learning_rate": 1.1408091727880822e-06, "loss": 1.5261, "step": 6378 }, { "epoch": 0.8512143047771551, "grad_norm": 0.9534267240246544, "learning_rate": 1.1388053286680566e-06, "loss": 1.5505, "step": 6379 }, { "epoch": 0.8513477448625567, "grad_norm": 0.9608021930487494, "learning_rate": 1.1368031397183055e-06, "loss": 1.5709, "step": 6380 }, { "epoch": 0.8514811849479583, "grad_norm": 1.0827148794751535, "learning_rate": 1.1348026063128193e-06, "loss": 1.54, "step": 6381 }, { "epoch": 0.85161462503336, "grad_norm": 0.9496776082560817, "learning_rate": 1.1328037288252714e-06, "loss": 1.5394, "step": 6382 }, { "epoch": 0.8517480651187617, "grad_norm": 0.925719277344387, "learning_rate": 1.1308065076290298e-06, "loss": 1.5546, "step": 6383 }, { "epoch": 0.8518815052041633, "grad_norm": 0.99649007906056, "learning_rate": 1.1288109430971516e-06, "loss": 1.5796, "step": 6384 }, { "epoch": 0.852014945289565, "grad_norm": 0.9217233177569367, "learning_rate": 1.1268170356023889e-06, "loss": 1.4987, "step": 6385 }, { "epoch": 0.8521483853749666, "grad_norm": 0.9795549846257534, "learning_rate": 1.124824785517179e-06, "loss": 1.498, "step": 6386 }, { "epoch": 0.8522818254603683, "grad_norm": 0.9466460505441463, "learning_rate": 1.122834193213649e-06, "loss": 1.5069, "step": 6387 }, { "epoch": 0.85241526554577, "grad_norm": 0.9622325848501593, "learning_rate": 1.1208452590636243e-06, "loss": 1.572, "step": 6388 }, { "epoch": 0.8525487056311716, "grad_norm": 0.907176146573117, "learning_rate": 1.1188579834386116e-06, "loss": 1.5447, "step": 6389 }, { "epoch": 0.8526821457165733, "grad_norm": 1.227317725567885, "learning_rate": 1.1168723667098115e-06, "loss": 1.5481, "step": 6390 }, { "epoch": 0.8528155858019749, "grad_norm": 0.9872294854469825, "learning_rate": 1.1148884092481138e-06, "loss": 1.5726, "step": 6391 }, { "epoch": 0.8529490258873765, "grad_norm": 0.9774988409822314, "learning_rate": 1.1129061114241024e-06, "loss": 1.5505, "step": 6392 }, { "epoch": 0.8530824659727783, "grad_norm": 0.9342823752805767, "learning_rate": 1.1109254736080456e-06, "loss": 1.5665, "step": 6393 }, { "epoch": 0.8532159060581799, "grad_norm": 0.945834873384594, "learning_rate": 1.1089464961699025e-06, "loss": 1.5237, "step": 6394 }, { "epoch": 0.8533493461435815, "grad_norm": 1.0390817853688066, "learning_rate": 1.106969179479328e-06, "loss": 1.559, "step": 6395 }, { "epoch": 0.8534827862289832, "grad_norm": 0.9915119704103906, "learning_rate": 1.1049935239056598e-06, "loss": 1.5479, "step": 6396 }, { "epoch": 0.8536162263143848, "grad_norm": 0.9778650816854455, "learning_rate": 1.1030195298179269e-06, "loss": 1.5655, "step": 6397 }, { "epoch": 0.8537496663997864, "grad_norm": 0.9412423616517623, "learning_rate": 1.101047197584849e-06, "loss": 1.5287, "step": 6398 }, { "epoch": 0.8538831064851882, "grad_norm": 0.9125544805452201, "learning_rate": 1.0990765275748383e-06, "loss": 1.5672, "step": 6399 }, { "epoch": 0.8540165465705898, "grad_norm": 0.9887312218815218, "learning_rate": 1.09710752015599e-06, "loss": 1.5265, "step": 6400 }, { "epoch": 0.8541499866559915, "grad_norm": 1.0483588182325054, "learning_rate": 1.0951401756960934e-06, "loss": 1.5733, "step": 6401 }, { "epoch": 0.8542834267413931, "grad_norm": 0.9407674603973352, "learning_rate": 1.0931744945626276e-06, "loss": 1.5581, "step": 6402 }, { "epoch": 0.8544168668267947, "grad_norm": 0.9313568972065337, "learning_rate": 1.0912104771227584e-06, "loss": 1.5329, "step": 6403 }, { "epoch": 0.8545503069121965, "grad_norm": 0.9194823821651573, "learning_rate": 1.0892481237433405e-06, "loss": 1.5163, "step": 6404 }, { "epoch": 0.8546837469975981, "grad_norm": 1.0427301724789466, "learning_rate": 1.08728743479092e-06, "loss": 1.5137, "step": 6405 }, { "epoch": 0.8548171870829997, "grad_norm": 0.9377269194420035, "learning_rate": 1.085328410631733e-06, "loss": 1.4877, "step": 6406 }, { "epoch": 0.8549506271684014, "grad_norm": 1.021530858485102, "learning_rate": 1.0833710516317009e-06, "loss": 1.599, "step": 6407 }, { "epoch": 0.855084067253803, "grad_norm": 0.9614653085487166, "learning_rate": 1.081415358156438e-06, "loss": 1.562, "step": 6408 }, { "epoch": 0.8552175073392047, "grad_norm": 0.9332515315220702, "learning_rate": 1.0794613305712432e-06, "loss": 1.5861, "step": 6409 }, { "epoch": 0.8553509474246064, "grad_norm": 0.9465013594113041, "learning_rate": 1.0775089692411057e-06, "loss": 1.544, "step": 6410 }, { "epoch": 0.855484387510008, "grad_norm": 0.9299855464015608, "learning_rate": 1.075558274530709e-06, "loss": 1.5169, "step": 6411 }, { "epoch": 0.8556178275954096, "grad_norm": 0.9633862952339556, "learning_rate": 1.073609246804418e-06, "loss": 1.5498, "step": 6412 }, { "epoch": 0.8557512676808113, "grad_norm": 0.9683201294799403, "learning_rate": 1.0716618864262885e-06, "loss": 1.4671, "step": 6413 }, { "epoch": 0.855884707766213, "grad_norm": 0.9649479650839727, "learning_rate": 1.0697161937600665e-06, "loss": 1.57, "step": 6414 }, { "epoch": 0.8560181478516147, "grad_norm": 0.9185273611508583, "learning_rate": 1.0677721691691833e-06, "loss": 1.5408, "step": 6415 }, { "epoch": 0.8561515879370163, "grad_norm": 0.9695842140595766, "learning_rate": 1.0658298130167599e-06, "loss": 1.507, "step": 6416 }, { "epoch": 0.8562850280224179, "grad_norm": 0.9303971531538695, "learning_rate": 1.0638891256656103e-06, "loss": 1.4942, "step": 6417 }, { "epoch": 0.8564184681078196, "grad_norm": 1.0549407029198568, "learning_rate": 1.0619501074782313e-06, "loss": 1.5486, "step": 6418 }, { "epoch": 0.8565519081932212, "grad_norm": 3.80436306285889, "learning_rate": 1.0600127588168063e-06, "loss": 1.5436, "step": 6419 }, { "epoch": 0.8566853482786229, "grad_norm": 0.9231151613693299, "learning_rate": 1.0580770800432139e-06, "loss": 1.5291, "step": 6420 }, { "epoch": 0.8568187883640246, "grad_norm": 1.076962491434642, "learning_rate": 1.0561430715190158e-06, "loss": 1.5237, "step": 6421 }, { "epoch": 0.8569522284494262, "grad_norm": 0.9284052147555532, "learning_rate": 1.054210733605462e-06, "loss": 1.5671, "step": 6422 }, { "epoch": 0.8570856685348278, "grad_norm": 1.013864860833294, "learning_rate": 1.0522800666634891e-06, "loss": 1.513, "step": 6423 }, { "epoch": 0.8572191086202295, "grad_norm": 1.0640857162799244, "learning_rate": 1.050351071053729e-06, "loss": 1.5686, "step": 6424 }, { "epoch": 0.8573525487056312, "grad_norm": 1.6541230030547414, "learning_rate": 1.0484237471364922e-06, "loss": 1.5551, "step": 6425 }, { "epoch": 0.8574859887910328, "grad_norm": 1.0049096936614765, "learning_rate": 1.0464980952717807e-06, "loss": 1.507, "step": 6426 }, { "epoch": 0.8576194288764345, "grad_norm": 0.9371213973946467, "learning_rate": 1.0445741158192879e-06, "loss": 1.5747, "step": 6427 }, { "epoch": 0.8577528689618361, "grad_norm": 1.2240293324907436, "learning_rate": 1.0426518091383886e-06, "loss": 1.6061, "step": 6428 }, { "epoch": 0.8578863090472378, "grad_norm": 0.9655483000092794, "learning_rate": 1.0407311755881477e-06, "loss": 1.5201, "step": 6429 }, { "epoch": 0.8580197491326395, "grad_norm": 1.0963997227966682, "learning_rate": 1.0388122155273162e-06, "loss": 1.54, "step": 6430 }, { "epoch": 0.8581531892180411, "grad_norm": 1.0333139188267229, "learning_rate": 1.0368949293143383e-06, "loss": 1.5161, "step": 6431 }, { "epoch": 0.8582866293034428, "grad_norm": 0.9470028994674716, "learning_rate": 1.0349793173073386e-06, "loss": 1.5838, "step": 6432 }, { "epoch": 0.8584200693888444, "grad_norm": 0.945024534455689, "learning_rate": 1.0330653798641288e-06, "loss": 1.5591, "step": 6433 }, { "epoch": 0.858553509474246, "grad_norm": 0.9404662766746317, "learning_rate": 1.0311531173422172e-06, "loss": 1.5622, "step": 6434 }, { "epoch": 0.8586869495596477, "grad_norm": 0.9927286739349651, "learning_rate": 1.0292425300987885e-06, "loss": 1.5112, "step": 6435 }, { "epoch": 0.8588203896450494, "grad_norm": 0.9324477538639491, "learning_rate": 1.027333618490719e-06, "loss": 1.5527, "step": 6436 }, { "epoch": 0.858953829730451, "grad_norm": 0.9841322098310834, "learning_rate": 1.0254263828745704e-06, "loss": 1.5666, "step": 6437 }, { "epoch": 0.8590872698158527, "grad_norm": 0.9705280053887522, "learning_rate": 1.0235208236065964e-06, "loss": 1.4959, "step": 6438 }, { "epoch": 0.8592207099012543, "grad_norm": 0.961832358126531, "learning_rate": 1.0216169410427312e-06, "loss": 1.5583, "step": 6439 }, { "epoch": 0.859354149986656, "grad_norm": 0.937404869373422, "learning_rate": 1.0197147355385983e-06, "loss": 1.4833, "step": 6440 }, { "epoch": 0.8594875900720577, "grad_norm": 0.9488407481430322, "learning_rate": 1.0178142074495068e-06, "loss": 1.5485, "step": 6441 }, { "epoch": 0.8596210301574593, "grad_norm": 0.9318508881776466, "learning_rate": 1.015915357130457e-06, "loss": 1.5505, "step": 6442 }, { "epoch": 0.859754470242861, "grad_norm": 0.9180739689951576, "learning_rate": 1.0140181849361307e-06, "loss": 1.4841, "step": 6443 }, { "epoch": 0.8598879103282626, "grad_norm": 0.9168330491896693, "learning_rate": 1.012122691220897e-06, "loss": 1.4934, "step": 6444 }, { "epoch": 0.8600213504136642, "grad_norm": 0.936550488916631, "learning_rate": 1.0102288763388147e-06, "loss": 1.5639, "step": 6445 }, { "epoch": 0.860154790499066, "grad_norm": 1.2407428273088301, "learning_rate": 1.0083367406436263e-06, "loss": 1.5286, "step": 6446 }, { "epoch": 0.8602882305844676, "grad_norm": 0.9564893007922771, "learning_rate": 1.0064462844887613e-06, "loss": 1.5186, "step": 6447 }, { "epoch": 0.8604216706698692, "grad_norm": 0.9357452140960504, "learning_rate": 1.004557508227333e-06, "loss": 1.5172, "step": 6448 }, { "epoch": 0.8605551107552709, "grad_norm": 0.8972018016158957, "learning_rate": 1.0026704122121466e-06, "loss": 1.5275, "step": 6449 }, { "epoch": 0.8606885508406725, "grad_norm": 0.9998053596687703, "learning_rate": 1.0007849967956884e-06, "loss": 1.5544, "step": 6450 }, { "epoch": 0.8608219909260741, "grad_norm": 0.9489874805787626, "learning_rate": 9.989012623301343e-07, "loss": 1.5508, "step": 6451 }, { "epoch": 0.8609554310114759, "grad_norm": 0.9471306443210268, "learning_rate": 9.970192091673414e-07, "loss": 1.534, "step": 6452 }, { "epoch": 0.8610888710968775, "grad_norm": 0.9137682040739373, "learning_rate": 9.951388376588567e-07, "loss": 1.5459, "step": 6453 }, { "epoch": 0.8612223111822792, "grad_norm": 0.9641556651125667, "learning_rate": 9.932601481559146e-07, "loss": 1.5387, "step": 6454 }, { "epoch": 0.8613557512676808, "grad_norm": 0.93922770841347, "learning_rate": 9.91383141009431e-07, "loss": 1.5581, "step": 6455 }, { "epoch": 0.8614891913530824, "grad_norm": 0.9436691135229323, "learning_rate": 9.8950781657001e-07, "loss": 1.5473, "step": 6456 }, { "epoch": 0.8616226314384842, "grad_norm": 0.9338614269793593, "learning_rate": 9.876341751879404e-07, "loss": 1.5505, "step": 6457 }, { "epoch": 0.8617560715238858, "grad_norm": 0.9397414325734582, "learning_rate": 9.857622172131952e-07, "loss": 1.5642, "step": 6458 }, { "epoch": 0.8618895116092874, "grad_norm": 1.0086274613066035, "learning_rate": 9.838919429954386e-07, "loss": 1.5355, "step": 6459 }, { "epoch": 0.8620229516946891, "grad_norm": 0.9481076250398918, "learning_rate": 9.820233528840151e-07, "loss": 1.5557, "step": 6460 }, { "epoch": 0.8621563917800907, "grad_norm": 0.9155281021750805, "learning_rate": 9.801564472279557e-07, "loss": 1.4963, "step": 6461 }, { "epoch": 0.8622898318654924, "grad_norm": 0.9144924099874533, "learning_rate": 9.782912263759748e-07, "loss": 1.5214, "step": 6462 }, { "epoch": 0.8624232719508941, "grad_norm": 0.9363225569423211, "learning_rate": 9.764276906764792e-07, "loss": 1.5297, "step": 6463 }, { "epoch": 0.8625567120362957, "grad_norm": 0.9435473694327043, "learning_rate": 9.745658404775537e-07, "loss": 1.5394, "step": 6464 }, { "epoch": 0.8626901521216973, "grad_norm": 0.9985762329589031, "learning_rate": 9.727056761269693e-07, "loss": 1.5468, "step": 6465 }, { "epoch": 0.862823592207099, "grad_norm": 0.9290866980377026, "learning_rate": 9.708471979721868e-07, "loss": 1.5702, "step": 6466 }, { "epoch": 0.8629570322925006, "grad_norm": 1.1072478162840556, "learning_rate": 9.689904063603461e-07, "loss": 1.582, "step": 6467 }, { "epoch": 0.8630904723779024, "grad_norm": 0.9126478135993858, "learning_rate": 9.671353016382767e-07, "loss": 1.5269, "step": 6468 }, { "epoch": 0.863223912463304, "grad_norm": 0.9443342912207419, "learning_rate": 9.65281884152487e-07, "loss": 1.5555, "step": 6469 }, { "epoch": 0.8633573525487056, "grad_norm": 0.9209254675884877, "learning_rate": 9.634301542491798e-07, "loss": 1.5324, "step": 6470 }, { "epoch": 0.8634907926341073, "grad_norm": 0.9549939583445304, "learning_rate": 9.61580112274234e-07, "loss": 1.5308, "step": 6471 }, { "epoch": 0.8636242327195089, "grad_norm": 0.9770641027377173, "learning_rate": 9.59731758573217e-07, "loss": 1.544, "step": 6472 }, { "epoch": 0.8637576728049106, "grad_norm": 0.8817507518794163, "learning_rate": 9.578850934913786e-07, "loss": 1.5241, "step": 6473 }, { "epoch": 0.8638911128903123, "grad_norm": 0.9128330688258248, "learning_rate": 9.560401173736588e-07, "loss": 1.5328, "step": 6474 }, { "epoch": 0.8640245529757139, "grad_norm": 0.9271171282021506, "learning_rate": 9.541968305646754e-07, "loss": 1.5089, "step": 6475 }, { "epoch": 0.8641579930611155, "grad_norm": 0.898516087846143, "learning_rate": 9.523552334087316e-07, "loss": 1.4933, "step": 6476 }, { "epoch": 0.8642914331465172, "grad_norm": 0.9455820905578254, "learning_rate": 9.505153262498201e-07, "loss": 1.5412, "step": 6477 }, { "epoch": 0.8644248732319189, "grad_norm": 0.9769692174530545, "learning_rate": 9.486771094316149e-07, "loss": 1.5058, "step": 6478 }, { "epoch": 0.8645583133173205, "grad_norm": 0.9225921262405403, "learning_rate": 9.468405832974714e-07, "loss": 1.4898, "step": 6479 }, { "epoch": 0.8646917534027222, "grad_norm": 0.9336861605888874, "learning_rate": 9.450057481904306e-07, "loss": 1.5869, "step": 6480 }, { "epoch": 0.8648251934881238, "grad_norm": 1.1283612150349465, "learning_rate": 9.431726044532241e-07, "loss": 1.5553, "step": 6481 }, { "epoch": 0.8649586335735255, "grad_norm": 0.9124111539042673, "learning_rate": 9.41341152428259e-07, "loss": 1.5299, "step": 6482 }, { "epoch": 0.8650920736589272, "grad_norm": 0.9550521448834836, "learning_rate": 9.395113924576271e-07, "loss": 1.549, "step": 6483 }, { "epoch": 0.8652255137443288, "grad_norm": 0.9872576025252855, "learning_rate": 9.376833248831119e-07, "loss": 1.5379, "step": 6484 }, { "epoch": 0.8653589538297305, "grad_norm": 0.9222828401540125, "learning_rate": 9.358569500461734e-07, "loss": 1.5166, "step": 6485 }, { "epoch": 0.8654923939151321, "grad_norm": 1.0447537425480853, "learning_rate": 9.340322682879577e-07, "loss": 1.5124, "step": 6486 }, { "epoch": 0.8656258340005337, "grad_norm": 0.9039392609995586, "learning_rate": 9.322092799492921e-07, "loss": 1.5092, "step": 6487 }, { "epoch": 0.8657592740859354, "grad_norm": 0.9498842193910904, "learning_rate": 9.303879853706955e-07, "loss": 1.5568, "step": 6488 }, { "epoch": 0.8658927141713371, "grad_norm": 0.9356401186158558, "learning_rate": 9.285683848923599e-07, "loss": 1.5631, "step": 6489 }, { "epoch": 0.8660261542567387, "grad_norm": 0.9816096213234118, "learning_rate": 9.26750478854167e-07, "loss": 1.5634, "step": 6490 }, { "epoch": 0.8661595943421404, "grad_norm": 0.9902314871977751, "learning_rate": 9.249342675956841e-07, "loss": 1.5368, "step": 6491 }, { "epoch": 0.866293034427542, "grad_norm": 0.913769147225799, "learning_rate": 9.231197514561552e-07, "loss": 1.494, "step": 6492 }, { "epoch": 0.8664264745129436, "grad_norm": 0.9901239051487305, "learning_rate": 9.213069307745137e-07, "loss": 1.529, "step": 6493 }, { "epoch": 0.8665599145983454, "grad_norm": 0.9186278024861386, "learning_rate": 9.194958058893722e-07, "loss": 1.5525, "step": 6494 }, { "epoch": 0.866693354683747, "grad_norm": 0.9601791869003774, "learning_rate": 9.176863771390288e-07, "loss": 1.5595, "step": 6495 }, { "epoch": 0.8668267947691487, "grad_norm": 0.9344491574122844, "learning_rate": 9.158786448614621e-07, "loss": 1.5765, "step": 6496 }, { "epoch": 0.8669602348545503, "grad_norm": 1.0059039142212696, "learning_rate": 9.140726093943409e-07, "loss": 1.5543, "step": 6497 }, { "epoch": 0.8670936749399519, "grad_norm": 1.1316836388517142, "learning_rate": 9.122682710750074e-07, "loss": 1.5017, "step": 6498 }, { "epoch": 0.8672271150253537, "grad_norm": 0.9402893670787278, "learning_rate": 9.104656302404946e-07, "loss": 1.5371, "step": 6499 }, { "epoch": 0.8673605551107553, "grad_norm": 0.9832968180647175, "learning_rate": 9.086646872275129e-07, "loss": 1.5387, "step": 6500 }, { "epoch": 0.8674939951961569, "grad_norm": 0.9514391898183131, "learning_rate": 9.068654423724577e-07, "loss": 1.5823, "step": 6501 }, { "epoch": 0.8676274352815586, "grad_norm": 0.9488421342873816, "learning_rate": 9.050678960114101e-07, "loss": 1.5656, "step": 6502 }, { "epoch": 0.8677608753669602, "grad_norm": 0.9115733795737984, "learning_rate": 9.032720484801294e-07, "loss": 1.5759, "step": 6503 }, { "epoch": 0.8678943154523618, "grad_norm": 0.9981354349705964, "learning_rate": 9.014779001140606e-07, "loss": 1.574, "step": 6504 }, { "epoch": 0.8680277555377636, "grad_norm": 0.9520562761893402, "learning_rate": 8.996854512483277e-07, "loss": 1.6245, "step": 6505 }, { "epoch": 0.8681611956231652, "grad_norm": 1.4726974542567874, "learning_rate": 8.978947022177431e-07, "loss": 1.525, "step": 6506 }, { "epoch": 0.8682946357085669, "grad_norm": 1.184012125670187, "learning_rate": 8.961056533567969e-07, "loss": 1.5583, "step": 6507 }, { "epoch": 0.8684280757939685, "grad_norm": 0.9351096655055477, "learning_rate": 8.943183049996606e-07, "loss": 1.6061, "step": 6508 }, { "epoch": 0.8685615158793701, "grad_norm": 0.9305843754964107, "learning_rate": 8.925326574801952e-07, "loss": 1.5381, "step": 6509 }, { "epoch": 0.8686949559647719, "grad_norm": 1.3119179296076262, "learning_rate": 8.907487111319368e-07, "loss": 1.5312, "step": 6510 }, { "epoch": 0.8688283960501735, "grad_norm": 0.9409917789541443, "learning_rate": 8.889664662881059e-07, "loss": 1.5381, "step": 6511 }, { "epoch": 0.8689618361355751, "grad_norm": 0.9287019422318122, "learning_rate": 8.871859232816049e-07, "loss": 1.5654, "step": 6512 }, { "epoch": 0.8690952762209768, "grad_norm": 0.9231687758110173, "learning_rate": 8.854070824450223e-07, "loss": 1.6082, "step": 6513 }, { "epoch": 0.8692287163063784, "grad_norm": 0.9338271404408033, "learning_rate": 8.836299441106222e-07, "loss": 1.5037, "step": 6514 }, { "epoch": 0.86936215639178, "grad_norm": 0.9727417740878118, "learning_rate": 8.818545086103536e-07, "loss": 1.5739, "step": 6515 }, { "epoch": 0.8694955964771818, "grad_norm": 0.95148914834048, "learning_rate": 8.800807762758501e-07, "loss": 1.5617, "step": 6516 }, { "epoch": 0.8696290365625834, "grad_norm": 0.9361374474665792, "learning_rate": 8.783087474384245e-07, "loss": 1.5317, "step": 6517 }, { "epoch": 0.869762476647985, "grad_norm": 0.9353510556127035, "learning_rate": 8.765384224290697e-07, "loss": 1.5276, "step": 6518 }, { "epoch": 0.8698959167333867, "grad_norm": 0.9385834806918207, "learning_rate": 8.747698015784612e-07, "loss": 1.5294, "step": 6519 }, { "epoch": 0.8700293568187883, "grad_norm": 0.97192379174236, "learning_rate": 8.730028852169614e-07, "loss": 1.516, "step": 6520 }, { "epoch": 0.8701627969041901, "grad_norm": 0.9442829507665126, "learning_rate": 8.712376736746075e-07, "loss": 1.4664, "step": 6521 }, { "epoch": 0.8702962369895917, "grad_norm": 1.0520474911116193, "learning_rate": 8.694741672811191e-07, "loss": 1.5211, "step": 6522 }, { "epoch": 0.8704296770749933, "grad_norm": 0.9648717939397788, "learning_rate": 8.677123663659038e-07, "loss": 1.5513, "step": 6523 }, { "epoch": 0.870563117160395, "grad_norm": 0.968648455882469, "learning_rate": 8.659522712580437e-07, "loss": 1.5971, "step": 6524 }, { "epoch": 0.8706965572457966, "grad_norm": 0.9731547427610653, "learning_rate": 8.641938822863039e-07, "loss": 1.5163, "step": 6525 }, { "epoch": 0.8708299973311983, "grad_norm": 1.3042383135891946, "learning_rate": 8.624371997791292e-07, "loss": 1.5791, "step": 6526 }, { "epoch": 0.8709634374166, "grad_norm": 0.9354023468065126, "learning_rate": 8.60682224064654e-07, "loss": 1.5338, "step": 6527 }, { "epoch": 0.8710968775020016, "grad_norm": 1.0952681463977836, "learning_rate": 8.589289554706826e-07, "loss": 1.4912, "step": 6528 }, { "epoch": 0.8712303175874032, "grad_norm": 0.9461007651182953, "learning_rate": 8.571773943247063e-07, "loss": 1.5812, "step": 6529 }, { "epoch": 0.8713637576728049, "grad_norm": 0.9328438835651939, "learning_rate": 8.554275409539004e-07, "loss": 1.5688, "step": 6530 }, { "epoch": 0.8714971977582066, "grad_norm": 0.9452776089490901, "learning_rate": 8.536793956851141e-07, "loss": 1.519, "step": 6531 }, { "epoch": 0.8716306378436082, "grad_norm": 0.9314474567474438, "learning_rate": 8.519329588448822e-07, "loss": 1.487, "step": 6532 }, { "epoch": 0.8717640779290099, "grad_norm": 0.9203268868498949, "learning_rate": 8.501882307594167e-07, "loss": 1.5422, "step": 6533 }, { "epoch": 0.8718975180144115, "grad_norm": 0.9548253170186739, "learning_rate": 8.484452117546171e-07, "loss": 1.5782, "step": 6534 }, { "epoch": 0.8720309580998132, "grad_norm": 0.9327588816923488, "learning_rate": 8.467039021560575e-07, "loss": 1.5449, "step": 6535 }, { "epoch": 0.8721643981852149, "grad_norm": 1.116947865086207, "learning_rate": 8.449643022889953e-07, "loss": 1.5331, "step": 6536 }, { "epoch": 0.8722978382706165, "grad_norm": 0.9154028744010556, "learning_rate": 8.432264124783662e-07, "loss": 1.5358, "step": 6537 }, { "epoch": 0.8724312783560182, "grad_norm": 0.9584163843597435, "learning_rate": 8.414902330487906e-07, "loss": 1.5348, "step": 6538 }, { "epoch": 0.8725647184414198, "grad_norm": 0.96915727974722, "learning_rate": 8.397557643245646e-07, "loss": 1.5153, "step": 6539 }, { "epoch": 0.8726981585268214, "grad_norm": 0.9454548862508139, "learning_rate": 8.38023006629669e-07, "loss": 1.5727, "step": 6540 }, { "epoch": 0.8728315986122231, "grad_norm": 0.9381925491031865, "learning_rate": 8.36291960287764e-07, "loss": 1.5877, "step": 6541 }, { "epoch": 0.8729650386976248, "grad_norm": 0.9475264001662446, "learning_rate": 8.345626256221873e-07, "loss": 1.5583, "step": 6542 }, { "epoch": 0.8730984787830264, "grad_norm": 0.9063919928958889, "learning_rate": 8.328350029559595e-07, "loss": 1.5421, "step": 6543 }, { "epoch": 0.8732319188684281, "grad_norm": 1.162151324167172, "learning_rate": 8.311090926117793e-07, "loss": 1.5868, "step": 6544 }, { "epoch": 0.8733653589538297, "grad_norm": 0.9586262421769765, "learning_rate": 8.293848949120309e-07, "loss": 1.5314, "step": 6545 }, { "epoch": 0.8734987990392313, "grad_norm": 0.9299431509946539, "learning_rate": 8.276624101787733e-07, "loss": 1.4904, "step": 6546 }, { "epoch": 0.8736322391246331, "grad_norm": 1.1008446800425085, "learning_rate": 8.259416387337437e-07, "loss": 1.5502, "step": 6547 }, { "epoch": 0.8737656792100347, "grad_norm": 0.9407230063487958, "learning_rate": 8.242225808983684e-07, "loss": 1.5496, "step": 6548 }, { "epoch": 0.8738991192954364, "grad_norm": 0.9950896376938949, "learning_rate": 8.225052369937436e-07, "loss": 1.569, "step": 6549 }, { "epoch": 0.874032559380838, "grad_norm": 0.9343428376024484, "learning_rate": 8.207896073406518e-07, "loss": 1.5303, "step": 6550 }, { "epoch": 0.8741659994662396, "grad_norm": 0.9213894936942306, "learning_rate": 8.1907569225955e-07, "loss": 1.5117, "step": 6551 }, { "epoch": 0.8742994395516414, "grad_norm": 1.0003551884905675, "learning_rate": 8.17363492070582e-07, "loss": 1.5317, "step": 6552 }, { "epoch": 0.874432879637043, "grad_norm": 0.952551223522479, "learning_rate": 8.156530070935654e-07, "loss": 1.5314, "step": 6553 }, { "epoch": 0.8745663197224446, "grad_norm": 0.9370104185043665, "learning_rate": 8.139442376479967e-07, "loss": 1.5173, "step": 6554 }, { "epoch": 0.8746997598078463, "grad_norm": 1.0396921232304424, "learning_rate": 8.122371840530597e-07, "loss": 1.5413, "step": 6555 }, { "epoch": 0.8748331998932479, "grad_norm": 0.9443695541062251, "learning_rate": 8.105318466276102e-07, "loss": 1.5138, "step": 6556 }, { "epoch": 0.8749666399786495, "grad_norm": 0.9529659601953869, "learning_rate": 8.088282256901858e-07, "loss": 1.5592, "step": 6557 }, { "epoch": 0.8751000800640513, "grad_norm": 0.9345505185968372, "learning_rate": 8.07126321559002e-07, "loss": 1.5475, "step": 6558 }, { "epoch": 0.8752335201494529, "grad_norm": 0.9949322978905206, "learning_rate": 8.054261345519576e-07, "loss": 1.5437, "step": 6559 }, { "epoch": 0.8753669602348545, "grad_norm": 1.3528014835986673, "learning_rate": 8.037276649866277e-07, "loss": 1.5508, "step": 6560 }, { "epoch": 0.8755004003202562, "grad_norm": 0.9126381930458377, "learning_rate": 8.02030913180264e-07, "loss": 1.5883, "step": 6561 }, { "epoch": 0.8756338404056578, "grad_norm": 0.9059415684652883, "learning_rate": 8.003358794498051e-07, "loss": 1.4654, "step": 6562 }, { "epoch": 0.8757672804910596, "grad_norm": 0.9317951935750919, "learning_rate": 7.986425641118612e-07, "loss": 1.526, "step": 6563 }, { "epoch": 0.8759007205764612, "grad_norm": 1.2757830287361367, "learning_rate": 7.969509674827258e-07, "loss": 1.5593, "step": 6564 }, { "epoch": 0.8760341606618628, "grad_norm": 0.9421033561329558, "learning_rate": 7.952610898783675e-07, "loss": 1.5101, "step": 6565 }, { "epoch": 0.8761676007472645, "grad_norm": 0.9277438199353042, "learning_rate": 7.935729316144381e-07, "loss": 1.5095, "step": 6566 }, { "epoch": 0.8763010408326661, "grad_norm": 0.9336579625689782, "learning_rate": 7.918864930062675e-07, "loss": 1.5145, "step": 6567 }, { "epoch": 0.8764344809180677, "grad_norm": 0.9777046753987705, "learning_rate": 7.902017743688606e-07, "loss": 1.5592, "step": 6568 }, { "epoch": 0.8765679210034695, "grad_norm": 0.9684594181447197, "learning_rate": 7.885187760169033e-07, "loss": 1.572, "step": 6569 }, { "epoch": 0.8767013610888711, "grad_norm": 1.267214799587763, "learning_rate": 7.868374982647642e-07, "loss": 1.5555, "step": 6570 }, { "epoch": 0.8768348011742727, "grad_norm": 0.9419267296602276, "learning_rate": 7.851579414264843e-07, "loss": 1.5705, "step": 6571 }, { "epoch": 0.8769682412596744, "grad_norm": 0.9304158422245021, "learning_rate": 7.834801058157837e-07, "loss": 1.5284, "step": 6572 }, { "epoch": 0.877101681345076, "grad_norm": 0.9745745483632863, "learning_rate": 7.818039917460674e-07, "loss": 1.512, "step": 6573 }, { "epoch": 0.8772351214304777, "grad_norm": 0.9525590369369205, "learning_rate": 7.801295995304125e-07, "loss": 1.532, "step": 6574 }, { "epoch": 0.8773685615158794, "grad_norm": 0.9198789574840573, "learning_rate": 7.78456929481577e-07, "loss": 1.516, "step": 6575 }, { "epoch": 0.877502001601281, "grad_norm": 0.901804075657504, "learning_rate": 7.767859819119927e-07, "loss": 1.5354, "step": 6576 }, { "epoch": 0.8776354416866827, "grad_norm": 0.9437717919984645, "learning_rate": 7.751167571337792e-07, "loss": 1.5562, "step": 6577 }, { "epoch": 0.8777688817720843, "grad_norm": 0.901156691623584, "learning_rate": 7.734492554587269e-07, "loss": 1.566, "step": 6578 }, { "epoch": 0.877902321857486, "grad_norm": 0.9507122398403107, "learning_rate": 7.717834771983046e-07, "loss": 1.5809, "step": 6579 }, { "epoch": 0.8780357619428877, "grad_norm": 0.9154819364254669, "learning_rate": 7.70119422663661e-07, "loss": 1.5289, "step": 6580 }, { "epoch": 0.8781692020282893, "grad_norm": 0.9235692837642071, "learning_rate": 7.684570921656231e-07, "loss": 1.5468, "step": 6581 }, { "epoch": 0.8783026421136909, "grad_norm": 0.9431590240253189, "learning_rate": 7.667964860146959e-07, "loss": 1.5715, "step": 6582 }, { "epoch": 0.8784360821990926, "grad_norm": 0.9841453617835374, "learning_rate": 7.651376045210612e-07, "loss": 1.5703, "step": 6583 }, { "epoch": 0.8785695222844943, "grad_norm": 0.9280133482016604, "learning_rate": 7.63480447994579e-07, "loss": 1.5325, "step": 6584 }, { "epoch": 0.8787029623698959, "grad_norm": 0.9042299305841511, "learning_rate": 7.618250167447871e-07, "loss": 1.5151, "step": 6585 }, { "epoch": 0.8788364024552976, "grad_norm": 0.9303954686816721, "learning_rate": 7.601713110809006e-07, "loss": 1.5533, "step": 6586 }, { "epoch": 0.8789698425406992, "grad_norm": 0.9450693162515615, "learning_rate": 7.585193313118155e-07, "loss": 1.5242, "step": 6587 }, { "epoch": 0.8791032826261009, "grad_norm": 0.9224963862036915, "learning_rate": 7.568690777461008e-07, "loss": 1.5231, "step": 6588 }, { "epoch": 0.8792367227115025, "grad_norm": 1.051976485103453, "learning_rate": 7.552205506920052e-07, "loss": 1.5829, "step": 6589 }, { "epoch": 0.8793701627969042, "grad_norm": 0.9433620420881246, "learning_rate": 7.535737504574536e-07, "loss": 1.4849, "step": 6590 }, { "epoch": 0.8795036028823059, "grad_norm": 1.0769771830246422, "learning_rate": 7.519286773500522e-07, "loss": 1.5769, "step": 6591 }, { "epoch": 0.8796370429677075, "grad_norm": 0.9338463460938362, "learning_rate": 7.502853316770808e-07, "loss": 1.5084, "step": 6592 }, { "epoch": 0.8797704830531091, "grad_norm": 0.9513071114461021, "learning_rate": 7.48643713745496e-07, "loss": 1.4993, "step": 6593 }, { "epoch": 0.8799039231385108, "grad_norm": 0.9736174380569763, "learning_rate": 7.47003823861936e-07, "loss": 1.5405, "step": 6594 }, { "epoch": 0.8800373632239125, "grad_norm": 0.9381351170469457, "learning_rate": 7.453656623327132e-07, "loss": 1.5456, "step": 6595 }, { "epoch": 0.8801708033093141, "grad_norm": 0.9345529815258716, "learning_rate": 7.437292294638155e-07, "loss": 1.509, "step": 6596 }, { "epoch": 0.8803042433947158, "grad_norm": 0.9678042966662639, "learning_rate": 7.420945255609102e-07, "loss": 1.4996, "step": 6597 }, { "epoch": 0.8804376834801174, "grad_norm": 0.9243133790343941, "learning_rate": 7.404615509293444e-07, "loss": 1.5459, "step": 6598 }, { "epoch": 0.880571123565519, "grad_norm": 0.9421650715900654, "learning_rate": 7.388303058741364e-07, "loss": 1.5101, "step": 6599 }, { "epoch": 0.8807045636509208, "grad_norm": 0.9521800237087055, "learning_rate": 7.372007906999856e-07, "loss": 1.56, "step": 6600 }, { "epoch": 0.8808380037363224, "grad_norm": 0.9828304259855544, "learning_rate": 7.355730057112643e-07, "loss": 1.5758, "step": 6601 }, { "epoch": 0.8809714438217241, "grad_norm": 0.9439889966484398, "learning_rate": 7.339469512120268e-07, "loss": 1.5744, "step": 6602 }, { "epoch": 0.8811048839071257, "grad_norm": 0.9616630127654491, "learning_rate": 7.323226275060014e-07, "loss": 1.5572, "step": 6603 }, { "epoch": 0.8812383239925273, "grad_norm": 0.9218601721204037, "learning_rate": 7.307000348965909e-07, "loss": 1.4996, "step": 6604 }, { "epoch": 0.881371764077929, "grad_norm": 1.0701166098993966, "learning_rate": 7.290791736868819e-07, "loss": 1.5297, "step": 6605 }, { "epoch": 0.8815052041633307, "grad_norm": 0.9708535489021737, "learning_rate": 7.274600441796287e-07, "loss": 1.519, "step": 6606 }, { "epoch": 0.8816386442487323, "grad_norm": 0.9728154176736694, "learning_rate": 7.258426466772672e-07, "loss": 1.5832, "step": 6607 }, { "epoch": 0.881772084334134, "grad_norm": 0.9383164976637655, "learning_rate": 7.242269814819081e-07, "loss": 1.5461, "step": 6608 }, { "epoch": 0.8819055244195356, "grad_norm": 0.9213707626663843, "learning_rate": 7.226130488953409e-07, "loss": 1.5343, "step": 6609 }, { "epoch": 0.8820389645049372, "grad_norm": 0.9620469927915996, "learning_rate": 7.210008492190301e-07, "loss": 1.5522, "step": 6610 }, { "epoch": 0.882172404590339, "grad_norm": 0.9341015762591438, "learning_rate": 7.193903827541127e-07, "loss": 1.4715, "step": 6611 }, { "epoch": 0.8823058446757406, "grad_norm": 0.9956857458374051, "learning_rate": 7.177816498014101e-07, "loss": 1.5903, "step": 6612 }, { "epoch": 0.8824392847611422, "grad_norm": 0.9857230476852422, "learning_rate": 7.161746506614142e-07, "loss": 1.5852, "step": 6613 }, { "epoch": 0.8825727248465439, "grad_norm": 0.9343322357829968, "learning_rate": 7.145693856342928e-07, "loss": 1.5051, "step": 6614 }, { "epoch": 0.8827061649319455, "grad_norm": 0.9231910471570226, "learning_rate": 7.129658550198892e-07, "loss": 1.5404, "step": 6615 }, { "epoch": 0.8828396050173473, "grad_norm": 0.9422235323401871, "learning_rate": 7.113640591177296e-07, "loss": 1.5096, "step": 6616 }, { "epoch": 0.8829730451027489, "grad_norm": 0.9268050292928415, "learning_rate": 7.097639982270077e-07, "loss": 1.5613, "step": 6617 }, { "epoch": 0.8831064851881505, "grad_norm": 1.0117584913067665, "learning_rate": 7.081656726465968e-07, "loss": 1.5219, "step": 6618 }, { "epoch": 0.8832399252735522, "grad_norm": 1.0082915428918369, "learning_rate": 7.065690826750482e-07, "loss": 1.564, "step": 6619 }, { "epoch": 0.8833733653589538, "grad_norm": 0.9226919224076718, "learning_rate": 7.049742286105843e-07, "loss": 1.4563, "step": 6620 }, { "epoch": 0.8835068054443554, "grad_norm": 0.9235669483501304, "learning_rate": 7.033811107511079e-07, "loss": 1.5549, "step": 6621 }, { "epoch": 0.8836402455297572, "grad_norm": 0.9675042520116867, "learning_rate": 7.017897293941934e-07, "loss": 1.5154, "step": 6622 }, { "epoch": 0.8837736856151588, "grad_norm": 0.9532787577491273, "learning_rate": 7.002000848370938e-07, "loss": 1.5967, "step": 6623 }, { "epoch": 0.8839071257005604, "grad_norm": 0.9172226848899977, "learning_rate": 6.98612177376734e-07, "loss": 1.5259, "step": 6624 }, { "epoch": 0.8840405657859621, "grad_norm": 0.9383987828753451, "learning_rate": 6.970260073097213e-07, "loss": 1.5119, "step": 6625 }, { "epoch": 0.8841740058713637, "grad_norm": 1.2105279069704957, "learning_rate": 6.954415749323318e-07, "loss": 1.5682, "step": 6626 }, { "epoch": 0.8843074459567654, "grad_norm": 0.9799759626641332, "learning_rate": 6.9385888054052e-07, "loss": 1.631, "step": 6627 }, { "epoch": 0.8844408860421671, "grad_norm": 0.9054994550461375, "learning_rate": 6.92277924429915e-07, "loss": 1.4996, "step": 6628 }, { "epoch": 0.8845743261275687, "grad_norm": 0.9271188131214871, "learning_rate": 6.906987068958193e-07, "loss": 1.6082, "step": 6629 }, { "epoch": 0.8847077662129704, "grad_norm": 0.9259197742858476, "learning_rate": 6.89121228233216e-07, "loss": 1.5372, "step": 6630 }, { "epoch": 0.884841206298372, "grad_norm": 0.9170649924609936, "learning_rate": 6.875454887367605e-07, "loss": 1.5452, "step": 6631 }, { "epoch": 0.8849746463837737, "grad_norm": 1.2210390039887022, "learning_rate": 6.859714887007796e-07, "loss": 1.5014, "step": 6632 }, { "epoch": 0.8851080864691754, "grad_norm": 0.9534407665379254, "learning_rate": 6.843992284192802e-07, "loss": 1.5863, "step": 6633 }, { "epoch": 0.885241526554577, "grad_norm": 0.961514819564435, "learning_rate": 6.82828708185943e-07, "loss": 1.5202, "step": 6634 }, { "epoch": 0.8853749666399786, "grad_norm": 0.9797343158237848, "learning_rate": 6.812599282941246e-07, "loss": 1.5998, "step": 6635 }, { "epoch": 0.8855084067253803, "grad_norm": 0.9254630062159546, "learning_rate": 6.796928890368515e-07, "loss": 1.5759, "step": 6636 }, { "epoch": 0.885641846810782, "grad_norm": 0.921610927618888, "learning_rate": 6.781275907068329e-07, "loss": 1.5506, "step": 6637 }, { "epoch": 0.8857752868961836, "grad_norm": 1.1965720258668404, "learning_rate": 6.765640335964463e-07, "loss": 1.4921, "step": 6638 }, { "epoch": 0.8859087269815853, "grad_norm": 0.9199991742942989, "learning_rate": 6.750022179977467e-07, "loss": 1.5276, "step": 6639 }, { "epoch": 0.8860421670669869, "grad_norm": 0.9414233305947609, "learning_rate": 6.73442144202463e-07, "loss": 1.5265, "step": 6640 }, { "epoch": 0.8861756071523885, "grad_norm": 0.9303782628792104, "learning_rate": 6.718838125020011e-07, "loss": 1.4829, "step": 6641 }, { "epoch": 0.8863090472377902, "grad_norm": 1.244221234348905, "learning_rate": 6.703272231874391e-07, "loss": 1.5792, "step": 6642 }, { "epoch": 0.8864424873231919, "grad_norm": 0.9051064947661962, "learning_rate": 6.687723765495268e-07, "loss": 1.558, "step": 6643 }, { "epoch": 0.8865759274085936, "grad_norm": 1.0602549890874733, "learning_rate": 6.672192728786964e-07, "loss": 1.5136, "step": 6644 }, { "epoch": 0.8867093674939952, "grad_norm": 1.015376492992175, "learning_rate": 6.656679124650489e-07, "loss": 1.526, "step": 6645 }, { "epoch": 0.8868428075793968, "grad_norm": 0.9763385623831438, "learning_rate": 6.641182955983594e-07, "loss": 1.5627, "step": 6646 }, { "epoch": 0.8869762476647985, "grad_norm": 0.9484361059315187, "learning_rate": 6.625704225680773e-07, "loss": 1.5681, "step": 6647 }, { "epoch": 0.8871096877502002, "grad_norm": 1.1736786559443448, "learning_rate": 6.610242936633315e-07, "loss": 1.52, "step": 6648 }, { "epoch": 0.8872431278356018, "grad_norm": 1.0652253170073327, "learning_rate": 6.594799091729198e-07, "loss": 1.5487, "step": 6649 }, { "epoch": 0.8873765679210035, "grad_norm": 0.9130108805332712, "learning_rate": 6.579372693853137e-07, "loss": 1.5838, "step": 6650 }, { "epoch": 0.8875100080064051, "grad_norm": 0.9461766707449843, "learning_rate": 6.563963745886637e-07, "loss": 1.5557, "step": 6651 }, { "epoch": 0.8876434480918067, "grad_norm": 0.9345577948179379, "learning_rate": 6.548572250707896e-07, "loss": 1.5397, "step": 6652 }, { "epoch": 0.8877768881772085, "grad_norm": 0.9306784187277369, "learning_rate": 6.533198211191871e-07, "loss": 1.5641, "step": 6653 }, { "epoch": 0.8879103282626101, "grad_norm": 0.9904047827430623, "learning_rate": 6.517841630210254e-07, "loss": 1.5339, "step": 6654 }, { "epoch": 0.8880437683480118, "grad_norm": 0.9847791868831337, "learning_rate": 6.502502510631503e-07, "loss": 1.5608, "step": 6655 }, { "epoch": 0.8881772084334134, "grad_norm": 1.2239512456758879, "learning_rate": 6.487180855320762e-07, "loss": 1.5556, "step": 6656 }, { "epoch": 0.888310648518815, "grad_norm": 0.9473098956072885, "learning_rate": 6.471876667139954e-07, "loss": 1.5671, "step": 6657 }, { "epoch": 0.8884440886042168, "grad_norm": 0.9150781295209052, "learning_rate": 6.456589948947733e-07, "loss": 1.4855, "step": 6658 }, { "epoch": 0.8885775286896184, "grad_norm": 0.9262691372999762, "learning_rate": 6.441320703599474e-07, "loss": 1.5296, "step": 6659 }, { "epoch": 0.88871096877502, "grad_norm": 1.2521772098087953, "learning_rate": 6.426068933947305e-07, "loss": 1.5772, "step": 6660 }, { "epoch": 0.8888444088604217, "grad_norm": 0.9718277083295738, "learning_rate": 6.410834642840069e-07, "loss": 1.5483, "step": 6661 }, { "epoch": 0.8889778489458233, "grad_norm": 0.9249506579829485, "learning_rate": 6.395617833123379e-07, "loss": 1.5262, "step": 6662 }, { "epoch": 0.8891112890312249, "grad_norm": 0.991283938492636, "learning_rate": 6.380418507639563e-07, "loss": 1.5752, "step": 6663 }, { "epoch": 0.8892447291166267, "grad_norm": 1.145717875907442, "learning_rate": 6.365236669227659e-07, "loss": 1.5203, "step": 6664 }, { "epoch": 0.8893781692020283, "grad_norm": 0.9459042028552311, "learning_rate": 6.350072320723477e-07, "loss": 1.5782, "step": 6665 }, { "epoch": 0.8895116092874299, "grad_norm": 0.951409036273284, "learning_rate": 6.33492546495954e-07, "loss": 1.5228, "step": 6666 }, { "epoch": 0.8896450493728316, "grad_norm": 0.9376247682782157, "learning_rate": 6.319796104765097e-07, "loss": 1.5482, "step": 6667 }, { "epoch": 0.8897784894582332, "grad_norm": 0.9343467840146905, "learning_rate": 6.304684242966164e-07, "loss": 1.5819, "step": 6668 }, { "epoch": 0.889911929543635, "grad_norm": 0.9677052840154801, "learning_rate": 6.289589882385461e-07, "loss": 1.5253, "step": 6669 }, { "epoch": 0.8900453696290366, "grad_norm": 0.9467119490943833, "learning_rate": 6.274513025842421e-07, "loss": 1.5582, "step": 6670 }, { "epoch": 0.8901788097144382, "grad_norm": 0.9311722512072491, "learning_rate": 6.25945367615326e-07, "loss": 1.5084, "step": 6671 }, { "epoch": 0.8903122497998399, "grad_norm": 1.1535295894188413, "learning_rate": 6.244411836130848e-07, "loss": 1.5288, "step": 6672 }, { "epoch": 0.8904456898852415, "grad_norm": 0.9398613520690328, "learning_rate": 6.229387508584872e-07, "loss": 1.541, "step": 6673 }, { "epoch": 0.8905791299706431, "grad_norm": 0.9460664090823528, "learning_rate": 6.214380696321698e-07, "loss": 1.5259, "step": 6674 }, { "epoch": 0.8907125700560449, "grad_norm": 0.9192830875852988, "learning_rate": 6.199391402144406e-07, "loss": 1.5079, "step": 6675 }, { "epoch": 0.8908460101414465, "grad_norm": 0.9624182903854707, "learning_rate": 6.184419628852845e-07, "loss": 1.514, "step": 6676 }, { "epoch": 0.8909794502268481, "grad_norm": 0.9567443782327006, "learning_rate": 6.169465379243578e-07, "loss": 1.5349, "step": 6677 }, { "epoch": 0.8911128903122498, "grad_norm": 1.173850544587069, "learning_rate": 6.154528656109871e-07, "loss": 1.5518, "step": 6678 }, { "epoch": 0.8912463303976514, "grad_norm": 0.9397559763327206, "learning_rate": 6.139609462241724e-07, "loss": 1.524, "step": 6679 }, { "epoch": 0.8913797704830531, "grad_norm": 0.9739117546526423, "learning_rate": 6.124707800425911e-07, "loss": 1.5663, "step": 6680 }, { "epoch": 0.8915132105684548, "grad_norm": 0.9526569594291362, "learning_rate": 6.109823673445869e-07, "loss": 1.5959, "step": 6681 }, { "epoch": 0.8916466506538564, "grad_norm": 1.2208803452356254, "learning_rate": 6.094957084081765e-07, "loss": 1.527, "step": 6682 }, { "epoch": 0.8917800907392581, "grad_norm": 0.9533030946813461, "learning_rate": 6.080108035110543e-07, "loss": 1.5158, "step": 6683 }, { "epoch": 0.8919135308246597, "grad_norm": 0.9373914483177651, "learning_rate": 6.06527652930583e-07, "loss": 1.5035, "step": 6684 }, { "epoch": 0.8920469709100614, "grad_norm": 0.9206005814229758, "learning_rate": 6.050462569437965e-07, "loss": 1.5194, "step": 6685 }, { "epoch": 0.8921804109954631, "grad_norm": 0.9411608802351985, "learning_rate": 6.035666158274034e-07, "loss": 1.5614, "step": 6686 }, { "epoch": 0.8923138510808647, "grad_norm": 0.9549233585076925, "learning_rate": 6.020887298577838e-07, "loss": 1.524, "step": 6687 }, { "epoch": 0.8924472911662663, "grad_norm": 1.198668371629655, "learning_rate": 6.006125993109913e-07, "loss": 1.5915, "step": 6688 }, { "epoch": 0.892580731251668, "grad_norm": 0.9623095343289453, "learning_rate": 5.991382244627475e-07, "loss": 1.51, "step": 6689 }, { "epoch": 0.8927141713370697, "grad_norm": 0.9678788722229147, "learning_rate": 5.976656055884522e-07, "loss": 1.5247, "step": 6690 }, { "epoch": 0.8928476114224713, "grad_norm": 1.0279035033137902, "learning_rate": 5.961947429631721e-07, "loss": 1.5636, "step": 6691 }, { "epoch": 0.892981051507873, "grad_norm": 0.9304167484501092, "learning_rate": 5.947256368616483e-07, "loss": 1.5038, "step": 6692 }, { "epoch": 0.8931144915932746, "grad_norm": 0.9374804062894364, "learning_rate": 5.932582875582904e-07, "loss": 1.5582, "step": 6693 }, { "epoch": 0.8932479316786762, "grad_norm": 0.9034683624937042, "learning_rate": 5.917926953271857e-07, "loss": 1.5244, "step": 6694 }, { "epoch": 0.893381371764078, "grad_norm": 1.0506006939018857, "learning_rate": 5.903288604420887e-07, "loss": 1.5336, "step": 6695 }, { "epoch": 0.8935148118494796, "grad_norm": 0.9685577008965324, "learning_rate": 5.888667831764283e-07, "loss": 1.5201, "step": 6696 }, { "epoch": 0.8936482519348813, "grad_norm": 0.9546554805063521, "learning_rate": 5.874064638033017e-07, "loss": 1.5315, "step": 6697 }, { "epoch": 0.8937816920202829, "grad_norm": 0.9383844169484069, "learning_rate": 5.859479025954817e-07, "loss": 1.5399, "step": 6698 }, { "epoch": 0.8939151321056845, "grad_norm": 1.0001219462580064, "learning_rate": 5.844910998254117e-07, "loss": 1.534, "step": 6699 }, { "epoch": 0.8940485721910862, "grad_norm": 1.0514709520939773, "learning_rate": 5.830360557652026e-07, "loss": 1.5436, "step": 6700 }, { "epoch": 0.8941820122764879, "grad_norm": 0.9603464211449854, "learning_rate": 5.815827706866439e-07, "loss": 1.6296, "step": 6701 }, { "epoch": 0.8943154523618895, "grad_norm": 0.9699909980766925, "learning_rate": 5.801312448611907e-07, "loss": 1.5712, "step": 6702 }, { "epoch": 0.8944488924472912, "grad_norm": 0.9081518549220741, "learning_rate": 5.786814785599715e-07, "loss": 1.5363, "step": 6703 }, { "epoch": 0.8945823325326928, "grad_norm": 0.9477859920909177, "learning_rate": 5.772334720537854e-07, "loss": 1.5438, "step": 6704 }, { "epoch": 0.8947157726180944, "grad_norm": 0.9467954261018504, "learning_rate": 5.757872256131048e-07, "loss": 1.5359, "step": 6705 }, { "epoch": 0.8948492127034962, "grad_norm": 1.0635463978582553, "learning_rate": 5.743427395080736e-07, "loss": 1.5161, "step": 6706 }, { "epoch": 0.8949826527888978, "grad_norm": 0.9139015356788128, "learning_rate": 5.729000140085017e-07, "loss": 1.5263, "step": 6707 }, { "epoch": 0.8951160928742994, "grad_norm": 1.2623806658299492, "learning_rate": 5.714590493838768e-07, "loss": 1.5617, "step": 6708 }, { "epoch": 0.8952495329597011, "grad_norm": 1.0270566802124301, "learning_rate": 5.700198459033535e-07, "loss": 1.5226, "step": 6709 }, { "epoch": 0.8953829730451027, "grad_norm": 0.9557701098327668, "learning_rate": 5.685824038357568e-07, "loss": 1.554, "step": 6710 }, { "epoch": 0.8955164131305045, "grad_norm": 0.8951591320979515, "learning_rate": 5.671467234495875e-07, "loss": 1.5101, "step": 6711 }, { "epoch": 0.8956498532159061, "grad_norm": 0.9290933211646522, "learning_rate": 5.657128050130134e-07, "loss": 1.5454, "step": 6712 }, { "epoch": 0.8957832933013077, "grad_norm": 0.935571799488784, "learning_rate": 5.642806487938746e-07, "loss": 1.6001, "step": 6713 }, { "epoch": 0.8959167333867094, "grad_norm": 0.967264821795858, "learning_rate": 5.628502550596781e-07, "loss": 1.4861, "step": 6714 }, { "epoch": 0.896050173472111, "grad_norm": 0.9487698266935534, "learning_rate": 5.614216240776105e-07, "loss": 1.5693, "step": 6715 }, { "epoch": 0.8961836135575126, "grad_norm": 1.1670691771030441, "learning_rate": 5.599947561145214e-07, "loss": 1.4934, "step": 6716 }, { "epoch": 0.8963170536429144, "grad_norm": 0.960589734890621, "learning_rate": 5.585696514369321e-07, "loss": 1.5477, "step": 6717 }, { "epoch": 0.896450493728316, "grad_norm": 0.935167699678419, "learning_rate": 5.571463103110375e-07, "loss": 1.4821, "step": 6718 }, { "epoch": 0.8965839338137176, "grad_norm": 0.9947979187682457, "learning_rate": 5.557247330027016e-07, "loss": 1.5684, "step": 6719 }, { "epoch": 0.8967173738991193, "grad_norm": 0.9593305751018268, "learning_rate": 5.543049197774586e-07, "loss": 1.5996, "step": 6720 }, { "epoch": 0.8968508139845209, "grad_norm": 0.9375856914820678, "learning_rate": 5.52886870900512e-07, "loss": 1.5154, "step": 6721 }, { "epoch": 0.8969842540699227, "grad_norm": 1.2862051330318942, "learning_rate": 5.514705866367387e-07, "loss": 1.5196, "step": 6722 }, { "epoch": 0.8971176941553243, "grad_norm": 0.9340659004674106, "learning_rate": 5.500560672506861e-07, "loss": 1.4784, "step": 6723 }, { "epoch": 0.8972511342407259, "grad_norm": 0.945137802994412, "learning_rate": 5.486433130065672e-07, "loss": 1.5399, "step": 6724 }, { "epoch": 0.8973845743261276, "grad_norm": 1.0209524963685872, "learning_rate": 5.472323241682687e-07, "loss": 1.5503, "step": 6725 }, { "epoch": 0.8975180144115292, "grad_norm": 0.927685929338529, "learning_rate": 5.458231009993498e-07, "loss": 1.5844, "step": 6726 }, { "epoch": 0.8976514544969308, "grad_norm": 0.9428359886770269, "learning_rate": 5.444156437630365e-07, "loss": 1.5389, "step": 6727 }, { "epoch": 0.8977848945823326, "grad_norm": 0.9334863494390306, "learning_rate": 5.430099527222244e-07, "loss": 1.5162, "step": 6728 }, { "epoch": 0.8979183346677342, "grad_norm": 0.9436338050010254, "learning_rate": 5.416060281394797e-07, "loss": 1.5382, "step": 6729 }, { "epoch": 0.8980517747531358, "grad_norm": 0.9365789196387994, "learning_rate": 5.402038702770418e-07, "loss": 1.5249, "step": 6730 }, { "epoch": 0.8981852148385375, "grad_norm": 0.9221528407309991, "learning_rate": 5.388034793968189e-07, "loss": 1.5386, "step": 6731 }, { "epoch": 0.8983186549239391, "grad_norm": 0.9515618324069464, "learning_rate": 5.374048557603828e-07, "loss": 1.5362, "step": 6732 }, { "epoch": 0.8984520950093408, "grad_norm": 0.9944556911301887, "learning_rate": 5.360079996289869e-07, "loss": 1.5167, "step": 6733 }, { "epoch": 0.8985855350947425, "grad_norm": 0.97850772806653, "learning_rate": 5.346129112635445e-07, "loss": 1.5102, "step": 6734 }, { "epoch": 0.8987189751801441, "grad_norm": 0.9540868569995883, "learning_rate": 5.332195909246429e-07, "loss": 1.5752, "step": 6735 }, { "epoch": 0.8988524152655458, "grad_norm": 0.9221941861156301, "learning_rate": 5.318280388725372e-07, "loss": 1.5118, "step": 6736 }, { "epoch": 0.8989858553509474, "grad_norm": 1.0270466084981307, "learning_rate": 5.304382553671561e-07, "loss": 1.5436, "step": 6737 }, { "epoch": 0.899119295436349, "grad_norm": 0.9203067458998816, "learning_rate": 5.290502406680931e-07, "loss": 1.5437, "step": 6738 }, { "epoch": 0.8992527355217508, "grad_norm": 0.9501536472353344, "learning_rate": 5.276639950346129e-07, "loss": 1.5075, "step": 6739 }, { "epoch": 0.8993861756071524, "grad_norm": 0.9334210192752896, "learning_rate": 5.262795187256542e-07, "loss": 1.5422, "step": 6740 }, { "epoch": 0.899519615692554, "grad_norm": 0.9567078114398553, "learning_rate": 5.248968119998188e-07, "loss": 1.5471, "step": 6741 }, { "epoch": 0.8996530557779557, "grad_norm": 0.9282038591345071, "learning_rate": 5.235158751153801e-07, "loss": 1.4623, "step": 6742 }, { "epoch": 0.8997864958633573, "grad_norm": 0.9384691548104735, "learning_rate": 5.22136708330282e-07, "loss": 1.5249, "step": 6743 }, { "epoch": 0.899919935948759, "grad_norm": 1.0717530433261342, "learning_rate": 5.20759311902137e-07, "loss": 1.5446, "step": 6744 }, { "epoch": 0.9000533760341607, "grad_norm": 0.949517161915402, "learning_rate": 5.193836860882284e-07, "loss": 1.5237, "step": 6745 }, { "epoch": 0.9001868161195623, "grad_norm": 0.9209721246022604, "learning_rate": 5.180098311455051e-07, "loss": 1.4865, "step": 6746 }, { "epoch": 0.9003202562049639, "grad_norm": 1.0530255691032593, "learning_rate": 5.166377473305894e-07, "loss": 1.5196, "step": 6747 }, { "epoch": 0.9004536962903656, "grad_norm": 0.9330131313465537, "learning_rate": 5.15267434899771e-07, "loss": 1.5512, "step": 6748 }, { "epoch": 0.9005871363757673, "grad_norm": 0.9202256141014292, "learning_rate": 5.138988941090084e-07, "loss": 1.5196, "step": 6749 }, { "epoch": 0.900720576461169, "grad_norm": 0.9650715822764238, "learning_rate": 5.125321252139282e-07, "loss": 1.5177, "step": 6750 }, { "epoch": 0.9008540165465706, "grad_norm": 0.9433756615509346, "learning_rate": 5.111671284698283e-07, "loss": 1.5432, "step": 6751 }, { "epoch": 0.9009874566319722, "grad_norm": 0.9533022305140074, "learning_rate": 5.098039041316738e-07, "loss": 1.5537, "step": 6752 }, { "epoch": 0.9011208967173739, "grad_norm": 0.9346416532826066, "learning_rate": 5.084424524540999e-07, "loss": 1.5495, "step": 6753 }, { "epoch": 0.9012543368027756, "grad_norm": 1.1020333932337052, "learning_rate": 5.070827736914119e-07, "loss": 1.5476, "step": 6754 }, { "epoch": 0.9013877768881772, "grad_norm": 0.9410283837732905, "learning_rate": 5.057248680975802e-07, "loss": 1.5037, "step": 6755 }, { "epoch": 0.9015212169735789, "grad_norm": 1.0772876014109762, "learning_rate": 5.043687359262472e-07, "loss": 1.5047, "step": 6756 }, { "epoch": 0.9016546570589805, "grad_norm": 0.9439200358382797, "learning_rate": 5.030143774307205e-07, "loss": 1.5584, "step": 6757 }, { "epoch": 0.9017880971443821, "grad_norm": 1.0161041213410495, "learning_rate": 5.016617928639822e-07, "loss": 1.5249, "step": 6758 }, { "epoch": 0.9019215372297839, "grad_norm": 0.9412328793676751, "learning_rate": 5.003109824786789e-07, "loss": 1.5376, "step": 6759 }, { "epoch": 0.9020549773151855, "grad_norm": 0.9639374913924775, "learning_rate": 4.989619465271245e-07, "loss": 1.5075, "step": 6760 }, { "epoch": 0.9021884174005871, "grad_norm": 0.9124862517814768, "learning_rate": 4.976146852613062e-07, "loss": 1.5236, "step": 6761 }, { "epoch": 0.9023218574859888, "grad_norm": 0.9328183814360856, "learning_rate": 4.96269198932876e-07, "loss": 1.5257, "step": 6762 }, { "epoch": 0.9024552975713904, "grad_norm": 0.9480856180023016, "learning_rate": 4.949254877931564e-07, "loss": 1.6097, "step": 6763 }, { "epoch": 0.9025887376567922, "grad_norm": 0.9452684747563659, "learning_rate": 4.935835520931342e-07, "loss": 1.5583, "step": 6764 }, { "epoch": 0.9027221777421938, "grad_norm": 0.9435189294655035, "learning_rate": 4.922433920834713e-07, "loss": 1.5576, "step": 6765 }, { "epoch": 0.9028556178275954, "grad_norm": 0.9100774226094155, "learning_rate": 4.909050080144928e-07, "loss": 1.5061, "step": 6766 }, { "epoch": 0.9029890579129971, "grad_norm": 0.9188166446494538, "learning_rate": 4.895684001361933e-07, "loss": 1.525, "step": 6767 }, { "epoch": 0.9031224979983987, "grad_norm": 0.9513533676277205, "learning_rate": 4.882335686982354e-07, "loss": 1.5194, "step": 6768 }, { "epoch": 0.9032559380838003, "grad_norm": 0.9901332701917303, "learning_rate": 4.869005139499528e-07, "loss": 1.5135, "step": 6769 }, { "epoch": 0.9033893781692021, "grad_norm": 0.9252469459313303, "learning_rate": 4.85569236140343e-07, "loss": 1.5236, "step": 6770 }, { "epoch": 0.9035228182546037, "grad_norm": 1.0114758784078768, "learning_rate": 4.842397355180728e-07, "loss": 1.5166, "step": 6771 }, { "epoch": 0.9036562583400053, "grad_norm": 0.972194230662802, "learning_rate": 4.829120123314801e-07, "loss": 1.5787, "step": 6772 }, { "epoch": 0.903789698425407, "grad_norm": 0.917284388104258, "learning_rate": 4.815860668285688e-07, "loss": 1.5353, "step": 6773 }, { "epoch": 0.9039231385108086, "grad_norm": 0.9549812627537546, "learning_rate": 4.802618992570074e-07, "loss": 1.545, "step": 6774 }, { "epoch": 0.9040565785962102, "grad_norm": 1.052982946218394, "learning_rate": 4.789395098641359e-07, "loss": 1.5932, "step": 6775 }, { "epoch": 0.904190018681612, "grad_norm": 1.1732404735888684, "learning_rate": 4.776188988969643e-07, "loss": 1.4899, "step": 6776 }, { "epoch": 0.9043234587670136, "grad_norm": 1.0066806501063883, "learning_rate": 4.7630006660216665e-07, "loss": 1.5585, "step": 6777 }, { "epoch": 0.9044568988524153, "grad_norm": 0.9579935580737865, "learning_rate": 4.749830132260824e-07, "loss": 1.5712, "step": 6778 }, { "epoch": 0.9045903389378169, "grad_norm": 0.9169006378445536, "learning_rate": 4.736677390147271e-07, "loss": 1.5371, "step": 6779 }, { "epoch": 0.9047237790232185, "grad_norm": 0.9413950267463799, "learning_rate": 4.723542442137774e-07, "loss": 1.5586, "step": 6780 }, { "epoch": 0.9048572191086203, "grad_norm": 0.9375274450226259, "learning_rate": 4.710425290685772e-07, "loss": 1.526, "step": 6781 }, { "epoch": 0.9049906591940219, "grad_norm": 0.920951060703472, "learning_rate": 4.697325938241404e-07, "loss": 1.5496, "step": 6782 }, { "epoch": 0.9051240992794235, "grad_norm": 0.9705124398869889, "learning_rate": 4.684244387251513e-07, "loss": 1.552, "step": 6783 }, { "epoch": 0.9052575393648252, "grad_norm": 0.9307149713548113, "learning_rate": 4.671180640159545e-07, "loss": 1.5031, "step": 6784 }, { "epoch": 0.9053909794502268, "grad_norm": 1.0044776590484505, "learning_rate": 4.65813469940567e-07, "loss": 1.5668, "step": 6785 }, { "epoch": 0.9055244195356285, "grad_norm": 0.8995467573403738, "learning_rate": 4.645106567426738e-07, "loss": 1.4998, "step": 6786 }, { "epoch": 0.9056578596210302, "grad_norm": 0.9441419162843016, "learning_rate": 4.632096246656237e-07, "loss": 1.5544, "step": 6787 }, { "epoch": 0.9057912997064318, "grad_norm": 1.37214339045131, "learning_rate": 4.619103739524355e-07, "loss": 1.5009, "step": 6788 }, { "epoch": 0.9059247397918335, "grad_norm": 0.9569798512304817, "learning_rate": 4.6061290484579304e-07, "loss": 1.5217, "step": 6789 }, { "epoch": 0.9060581798772351, "grad_norm": 0.9238396452899695, "learning_rate": 4.5931721758805117e-07, "loss": 1.5632, "step": 6790 }, { "epoch": 0.9061916199626368, "grad_norm": 0.9533994593848583, "learning_rate": 4.5802331242122855e-07, "loss": 1.559, "step": 6791 }, { "epoch": 0.9063250600480385, "grad_norm": 1.0268411283902035, "learning_rate": 4.567311895870108e-07, "loss": 1.517, "step": 6792 }, { "epoch": 0.9064585001334401, "grad_norm": 1.0100719989531157, "learning_rate": 4.554408493267537e-07, "loss": 1.5145, "step": 6793 }, { "epoch": 0.9065919402188417, "grad_norm": 1.1648731959473748, "learning_rate": 4.5415229188147667e-07, "loss": 1.5143, "step": 6794 }, { "epoch": 0.9067253803042434, "grad_norm": 0.92959204449064, "learning_rate": 4.5286551749186726e-07, "loss": 1.5744, "step": 6795 }, { "epoch": 0.906858820389645, "grad_norm": 0.9435972338142883, "learning_rate": 4.5158052639828085e-07, "loss": 1.5309, "step": 6796 }, { "epoch": 0.9069922604750467, "grad_norm": 1.0579708377998769, "learning_rate": 4.5029731884074004e-07, "loss": 1.5452, "step": 6797 }, { "epoch": 0.9071257005604484, "grad_norm": 1.6099878391111002, "learning_rate": 4.4901589505893297e-07, "loss": 1.535, "step": 6798 }, { "epoch": 0.90725914064585, "grad_norm": 0.9431159488281261, "learning_rate": 4.4773625529221486e-07, "loss": 1.5271, "step": 6799 }, { "epoch": 0.9073925807312516, "grad_norm": 0.9302835803444568, "learning_rate": 4.464583997796057e-07, "loss": 1.5343, "step": 6800 }, { "epoch": 0.9075260208166533, "grad_norm": 0.9727837080368901, "learning_rate": 4.451823287597978e-07, "loss": 1.5523, "step": 6801 }, { "epoch": 0.907659460902055, "grad_norm": 0.9311133151109089, "learning_rate": 4.4390804247114503e-07, "loss": 1.5352, "step": 6802 }, { "epoch": 0.9077929009874567, "grad_norm": 0.9834944210354547, "learning_rate": 4.4263554115166805e-07, "loss": 1.5093, "step": 6803 }, { "epoch": 0.9079263410728583, "grad_norm": 0.924803540849428, "learning_rate": 4.413648250390601e-07, "loss": 1.5702, "step": 6804 }, { "epoch": 0.9080597811582599, "grad_norm": 1.0516475698411472, "learning_rate": 4.400958943706724e-07, "loss": 1.566, "step": 6805 }, { "epoch": 0.9081932212436616, "grad_norm": 1.0934743180424145, "learning_rate": 4.388287493835286e-07, "loss": 1.5735, "step": 6806 }, { "epoch": 0.9083266613290633, "grad_norm": 0.9552392103374737, "learning_rate": 4.3756339031431394e-07, "loss": 1.563, "step": 6807 }, { "epoch": 0.9084601014144649, "grad_norm": 0.9653006612408468, "learning_rate": 4.362998173993882e-07, "loss": 1.5681, "step": 6808 }, { "epoch": 0.9085935414998666, "grad_norm": 0.9718108077335248, "learning_rate": 4.3503803087476926e-07, "loss": 1.5478, "step": 6809 }, { "epoch": 0.9087269815852682, "grad_norm": 0.9602753698421362, "learning_rate": 4.337780309761441e-07, "loss": 1.554, "step": 6810 }, { "epoch": 0.9088604216706698, "grad_norm": 0.9424771815385352, "learning_rate": 4.3251981793886787e-07, "loss": 1.6088, "step": 6811 }, { "epoch": 0.9089938617560716, "grad_norm": 0.9274015813847016, "learning_rate": 4.312633919979603e-07, "loss": 1.5803, "step": 6812 }, { "epoch": 0.9091273018414732, "grad_norm": 0.9230912583513253, "learning_rate": 4.300087533881059e-07, "loss": 1.5684, "step": 6813 }, { "epoch": 0.9092607419268748, "grad_norm": 0.9305252391203362, "learning_rate": 4.2875590234365825e-07, "loss": 1.4863, "step": 6814 }, { "epoch": 0.9093941820122765, "grad_norm": 0.9029270378762562, "learning_rate": 4.2750483909863584e-07, "loss": 1.5725, "step": 6815 }, { "epoch": 0.9095276220976781, "grad_norm": 0.9383970397120545, "learning_rate": 4.2625556388672163e-07, "loss": 1.5339, "step": 6816 }, { "epoch": 0.9096610621830798, "grad_norm": 0.9345725438722642, "learning_rate": 4.2500807694126677e-07, "loss": 1.5505, "step": 6817 }, { "epoch": 0.9097945022684815, "grad_norm": 0.9259234911532692, "learning_rate": 4.2376237849528936e-07, "loss": 1.5278, "step": 6818 }, { "epoch": 0.9099279423538831, "grad_norm": 1.0476543726975824, "learning_rate": 4.2251846878146873e-07, "loss": 1.5572, "step": 6819 }, { "epoch": 0.9100613824392848, "grad_norm": 0.9337601320766555, "learning_rate": 4.2127634803215576e-07, "loss": 1.5409, "step": 6820 }, { "epoch": 0.9101948225246864, "grad_norm": 0.9335871299642117, "learning_rate": 4.2003601647936156e-07, "loss": 1.5401, "step": 6821 }, { "epoch": 0.910328262610088, "grad_norm": 0.9622149934684122, "learning_rate": 4.187974743547674e-07, "loss": 1.5137, "step": 6822 }, { "epoch": 0.9104617026954898, "grad_norm": 0.9148887346642454, "learning_rate": 4.175607218897204e-07, "loss": 1.5505, "step": 6823 }, { "epoch": 0.9105951427808914, "grad_norm": 1.0530922369152327, "learning_rate": 4.1632575931522923e-07, "loss": 1.5171, "step": 6824 }, { "epoch": 0.910728582866293, "grad_norm": 0.9530413630746577, "learning_rate": 4.150925868619726e-07, "loss": 1.6038, "step": 6825 }, { "epoch": 0.9108620229516947, "grad_norm": 0.9248293389790625, "learning_rate": 4.138612047602919e-07, "loss": 1.53, "step": 6826 }, { "epoch": 0.9109954630370963, "grad_norm": 0.9435050547492767, "learning_rate": 4.126316132401975e-07, "loss": 1.5497, "step": 6827 }, { "epoch": 0.911128903122498, "grad_norm": 0.9172974815783576, "learning_rate": 4.1140381253135907e-07, "loss": 1.53, "step": 6828 }, { "epoch": 0.9112623432078997, "grad_norm": 0.9198332491276605, "learning_rate": 4.101778028631198e-07, "loss": 1.4973, "step": 6829 }, { "epoch": 0.9113957832933013, "grad_norm": 0.9340095445157393, "learning_rate": 4.0895358446448205e-07, "loss": 1.5311, "step": 6830 }, { "epoch": 0.911529223378703, "grad_norm": 0.9218669809304821, "learning_rate": 4.077311575641174e-07, "loss": 1.4977, "step": 6831 }, { "epoch": 0.9116626634641046, "grad_norm": 0.9386440639251815, "learning_rate": 4.0651052239035873e-07, "loss": 1.4714, "step": 6832 }, { "epoch": 0.9117961035495062, "grad_norm": 0.9188422179185332, "learning_rate": 4.0529167917121024e-07, "loss": 1.5086, "step": 6833 }, { "epoch": 0.911929543634908, "grad_norm": 0.9929237105805191, "learning_rate": 4.040746281343355e-07, "loss": 1.5435, "step": 6834 }, { "epoch": 0.9120629837203096, "grad_norm": 0.9262323335599812, "learning_rate": 4.0285936950706705e-07, "loss": 1.543, "step": 6835 }, { "epoch": 0.9121964238057112, "grad_norm": 0.9413275796185295, "learning_rate": 4.016459035164e-07, "loss": 1.5475, "step": 6836 }, { "epoch": 0.9123298638911129, "grad_norm": 0.9355976771543438, "learning_rate": 4.0043423038899743e-07, "loss": 1.5646, "step": 6837 }, { "epoch": 0.9124633039765145, "grad_norm": 0.9308452239118499, "learning_rate": 3.99224350351185e-07, "loss": 1.5282, "step": 6838 }, { "epoch": 0.9125967440619162, "grad_norm": 0.9352299931869122, "learning_rate": 3.9801626362895527e-07, "loss": 1.5478, "step": 6839 }, { "epoch": 0.9127301841473179, "grad_norm": 0.920552426822107, "learning_rate": 3.9680997044796445e-07, "loss": 1.571, "step": 6840 }, { "epoch": 0.9128636242327195, "grad_norm": 0.9614541460432983, "learning_rate": 3.9560547103353553e-07, "loss": 1.5716, "step": 6841 }, { "epoch": 0.9129970643181211, "grad_norm": 0.921161207602936, "learning_rate": 3.9440276561065306e-07, "loss": 1.5062, "step": 6842 }, { "epoch": 0.9131305044035228, "grad_norm": 0.9298591480513266, "learning_rate": 3.932018544039717e-07, "loss": 1.5823, "step": 6843 }, { "epoch": 0.9132639444889245, "grad_norm": 0.9522285441788466, "learning_rate": 3.920027376378055e-07, "loss": 1.5636, "step": 6844 }, { "epoch": 0.9133973845743262, "grad_norm": 0.9048091631898952, "learning_rate": 3.9080541553613737e-07, "loss": 1.5236, "step": 6845 }, { "epoch": 0.9135308246597278, "grad_norm": 0.8997967100666988, "learning_rate": 3.8960988832261184e-07, "loss": 1.543, "step": 6846 }, { "epoch": 0.9136642647451294, "grad_norm": 0.9344054356817217, "learning_rate": 3.884161562205413e-07, "loss": 1.5238, "step": 6847 }, { "epoch": 0.9137977048305311, "grad_norm": 0.9433085668577765, "learning_rate": 3.872242194529019e-07, "loss": 1.5323, "step": 6848 }, { "epoch": 0.9139311449159327, "grad_norm": 0.9298922807904612, "learning_rate": 3.8603407824233e-07, "loss": 1.5804, "step": 6849 }, { "epoch": 0.9140645850013344, "grad_norm": 0.916116285836366, "learning_rate": 3.8484573281113546e-07, "loss": 1.5167, "step": 6850 }, { "epoch": 0.9141980250867361, "grad_norm": 1.0047053088456148, "learning_rate": 3.8365918338128525e-07, "loss": 1.5509, "step": 6851 }, { "epoch": 0.9143314651721377, "grad_norm": 0.9283300536875279, "learning_rate": 3.8247443017441323e-07, "loss": 1.5111, "step": 6852 }, { "epoch": 0.9144649052575393, "grad_norm": 1.1042519917914184, "learning_rate": 3.812914734118167e-07, "loss": 1.5714, "step": 6853 }, { "epoch": 0.914598345342941, "grad_norm": 0.9081858321179135, "learning_rate": 3.8011031331446125e-07, "loss": 1.5021, "step": 6854 }, { "epoch": 0.9147317854283427, "grad_norm": 0.9771021703620095, "learning_rate": 3.7893095010297255e-07, "loss": 1.5391, "step": 6855 }, { "epoch": 0.9148652255137443, "grad_norm": 0.9348735621584969, "learning_rate": 3.7775338399764106e-07, "loss": 1.5353, "step": 6856 }, { "epoch": 0.914998665599146, "grad_norm": 1.217330780323833, "learning_rate": 3.765776152184264e-07, "loss": 1.5584, "step": 6857 }, { "epoch": 0.9151321056845476, "grad_norm": 0.9415726884299152, "learning_rate": 3.754036439849451e-07, "loss": 1.5495, "step": 6858 }, { "epoch": 0.9152655457699493, "grad_norm": 0.9466565821101811, "learning_rate": 3.742314705164829e-07, "loss": 1.5281, "step": 6859 }, { "epoch": 0.915398985855351, "grad_norm": 0.9439757015604497, "learning_rate": 3.7306109503198797e-07, "loss": 1.6172, "step": 6860 }, { "epoch": 0.9155324259407526, "grad_norm": 0.9358623022886202, "learning_rate": 3.718925177500743e-07, "loss": 1.5008, "step": 6861 }, { "epoch": 0.9156658660261543, "grad_norm": 0.9532844740769746, "learning_rate": 3.707257388890195e-07, "loss": 1.5318, "step": 6862 }, { "epoch": 0.9157993061115559, "grad_norm": 0.9749521833223408, "learning_rate": 3.6956075866676155e-07, "loss": 1.528, "step": 6863 }, { "epoch": 0.9159327461969575, "grad_norm": 1.1423465704015638, "learning_rate": 3.6839757730090744e-07, "loss": 1.5111, "step": 6864 }, { "epoch": 0.9160661862823593, "grad_norm": 0.9134442615213153, "learning_rate": 3.672361950087266e-07, "loss": 1.5284, "step": 6865 }, { "epoch": 0.9161996263677609, "grad_norm": 0.9286686714079571, "learning_rate": 3.660766120071513e-07, "loss": 1.4942, "step": 6866 }, { "epoch": 0.9163330664531625, "grad_norm": 0.9431823883198829, "learning_rate": 3.649188285127769e-07, "loss": 1.5244, "step": 6867 }, { "epoch": 0.9164665065385642, "grad_norm": 1.0666489900172478, "learning_rate": 3.637628447418673e-07, "loss": 1.5604, "step": 6868 }, { "epoch": 0.9165999466239658, "grad_norm": 0.9836730783947925, "learning_rate": 3.626086609103463e-07, "loss": 1.5952, "step": 6869 }, { "epoch": 0.9167333867093675, "grad_norm": 0.9593749865507312, "learning_rate": 3.614562772338015e-07, "loss": 1.4742, "step": 6870 }, { "epoch": 0.9168668267947692, "grad_norm": 1.0579631089786683, "learning_rate": 3.6030569392748294e-07, "loss": 1.5043, "step": 6871 }, { "epoch": 0.9170002668801708, "grad_norm": 0.979103974391311, "learning_rate": 3.5915691120631093e-07, "loss": 1.5323, "step": 6872 }, { "epoch": 0.9171337069655725, "grad_norm": 1.503246770281354, "learning_rate": 3.5800992928486265e-07, "loss": 1.5514, "step": 6873 }, { "epoch": 0.9172671470509741, "grad_norm": 0.9405027952328543, "learning_rate": 3.568647483773813e-07, "loss": 1.5276, "step": 6874 }, { "epoch": 0.9174005871363757, "grad_norm": 1.0134385203890537, "learning_rate": 3.557213686977723e-07, "loss": 1.5814, "step": 6875 }, { "epoch": 0.9175340272217775, "grad_norm": 0.9200398848804521, "learning_rate": 3.545797904596082e-07, "loss": 1.5544, "step": 6876 }, { "epoch": 0.9176674673071791, "grad_norm": 0.9685297046655957, "learning_rate": 3.5344001387612293e-07, "loss": 1.5557, "step": 6877 }, { "epoch": 0.9178009073925807, "grad_norm": 0.9225117368528908, "learning_rate": 3.5230203916021277e-07, "loss": 1.5779, "step": 6878 }, { "epoch": 0.9179343474779824, "grad_norm": 0.9089807335651439, "learning_rate": 3.511658665244377e-07, "loss": 1.4707, "step": 6879 }, { "epoch": 0.918067787563384, "grad_norm": 1.0909826397284352, "learning_rate": 3.5003149618102253e-07, "loss": 1.5554, "step": 6880 }, { "epoch": 0.9182012276487856, "grad_norm": 0.9370669644460864, "learning_rate": 3.4889892834185533e-07, "loss": 1.4784, "step": 6881 }, { "epoch": 0.9183346677341874, "grad_norm": 0.9322223361726935, "learning_rate": 3.4776816321848596e-07, "loss": 1.5147, "step": 6882 }, { "epoch": 0.918468107819589, "grad_norm": 0.9329739735097461, "learning_rate": 3.466392010221298e-07, "loss": 1.5383, "step": 6883 }, { "epoch": 0.9186015479049907, "grad_norm": 0.9151417878518067, "learning_rate": 3.4551204196366263e-07, "loss": 1.5576, "step": 6884 }, { "epoch": 0.9187349879903923, "grad_norm": 0.951435126300393, "learning_rate": 3.4438668625362493e-07, "loss": 1.5314, "step": 6885 }, { "epoch": 0.9188684280757939, "grad_norm": 0.913279499004807, "learning_rate": 3.432631341022219e-07, "loss": 1.4513, "step": 6886 }, { "epoch": 0.9190018681611957, "grad_norm": 0.9441844786118736, "learning_rate": 3.42141385719319e-07, "loss": 1.5387, "step": 6887 }, { "epoch": 0.9191353082465973, "grad_norm": 0.932767307846722, "learning_rate": 3.410214413144464e-07, "loss": 1.5246, "step": 6888 }, { "epoch": 0.9192687483319989, "grad_norm": 0.9472847800493374, "learning_rate": 3.399033010967967e-07, "loss": 1.4983, "step": 6889 }, { "epoch": 0.9194021884174006, "grad_norm": 1.0419370287145757, "learning_rate": 3.3878696527522624e-07, "loss": 1.5103, "step": 6890 }, { "epoch": 0.9195356285028022, "grad_norm": 0.9681983142545559, "learning_rate": 3.3767243405825487e-07, "loss": 1.5462, "step": 6891 }, { "epoch": 0.9196690685882039, "grad_norm": 0.9052614017344128, "learning_rate": 3.3655970765406056e-07, "loss": 1.4736, "step": 6892 }, { "epoch": 0.9198025086736056, "grad_norm": 1.0228191814158987, "learning_rate": 3.354487862704925e-07, "loss": 1.5639, "step": 6893 }, { "epoch": 0.9199359487590072, "grad_norm": 0.9360422534666102, "learning_rate": 3.3433967011505586e-07, "loss": 1.5466, "step": 6894 }, { "epoch": 0.9200693888444088, "grad_norm": 0.9264242257227067, "learning_rate": 3.332323593949205e-07, "loss": 1.5214, "step": 6895 }, { "epoch": 0.9202028289298105, "grad_norm": 0.9785840279808523, "learning_rate": 3.3212685431691983e-07, "loss": 1.521, "step": 6896 }, { "epoch": 0.9203362690152121, "grad_norm": 0.990110652510918, "learning_rate": 3.310231550875509e-07, "loss": 1.539, "step": 6897 }, { "epoch": 0.9204697091006139, "grad_norm": 0.9933025587703744, "learning_rate": 3.29921261912971e-07, "loss": 1.5062, "step": 6898 }, { "epoch": 0.9206031491860155, "grad_norm": 0.924550064236254, "learning_rate": 3.28821174999e-07, "loss": 1.5394, "step": 6899 }, { "epoch": 0.9207365892714171, "grad_norm": 0.9765816922537065, "learning_rate": 3.277228945511246e-07, "loss": 1.4902, "step": 6900 }, { "epoch": 0.9208700293568188, "grad_norm": 0.9523736840444362, "learning_rate": 3.266264207744885e-07, "loss": 1.5404, "step": 6901 }, { "epoch": 0.9210034694422204, "grad_norm": 0.9485498472126853, "learning_rate": 3.2553175387390225e-07, "loss": 1.5547, "step": 6902 }, { "epoch": 0.9211369095276221, "grad_norm": 0.9345918935854542, "learning_rate": 3.2443889405383564e-07, "loss": 1.4989, "step": 6903 }, { "epoch": 0.9212703496130238, "grad_norm": 0.9620003150256877, "learning_rate": 3.2334784151842434e-07, "loss": 1.5763, "step": 6904 }, { "epoch": 0.9214037896984254, "grad_norm": 0.9533414019445823, "learning_rate": 3.2225859647146306e-07, "loss": 1.5578, "step": 6905 }, { "epoch": 0.921537229783827, "grad_norm": 1.030837787394548, "learning_rate": 3.2117115911640904e-07, "loss": 1.5385, "step": 6906 }, { "epoch": 0.9216706698692287, "grad_norm": 0.9838369808924554, "learning_rate": 3.200855296563865e-07, "loss": 1.5466, "step": 6907 }, { "epoch": 0.9218041099546304, "grad_norm": 0.9369615843893399, "learning_rate": 3.1900170829417765e-07, "loss": 1.5616, "step": 6908 }, { "epoch": 0.921937550040032, "grad_norm": 1.0512907143003218, "learning_rate": 3.179196952322272e-07, "loss": 1.5901, "step": 6909 }, { "epoch": 0.9220709901254337, "grad_norm": 0.9920000789006053, "learning_rate": 3.168394906726413e-07, "loss": 1.5857, "step": 6910 }, { "epoch": 0.9222044302108353, "grad_norm": 0.9257587202232741, "learning_rate": 3.1576109481719294e-07, "loss": 1.5313, "step": 6911 }, { "epoch": 0.922337870296237, "grad_norm": 0.9418319592189258, "learning_rate": 3.146845078673133e-07, "loss": 1.539, "step": 6912 }, { "epoch": 0.9224713103816387, "grad_norm": 0.9070087342084403, "learning_rate": 3.136097300240948e-07, "loss": 1.4764, "step": 6913 }, { "epoch": 0.9226047504670403, "grad_norm": 0.965385596189811, "learning_rate": 3.125367614882957e-07, "loss": 1.5912, "step": 6914 }, { "epoch": 0.922738190552442, "grad_norm": 0.9457846928440798, "learning_rate": 3.114656024603346e-07, "loss": 1.6022, "step": 6915 }, { "epoch": 0.9228716306378436, "grad_norm": 0.9626967040370848, "learning_rate": 3.1039625314028934e-07, "loss": 1.5788, "step": 6916 }, { "epoch": 0.9230050707232452, "grad_norm": 0.9262926987232699, "learning_rate": 3.093287137279044e-07, "loss": 1.5076, "step": 6917 }, { "epoch": 0.923138510808647, "grad_norm": 0.9263671794999331, "learning_rate": 3.0826298442258263e-07, "loss": 1.5376, "step": 6918 }, { "epoch": 0.9232719508940486, "grad_norm": 1.0802339378714219, "learning_rate": 3.071990654233925e-07, "loss": 1.5194, "step": 6919 }, { "epoch": 0.9234053909794502, "grad_norm": 0.9517175443121075, "learning_rate": 3.0613695692905955e-07, "loss": 1.5472, "step": 6920 }, { "epoch": 0.9235388310648519, "grad_norm": 0.9997691999553755, "learning_rate": 3.050766591379739e-07, "loss": 1.5521, "step": 6921 }, { "epoch": 0.9236722711502535, "grad_norm": 0.927201693441311, "learning_rate": 3.040181722481872e-07, "loss": 1.5507, "step": 6922 }, { "epoch": 0.9238057112356551, "grad_norm": 0.9436121306544267, "learning_rate": 3.0296149645741344e-07, "loss": 1.5319, "step": 6923 }, { "epoch": 0.9239391513210569, "grad_norm": 0.9358298860342295, "learning_rate": 3.0190663196302706e-07, "loss": 1.512, "step": 6924 }, { "epoch": 0.9240725914064585, "grad_norm": 0.9671848262898582, "learning_rate": 3.008535789620648e-07, "loss": 1.5107, "step": 6925 }, { "epoch": 0.9242060314918602, "grad_norm": 0.9462619779524947, "learning_rate": 2.99802337651226e-07, "loss": 1.5133, "step": 6926 }, { "epoch": 0.9243394715772618, "grad_norm": 0.9375880200694752, "learning_rate": 2.987529082268692e-07, "loss": 1.5224, "step": 6927 }, { "epoch": 0.9244729116626634, "grad_norm": 0.9185590934633807, "learning_rate": 2.977052908850142e-07, "loss": 1.5366, "step": 6928 }, { "epoch": 0.9246063517480652, "grad_norm": 0.925660519615923, "learning_rate": 2.9665948582134783e-07, "loss": 1.5373, "step": 6929 }, { "epoch": 0.9247397918334668, "grad_norm": 0.9558442289500324, "learning_rate": 2.9561549323121385e-07, "loss": 1.5556, "step": 6930 }, { "epoch": 0.9248732319188684, "grad_norm": 0.9105723816514041, "learning_rate": 2.9457331330961513e-07, "loss": 1.5277, "step": 6931 }, { "epoch": 0.9250066720042701, "grad_norm": 0.9402020298370549, "learning_rate": 2.935329462512226e-07, "loss": 1.5444, "step": 6932 }, { "epoch": 0.9251401120896717, "grad_norm": 0.9389120664607241, "learning_rate": 2.9249439225036313e-07, "loss": 1.5906, "step": 6933 }, { "epoch": 0.9252735521750733, "grad_norm": 0.9566777094490774, "learning_rate": 2.91457651501027e-07, "loss": 1.5388, "step": 6934 }, { "epoch": 0.9254069922604751, "grad_norm": 0.9475669400048962, "learning_rate": 2.904227241968638e-07, "loss": 1.5122, "step": 6935 }, { "epoch": 0.9255404323458767, "grad_norm": 0.9606016426402592, "learning_rate": 2.8938961053118997e-07, "loss": 1.5747, "step": 6936 }, { "epoch": 0.9256738724312784, "grad_norm": 0.9416160631017734, "learning_rate": 2.8835831069697786e-07, "loss": 1.5942, "step": 6937 }, { "epoch": 0.92580731251668, "grad_norm": 0.9255067631796212, "learning_rate": 2.873288248868611e-07, "loss": 1.5681, "step": 6938 }, { "epoch": 0.9259407526020816, "grad_norm": 0.9354963560086351, "learning_rate": 2.86301153293137e-07, "loss": 1.5262, "step": 6939 }, { "epoch": 0.9260741926874834, "grad_norm": 0.9413294820231983, "learning_rate": 2.852752961077632e-07, "loss": 1.535, "step": 6940 }, { "epoch": 0.926207632772885, "grad_norm": 0.9362918263402716, "learning_rate": 2.842512535223585e-07, "loss": 1.5211, "step": 6941 }, { "epoch": 0.9263410728582866, "grad_norm": 0.921974590840743, "learning_rate": 2.832290257282e-07, "loss": 1.5103, "step": 6942 }, { "epoch": 0.9264745129436883, "grad_norm": 0.9369764170400131, "learning_rate": 2.822086129162305e-07, "loss": 1.5596, "step": 6943 }, { "epoch": 0.9266079530290899, "grad_norm": 0.9147557346642654, "learning_rate": 2.811900152770519e-07, "loss": 1.5058, "step": 6944 }, { "epoch": 0.9267413931144916, "grad_norm": 0.9679302190588822, "learning_rate": 2.8017323300092435e-07, "loss": 1.576, "step": 6945 }, { "epoch": 0.9268748331998933, "grad_norm": 0.9466621416815499, "learning_rate": 2.791582662777725e-07, "loss": 1.5155, "step": 6946 }, { "epoch": 0.9270082732852949, "grad_norm": 1.0259125716265545, "learning_rate": 2.7814511529718146e-07, "loss": 1.588, "step": 6947 }, { "epoch": 0.9271417133706965, "grad_norm": 0.9713536021169017, "learning_rate": 2.771337802483953e-07, "loss": 1.505, "step": 6948 }, { "epoch": 0.9272751534560982, "grad_norm": 0.9124231485781754, "learning_rate": 2.7612426132031966e-07, "loss": 1.5179, "step": 6949 }, { "epoch": 0.9274085935414998, "grad_norm": 0.9450377082479692, "learning_rate": 2.751165587015214e-07, "loss": 1.516, "step": 6950 }, { "epoch": 0.9275420336269016, "grad_norm": 0.9430651969927591, "learning_rate": 2.7411067258022896e-07, "loss": 1.5616, "step": 6951 }, { "epoch": 0.9276754737123032, "grad_norm": 1.1461572084836693, "learning_rate": 2.731066031443275e-07, "loss": 1.5797, "step": 6952 }, { "epoch": 0.9278089137977048, "grad_norm": 0.9850748413044481, "learning_rate": 2.721043505813692e-07, "loss": 1.6142, "step": 6953 }, { "epoch": 0.9279423538831065, "grad_norm": 0.9410326470678416, "learning_rate": 2.7110391507856215e-07, "loss": 1.5513, "step": 6954 }, { "epoch": 0.9280757939685081, "grad_norm": 0.9717540522614107, "learning_rate": 2.7010529682277573e-07, "loss": 1.5271, "step": 6955 }, { "epoch": 0.9282092340539098, "grad_norm": 1.2527445070154288, "learning_rate": 2.691084960005408e-07, "loss": 1.554, "step": 6956 }, { "epoch": 0.9283426741393115, "grad_norm": 0.9380409123192367, "learning_rate": 2.681135127980483e-07, "loss": 1.4966, "step": 6957 }, { "epoch": 0.9284761142247131, "grad_norm": 1.067794635668956, "learning_rate": 2.671203474011508e-07, "loss": 1.4979, "step": 6958 }, { "epoch": 0.9286095543101147, "grad_norm": 0.924552942569084, "learning_rate": 2.6612899999535867e-07, "loss": 1.6066, "step": 6959 }, { "epoch": 0.9287429943955164, "grad_norm": 0.9844576271371721, "learning_rate": 2.65139470765845e-07, "loss": 1.558, "step": 6960 }, { "epoch": 0.9288764344809181, "grad_norm": 0.9671888751234345, "learning_rate": 2.64151759897443e-07, "loss": 1.5542, "step": 6961 }, { "epoch": 0.9290098745663197, "grad_norm": 0.9178373406964115, "learning_rate": 2.6316586757464513e-07, "loss": 1.5549, "step": 6962 }, { "epoch": 0.9291433146517214, "grad_norm": 0.9799980886987, "learning_rate": 2.621817939816051e-07, "loss": 1.5763, "step": 6963 }, { "epoch": 0.929276754737123, "grad_norm": 0.9239659156537983, "learning_rate": 2.6119953930213713e-07, "loss": 1.5654, "step": 6964 }, { "epoch": 0.9294101948225247, "grad_norm": 1.0117689513131394, "learning_rate": 2.602191037197155e-07, "loss": 1.6097, "step": 6965 }, { "epoch": 0.9295436349079264, "grad_norm": 0.9259063770945314, "learning_rate": 2.592404874174714e-07, "loss": 1.5442, "step": 6966 }, { "epoch": 0.929677074993328, "grad_norm": 0.94289731705226, "learning_rate": 2.582636905782032e-07, "loss": 1.5511, "step": 6967 }, { "epoch": 0.9298105150787297, "grad_norm": 1.2363951568921343, "learning_rate": 2.572887133843638e-07, "loss": 1.5284, "step": 6968 }, { "epoch": 0.9299439551641313, "grad_norm": 0.9237895244839454, "learning_rate": 2.5631555601806746e-07, "loss": 1.5666, "step": 6969 }, { "epoch": 0.9300773952495329, "grad_norm": 1.2534199107639514, "learning_rate": 2.5534421866108884e-07, "loss": 1.5386, "step": 6970 }, { "epoch": 0.9302108353349346, "grad_norm": 0.9440071222676694, "learning_rate": 2.543747014948639e-07, "loss": 1.5252, "step": 6971 }, { "epoch": 0.9303442754203363, "grad_norm": 0.9410423473034675, "learning_rate": 2.5340700470048664e-07, "loss": 1.5539, "step": 6972 }, { "epoch": 0.9304777155057379, "grad_norm": 0.9370283821223521, "learning_rate": 2.524411284587114e-07, "loss": 1.5497, "step": 6973 }, { "epoch": 0.9306111555911396, "grad_norm": 0.9353742776563619, "learning_rate": 2.5147707294995274e-07, "loss": 1.4892, "step": 6974 }, { "epoch": 0.9307445956765412, "grad_norm": 0.9297881657463101, "learning_rate": 2.505148383542866e-07, "loss": 1.5467, "step": 6975 }, { "epoch": 0.9308780357619428, "grad_norm": 0.952275461349787, "learning_rate": 2.495544248514459e-07, "loss": 1.4877, "step": 6976 }, { "epoch": 0.9310114758473446, "grad_norm": 0.9152447233126888, "learning_rate": 2.48595832620826e-07, "loss": 1.5295, "step": 6977 }, { "epoch": 0.9311449159327462, "grad_norm": 0.91217801833338, "learning_rate": 2.476390618414803e-07, "loss": 1.5292, "step": 6978 }, { "epoch": 0.9312783560181479, "grad_norm": 0.927701396795192, "learning_rate": 2.4668411269212377e-07, "loss": 1.5662, "step": 6979 }, { "epoch": 0.9314117961035495, "grad_norm": 0.9404375961303916, "learning_rate": 2.4573098535112913e-07, "loss": 1.5882, "step": 6980 }, { "epoch": 0.9315452361889511, "grad_norm": 0.9405292351716275, "learning_rate": 2.4477967999652854e-07, "loss": 1.512, "step": 6981 }, { "epoch": 0.9316786762743529, "grad_norm": 0.9390791133758444, "learning_rate": 2.438301968060186e-07, "loss": 1.5456, "step": 6982 }, { "epoch": 0.9318121163597545, "grad_norm": 0.9294723456622169, "learning_rate": 2.4288253595694865e-07, "loss": 1.5763, "step": 6983 }, { "epoch": 0.9319455564451561, "grad_norm": 0.9086002326565813, "learning_rate": 2.419366976263315e-07, "loss": 1.5412, "step": 6984 }, { "epoch": 0.9320789965305578, "grad_norm": 1.0686971025848702, "learning_rate": 2.409926819908404e-07, "loss": 1.5746, "step": 6985 }, { "epoch": 0.9322124366159594, "grad_norm": 1.0891885771737662, "learning_rate": 2.400504892268052e-07, "loss": 1.5662, "step": 6986 }, { "epoch": 0.932345876701361, "grad_norm": 0.9460922151250107, "learning_rate": 2.391101195102175e-07, "loss": 1.557, "step": 6987 }, { "epoch": 0.9324793167867628, "grad_norm": 0.985651528432505, "learning_rate": 2.3817157301672777e-07, "loss": 1.578, "step": 6988 }, { "epoch": 0.9326127568721644, "grad_norm": 0.9664112206315217, "learning_rate": 2.372348499216459e-07, "loss": 1.539, "step": 6989 }, { "epoch": 0.932746196957566, "grad_norm": 0.9490309404868328, "learning_rate": 2.3629995039994082e-07, "loss": 1.5389, "step": 6990 }, { "epoch": 0.9328796370429677, "grad_norm": 0.9577672566554155, "learning_rate": 2.3536687462624053e-07, "loss": 1.5601, "step": 6991 }, { "epoch": 0.9330130771283693, "grad_norm": 0.9223528519716557, "learning_rate": 2.3443562277483345e-07, "loss": 1.5533, "step": 6992 }, { "epoch": 0.9331465172137711, "grad_norm": 0.9427663030847381, "learning_rate": 2.335061950196671e-07, "loss": 1.5562, "step": 6993 }, { "epoch": 0.9332799572991727, "grad_norm": 0.9523796363575632, "learning_rate": 2.3257859153434815e-07, "loss": 1.556, "step": 6994 }, { "epoch": 0.9334133973845743, "grad_norm": 0.9514855637959606, "learning_rate": 2.3165281249214133e-07, "loss": 1.5486, "step": 6995 }, { "epoch": 0.933546837469976, "grad_norm": 0.8960652793709516, "learning_rate": 2.307288580659728e-07, "loss": 1.4996, "step": 6996 }, { "epoch": 0.9336802775553776, "grad_norm": 0.9468839774939591, "learning_rate": 2.2980672842842665e-07, "loss": 1.5557, "step": 6997 }, { "epoch": 0.9338137176407793, "grad_norm": 0.9323423502741678, "learning_rate": 2.288864237517463e-07, "loss": 1.5423, "step": 6998 }, { "epoch": 0.933947157726181, "grad_norm": 0.9477440100001612, "learning_rate": 2.2796794420783198e-07, "loss": 1.5839, "step": 6999 }, { "epoch": 0.9340805978115826, "grad_norm": 0.9719508936768406, "learning_rate": 2.2705128996824755e-07, "loss": 1.5303, "step": 7000 }, { "epoch": 0.9342140378969842, "grad_norm": 0.9511975135556928, "learning_rate": 2.2613646120421383e-07, "loss": 1.5622, "step": 7001 }, { "epoch": 0.9343474779823859, "grad_norm": 0.9164128395943615, "learning_rate": 2.2522345808660861e-07, "loss": 1.5558, "step": 7002 }, { "epoch": 0.9344809180677875, "grad_norm": 0.9532778700819494, "learning_rate": 2.24312280785971e-07, "loss": 1.5872, "step": 7003 }, { "epoch": 0.9346143581531893, "grad_norm": 0.9256358437386732, "learning_rate": 2.2340292947250043e-07, "loss": 1.5318, "step": 7004 }, { "epoch": 0.9347477982385909, "grad_norm": 1.0723225175898141, "learning_rate": 2.2249540431605099e-07, "loss": 1.5304, "step": 7005 }, { "epoch": 0.9348812383239925, "grad_norm": 0.941909432602312, "learning_rate": 2.2158970548613934e-07, "loss": 1.4993, "step": 7006 }, { "epoch": 0.9350146784093942, "grad_norm": 0.9580854716526129, "learning_rate": 2.2068583315193902e-07, "loss": 1.5505, "step": 7007 }, { "epoch": 0.9351481184947958, "grad_norm": 0.9149155992270841, "learning_rate": 2.197837874822839e-07, "loss": 1.5243, "step": 7008 }, { "epoch": 0.9352815585801975, "grad_norm": 0.931850561675305, "learning_rate": 2.1888356864566363e-07, "loss": 1.5763, "step": 7009 }, { "epoch": 0.9354149986655992, "grad_norm": 0.945264466148816, "learning_rate": 2.1798517681023257e-07, "loss": 1.5681, "step": 7010 }, { "epoch": 0.9355484387510008, "grad_norm": 1.1408246656369734, "learning_rate": 2.1708861214379762e-07, "loss": 1.5508, "step": 7011 }, { "epoch": 0.9356818788364024, "grad_norm": 0.9305152136394855, "learning_rate": 2.1619387481382704e-07, "loss": 1.4831, "step": 7012 }, { "epoch": 0.9358153189218041, "grad_norm": 0.9374514423774111, "learning_rate": 2.153009649874471e-07, "loss": 1.534, "step": 7013 }, { "epoch": 0.9359487590072058, "grad_norm": 0.9373740971918221, "learning_rate": 2.144098828314445e-07, "loss": 1.4639, "step": 7014 }, { "epoch": 0.9360821990926074, "grad_norm": 0.9202329086139532, "learning_rate": 2.1352062851226263e-07, "loss": 1.5189, "step": 7015 }, { "epoch": 0.9362156391780091, "grad_norm": 0.9251964887586024, "learning_rate": 2.1263320219600426e-07, "loss": 1.5786, "step": 7016 }, { "epoch": 0.9363490792634107, "grad_norm": 0.9284256574113852, "learning_rate": 2.1174760404843008e-07, "loss": 1.526, "step": 7017 }, { "epoch": 0.9364825193488124, "grad_norm": 1.882242068390261, "learning_rate": 2.1086383423496004e-07, "loss": 1.5768, "step": 7018 }, { "epoch": 0.936615959434214, "grad_norm": 0.9901007118211904, "learning_rate": 2.0998189292067316e-07, "loss": 1.5077, "step": 7019 }, { "epoch": 0.9367493995196157, "grad_norm": 1.0159260253271438, "learning_rate": 2.0910178027030326e-07, "loss": 1.5657, "step": 7020 }, { "epoch": 0.9368828396050174, "grad_norm": 0.9401229086492431, "learning_rate": 2.082234964482488e-07, "loss": 1.5582, "step": 7021 }, { "epoch": 0.937016279690419, "grad_norm": 1.0751751887062786, "learning_rate": 2.073470416185619e-07, "loss": 1.5412, "step": 7022 }, { "epoch": 0.9371497197758206, "grad_norm": 0.9506968457876054, "learning_rate": 2.0647241594495381e-07, "loss": 1.525, "step": 7023 }, { "epoch": 0.9372831598612223, "grad_norm": 0.9301700959194582, "learning_rate": 2.0559961959079278e-07, "loss": 1.531, "step": 7024 }, { "epoch": 0.937416599946624, "grad_norm": 0.9323275478960292, "learning_rate": 2.047286527191117e-07, "loss": 1.5362, "step": 7025 }, { "epoch": 0.9375500400320256, "grad_norm": 0.9191649723560835, "learning_rate": 2.0385951549259486e-07, "loss": 1.5815, "step": 7026 }, { "epoch": 0.9376834801174273, "grad_norm": 1.032563096761044, "learning_rate": 2.0299220807358578e-07, "loss": 1.5741, "step": 7027 }, { "epoch": 0.9378169202028289, "grad_norm": 0.987366129369346, "learning_rate": 2.0212673062409038e-07, "loss": 1.5807, "step": 7028 }, { "epoch": 0.9379503602882305, "grad_norm": 0.9364694767430964, "learning_rate": 2.0126308330576937e-07, "loss": 1.5519, "step": 7029 }, { "epoch": 0.9380838003736323, "grad_norm": 1.4286340232745998, "learning_rate": 2.004012662799404e-07, "loss": 1.5545, "step": 7030 }, { "epoch": 0.9382172404590339, "grad_norm": 0.935633863001671, "learning_rate": 1.9954127970758131e-07, "loss": 1.5272, "step": 7031 }, { "epoch": 0.9383506805444356, "grad_norm": 0.9511612705616507, "learning_rate": 1.986831237493303e-07, "loss": 1.5378, "step": 7032 }, { "epoch": 0.9384841206298372, "grad_norm": 0.9426846301005706, "learning_rate": 1.9782679856548025e-07, "loss": 1.5306, "step": 7033 }, { "epoch": 0.9386175607152388, "grad_norm": 1.055465469484963, "learning_rate": 1.969723043159799e-07, "loss": 1.5642, "step": 7034 }, { "epoch": 0.9387510008006406, "grad_norm": 0.9183296763521461, "learning_rate": 1.961196411604438e-07, "loss": 1.5428, "step": 7035 }, { "epoch": 0.9388844408860422, "grad_norm": 0.9134482621394847, "learning_rate": 1.9526880925813673e-07, "loss": 1.501, "step": 7036 }, { "epoch": 0.9390178809714438, "grad_norm": 0.9419067060611698, "learning_rate": 1.9441980876798493e-07, "loss": 1.5601, "step": 7037 }, { "epoch": 0.9391513210568455, "grad_norm": 0.9369479568180347, "learning_rate": 1.9357263984857044e-07, "loss": 1.5471, "step": 7038 }, { "epoch": 0.9392847611422471, "grad_norm": 0.9557500312485121, "learning_rate": 1.9272730265813887e-07, "loss": 1.6029, "step": 7039 }, { "epoch": 0.9394182012276487, "grad_norm": 1.0554599506743223, "learning_rate": 1.9188379735458618e-07, "loss": 1.5511, "step": 7040 }, { "epoch": 0.9395516413130505, "grad_norm": 1.0458439629457223, "learning_rate": 1.9104212409546964e-07, "loss": 1.5568, "step": 7041 }, { "epoch": 0.9396850813984521, "grad_norm": 1.0948545675024355, "learning_rate": 1.902022830380068e-07, "loss": 1.537, "step": 7042 }, { "epoch": 0.9398185214838537, "grad_norm": 1.004814199766514, "learning_rate": 1.8936427433906778e-07, "loss": 1.5177, "step": 7043 }, { "epoch": 0.9399519615692554, "grad_norm": 0.9607332814184879, "learning_rate": 1.8852809815518514e-07, "loss": 1.5538, "step": 7044 }, { "epoch": 0.940085401654657, "grad_norm": 0.9487552535897916, "learning_rate": 1.8769375464254503e-07, "loss": 1.5562, "step": 7045 }, { "epoch": 0.9402188417400588, "grad_norm": 0.9083170430971447, "learning_rate": 1.868612439569939e-07, "loss": 1.5094, "step": 7046 }, { "epoch": 0.9403522818254604, "grad_norm": 0.9634251033944141, "learning_rate": 1.8603056625403627e-07, "loss": 1.5631, "step": 7047 }, { "epoch": 0.940485721910862, "grad_norm": 0.9656510203512615, "learning_rate": 1.8520172168883243e-07, "loss": 1.5834, "step": 7048 }, { "epoch": 0.9406191619962637, "grad_norm": 1.0000660756193365, "learning_rate": 1.843747104162008e-07, "loss": 1.582, "step": 7049 }, { "epoch": 0.9407526020816653, "grad_norm": 1.1327819091986768, "learning_rate": 1.8354953259061892e-07, "loss": 1.5539, "step": 7050 }, { "epoch": 0.940886042167067, "grad_norm": 0.9810323631990697, "learning_rate": 1.8272618836621902e-07, "loss": 1.5371, "step": 7051 }, { "epoch": 0.9410194822524687, "grad_norm": 0.9130660342403393, "learning_rate": 1.819046778967948e-07, "loss": 1.522, "step": 7052 }, { "epoch": 0.9411529223378703, "grad_norm": 0.9194929376758173, "learning_rate": 1.8108500133579233e-07, "loss": 1.4903, "step": 7053 }, { "epoch": 0.9412863624232719, "grad_norm": 1.2769265280912176, "learning_rate": 1.8026715883631917e-07, "loss": 1.5293, "step": 7054 }, { "epoch": 0.9414198025086736, "grad_norm": 0.983577585375118, "learning_rate": 1.7945115055113983e-07, "loss": 1.5471, "step": 7055 }, { "epoch": 0.9415532425940752, "grad_norm": 1.2831644360579264, "learning_rate": 1.7863697663267231e-07, "loss": 1.5696, "step": 7056 }, { "epoch": 0.9416866826794769, "grad_norm": 0.9321407428486242, "learning_rate": 1.7782463723299947e-07, "loss": 1.5499, "step": 7057 }, { "epoch": 0.9418201227648786, "grad_norm": 0.9239769506776327, "learning_rate": 1.7701413250385324e-07, "loss": 1.552, "step": 7058 }, { "epoch": 0.9419535628502802, "grad_norm": 0.9822924040248578, "learning_rate": 1.7620546259662808e-07, "loss": 1.5701, "step": 7059 }, { "epoch": 0.9420870029356819, "grad_norm": 0.9264197624873486, "learning_rate": 1.7539862766237536e-07, "loss": 1.5301, "step": 7060 }, { "epoch": 0.9422204430210835, "grad_norm": 0.9098879290150069, "learning_rate": 1.7459362785180122e-07, "loss": 1.497, "step": 7061 }, { "epoch": 0.9423538831064852, "grad_norm": 0.9543713844981121, "learning_rate": 1.7379046331527094e-07, "loss": 1.4844, "step": 7062 }, { "epoch": 0.9424873231918869, "grad_norm": 1.091370085062858, "learning_rate": 1.729891342028067e-07, "loss": 1.5259, "step": 7063 }, { "epoch": 0.9426207632772885, "grad_norm": 1.0782943171835628, "learning_rate": 1.7218964066408773e-07, "loss": 1.5141, "step": 7064 }, { "epoch": 0.9427542033626901, "grad_norm": 0.9167701552489729, "learning_rate": 1.7139198284845005e-07, "loss": 1.4914, "step": 7065 }, { "epoch": 0.9428876434480918, "grad_norm": 0.9315361672522141, "learning_rate": 1.7059616090488561e-07, "loss": 1.553, "step": 7066 }, { "epoch": 0.9430210835334935, "grad_norm": 0.941731632670938, "learning_rate": 1.6980217498204777e-07, "loss": 1.5184, "step": 7067 }, { "epoch": 0.9431545236188951, "grad_norm": 1.0951605019526698, "learning_rate": 1.6901002522824228e-07, "loss": 1.518, "step": 7068 }, { "epoch": 0.9432879637042968, "grad_norm": 0.9413032824460642, "learning_rate": 1.6821971179143415e-07, "loss": 1.4946, "step": 7069 }, { "epoch": 0.9434214037896984, "grad_norm": 0.9158286385674823, "learning_rate": 1.6743123481924417e-07, "loss": 1.5623, "step": 7070 }, { "epoch": 0.9435548438751001, "grad_norm": 0.9396586297163264, "learning_rate": 1.666445944589523e-07, "loss": 1.5608, "step": 7071 }, { "epoch": 0.9436882839605018, "grad_norm": 0.9392711665239271, "learning_rate": 1.6585979085749326e-07, "loss": 1.5315, "step": 7072 }, { "epoch": 0.9438217240459034, "grad_norm": 0.9382386473834549, "learning_rate": 1.6507682416145865e-07, "loss": 1.578, "step": 7073 }, { "epoch": 0.9439551641313051, "grad_norm": 0.9007505568219435, "learning_rate": 1.6429569451709925e-07, "loss": 1.5409, "step": 7074 }, { "epoch": 0.9440886042167067, "grad_norm": 0.9402668564767092, "learning_rate": 1.635164020703206e-07, "loss": 1.5789, "step": 7075 }, { "epoch": 0.9442220443021083, "grad_norm": 0.9642072770932624, "learning_rate": 1.6273894696668514e-07, "loss": 1.5041, "step": 7076 }, { "epoch": 0.94435548438751, "grad_norm": 0.9169461721627207, "learning_rate": 1.6196332935141225e-07, "loss": 1.5472, "step": 7077 }, { "epoch": 0.9444889244729117, "grad_norm": 1.3273948346582414, "learning_rate": 1.6118954936938046e-07, "loss": 1.5359, "step": 7078 }, { "epoch": 0.9446223645583133, "grad_norm": 1.129474162644648, "learning_rate": 1.604176071651209e-07, "loss": 1.5153, "step": 7079 }, { "epoch": 0.944755804643715, "grad_norm": 0.9293810385080555, "learning_rate": 1.596475028828237e-07, "loss": 1.5414, "step": 7080 }, { "epoch": 0.9448892447291166, "grad_norm": 0.9382288057598555, "learning_rate": 1.588792366663383e-07, "loss": 1.5622, "step": 7081 }, { "epoch": 0.9450226848145182, "grad_norm": 0.989321226514514, "learning_rate": 1.5811280865916435e-07, "loss": 1.5804, "step": 7082 }, { "epoch": 0.94515612489992, "grad_norm": 0.9390344091231897, "learning_rate": 1.5734821900446507e-07, "loss": 1.5219, "step": 7083 }, { "epoch": 0.9452895649853216, "grad_norm": 1.0089198056152286, "learning_rate": 1.56585467845054e-07, "loss": 1.5103, "step": 7084 }, { "epoch": 0.9454230050707233, "grad_norm": 0.9927138491828363, "learning_rate": 1.5582455532340836e-07, "loss": 1.5626, "step": 7085 }, { "epoch": 0.9455564451561249, "grad_norm": 6.151724858717579, "learning_rate": 1.5506548158165437e-07, "loss": 1.5719, "step": 7086 }, { "epoch": 0.9456898852415265, "grad_norm": 0.9752024728609847, "learning_rate": 1.5430824676157974e-07, "loss": 1.5091, "step": 7087 }, { "epoch": 0.9458233253269283, "grad_norm": 0.9228271908165766, "learning_rate": 1.535528510046258e-07, "loss": 1.5471, "step": 7088 }, { "epoch": 0.9459567654123299, "grad_norm": 0.9142093131081027, "learning_rate": 1.5279929445189523e-07, "loss": 1.56, "step": 7089 }, { "epoch": 0.9460902054977315, "grad_norm": 1.040271354196889, "learning_rate": 1.5204757724414098e-07, "loss": 1.5649, "step": 7090 }, { "epoch": 0.9462236455831332, "grad_norm": 0.944030106842017, "learning_rate": 1.512976995217774e-07, "loss": 1.5361, "step": 7091 }, { "epoch": 0.9463570856685348, "grad_norm": 0.9345127025549201, "learning_rate": 1.5054966142487027e-07, "loss": 1.5856, "step": 7092 }, { "epoch": 0.9464905257539364, "grad_norm": 0.9282759654458305, "learning_rate": 1.4980346309314776e-07, "loss": 1.5434, "step": 7093 }, { "epoch": 0.9466239658393382, "grad_norm": 0.9226489628851323, "learning_rate": 1.4905910466598727e-07, "loss": 1.569, "step": 7094 }, { "epoch": 0.9467574059247398, "grad_norm": 0.9279025309090545, "learning_rate": 1.4831658628243096e-07, "loss": 1.5276, "step": 7095 }, { "epoch": 0.9468908460101414, "grad_norm": 0.9423761860152485, "learning_rate": 1.4757590808117006e-07, "loss": 1.5249, "step": 7096 }, { "epoch": 0.9470242860955431, "grad_norm": 1.004907421039197, "learning_rate": 1.4683707020055614e-07, "loss": 1.5448, "step": 7097 }, { "epoch": 0.9471577261809447, "grad_norm": 0.9258429181621549, "learning_rate": 1.4610007277859328e-07, "loss": 1.5371, "step": 7098 }, { "epoch": 0.9472911662663465, "grad_norm": 0.9395593033423324, "learning_rate": 1.453649159529469e-07, "loss": 1.5647, "step": 7099 }, { "epoch": 0.9474246063517481, "grad_norm": 0.9180402841843257, "learning_rate": 1.446315998609349e-07, "loss": 1.5026, "step": 7100 }, { "epoch": 0.9475580464371497, "grad_norm": 0.9387391943964363, "learning_rate": 1.4390012463953329e-07, "loss": 1.4874, "step": 7101 }, { "epoch": 0.9476914865225514, "grad_norm": 0.9403053185795006, "learning_rate": 1.4317049042537167e-07, "loss": 1.557, "step": 7102 }, { "epoch": 0.947824926607953, "grad_norm": 0.9716572056708245, "learning_rate": 1.424426973547377e-07, "loss": 1.5695, "step": 7103 }, { "epoch": 0.9479583666933546, "grad_norm": 0.9638053337694246, "learning_rate": 1.4171674556357705e-07, "loss": 1.528, "step": 7104 }, { "epoch": 0.9480918067787564, "grad_norm": 0.9326742291482022, "learning_rate": 1.4099263518748574e-07, "loss": 1.5284, "step": 7105 }, { "epoch": 0.948225246864158, "grad_norm": 0.9202879189907175, "learning_rate": 1.402703663617222e-07, "loss": 1.5171, "step": 7106 }, { "epoch": 0.9483586869495596, "grad_norm": 0.9499293926304142, "learning_rate": 1.395499392211963e-07, "loss": 1.5009, "step": 7107 }, { "epoch": 0.9484921270349613, "grad_norm": 0.9191237857115953, "learning_rate": 1.388313539004771e-07, "loss": 1.5325, "step": 7108 }, { "epoch": 0.948625567120363, "grad_norm": 0.9281253464726291, "learning_rate": 1.3811461053378606e-07, "loss": 1.5305, "step": 7109 }, { "epoch": 0.9487590072057646, "grad_norm": 0.9391089740766908, "learning_rate": 1.3739970925500613e-07, "loss": 1.5645, "step": 7110 }, { "epoch": 0.9488924472911663, "grad_norm": 0.9906753900198737, "learning_rate": 1.3668665019766937e-07, "loss": 1.5169, "step": 7111 }, { "epoch": 0.9490258873765679, "grad_norm": 0.9307202205615231, "learning_rate": 1.35975433494967e-07, "loss": 1.5244, "step": 7112 }, { "epoch": 0.9491593274619696, "grad_norm": 0.9307053074686159, "learning_rate": 1.3526605927974946e-07, "loss": 1.5106, "step": 7113 }, { "epoch": 0.9492927675473712, "grad_norm": 0.9528924062350823, "learning_rate": 1.3455852768451738e-07, "loss": 1.4496, "step": 7114 }, { "epoch": 0.9494262076327729, "grad_norm": 1.4092694693096828, "learning_rate": 1.3385283884143064e-07, "loss": 1.5641, "step": 7115 }, { "epoch": 0.9495596477181746, "grad_norm": 0.9508352494225291, "learning_rate": 1.3314899288230154e-07, "loss": 1.5428, "step": 7116 }, { "epoch": 0.9496930878035762, "grad_norm": 1.0036182199394033, "learning_rate": 1.3244698993860383e-07, "loss": 1.5242, "step": 7117 }, { "epoch": 0.9498265278889778, "grad_norm": 0.9239362307261929, "learning_rate": 1.317468301414615e-07, "loss": 1.5461, "step": 7118 }, { "epoch": 0.9499599679743795, "grad_norm": 0.9265412901982545, "learning_rate": 1.310485136216566e-07, "loss": 1.5534, "step": 7119 }, { "epoch": 0.9500934080597812, "grad_norm": 0.9381184097035782, "learning_rate": 1.3035204050962702e-07, "loss": 1.5232, "step": 7120 }, { "epoch": 0.9502268481451828, "grad_norm": 1.0080877900305665, "learning_rate": 1.2965741093546757e-07, "loss": 1.5221, "step": 7121 }, { "epoch": 0.9503602882305845, "grad_norm": 0.9245848574437733, "learning_rate": 1.289646250289245e-07, "loss": 1.5618, "step": 7122 }, { "epoch": 0.9504937283159861, "grad_norm": 0.9340954109480951, "learning_rate": 1.282736829194031e-07, "loss": 1.539, "step": 7123 }, { "epoch": 0.9506271684013877, "grad_norm": 0.9413706371278181, "learning_rate": 1.2758458473596468e-07, "loss": 1.5756, "step": 7124 }, { "epoch": 0.9507606084867894, "grad_norm": 0.9435011931603419, "learning_rate": 1.2689733060732512e-07, "loss": 1.5168, "step": 7125 }, { "epoch": 0.9508940485721911, "grad_norm": 0.9640763949138873, "learning_rate": 1.2621192066185394e-07, "loss": 1.5767, "step": 7126 }, { "epoch": 0.9510274886575928, "grad_norm": 1.0666760841057328, "learning_rate": 1.2552835502757765e-07, "loss": 1.5459, "step": 7127 }, { "epoch": 0.9511609287429944, "grad_norm": 0.9177228398177878, "learning_rate": 1.2484663383217964e-07, "loss": 1.483, "step": 7128 }, { "epoch": 0.951294368828396, "grad_norm": 0.9400026184127935, "learning_rate": 1.241667572029992e-07, "loss": 1.5753, "step": 7129 }, { "epoch": 0.9514278089137977, "grad_norm": 1.0492605217878297, "learning_rate": 1.2348872526702693e-07, "loss": 1.5503, "step": 7130 }, { "epoch": 0.9515612489991994, "grad_norm": 0.9251817708144794, "learning_rate": 1.2281253815091154e-07, "loss": 1.5143, "step": 7131 }, { "epoch": 0.951694689084601, "grad_norm": 0.940577041105443, "learning_rate": 1.2213819598095979e-07, "loss": 1.5524, "step": 7132 }, { "epoch": 0.9518281291700027, "grad_norm": 0.9912924069921497, "learning_rate": 1.2146569888312865e-07, "loss": 1.505, "step": 7133 }, { "epoch": 0.9519615692554043, "grad_norm": 1.0163650859754099, "learning_rate": 1.2079504698303324e-07, "loss": 1.5415, "step": 7134 }, { "epoch": 0.9520950093408059, "grad_norm": 0.91474346735063, "learning_rate": 1.2012624040594445e-07, "loss": 1.5454, "step": 7135 }, { "epoch": 0.9522284494262077, "grad_norm": 0.9327035920232858, "learning_rate": 1.194592792767879e-07, "loss": 1.5117, "step": 7136 }, { "epoch": 0.9523618895116093, "grad_norm": 0.9217306273503845, "learning_rate": 1.1879416372014285e-07, "loss": 1.529, "step": 7137 }, { "epoch": 0.9524953295970109, "grad_norm": 0.9609918293396442, "learning_rate": 1.1813089386024657e-07, "loss": 1.5807, "step": 7138 }, { "epoch": 0.9526287696824126, "grad_norm": 1.112063642124847, "learning_rate": 1.1746946982098995e-07, "loss": 1.5532, "step": 7139 }, { "epoch": 0.9527622097678142, "grad_norm": 0.9466676036098123, "learning_rate": 1.1680989172591972e-07, "loss": 1.52, "step": 7140 }, { "epoch": 0.952895649853216, "grad_norm": 0.9269239682104546, "learning_rate": 1.1615215969823734e-07, "loss": 1.5417, "step": 7141 }, { "epoch": 0.9530290899386176, "grad_norm": 1.38625103234689, "learning_rate": 1.1549627386080009e-07, "loss": 1.5533, "step": 7142 }, { "epoch": 0.9531625300240192, "grad_norm": 0.9665414589291429, "learning_rate": 1.1484223433611885e-07, "loss": 1.5013, "step": 7143 }, { "epoch": 0.9532959701094209, "grad_norm": 1.1016470069768787, "learning_rate": 1.1419004124636146e-07, "loss": 1.5066, "step": 7144 }, { "epoch": 0.9534294101948225, "grad_norm": 1.1136680127273073, "learning_rate": 1.1353969471335047e-07, "loss": 1.5206, "step": 7145 }, { "epoch": 0.9535628502802241, "grad_norm": 0.9664150023916923, "learning_rate": 1.1289119485856315e-07, "loss": 1.5303, "step": 7146 }, { "epoch": 0.9536962903656259, "grad_norm": 0.9298992429566982, "learning_rate": 1.122445418031315e-07, "loss": 1.5278, "step": 7147 }, { "epoch": 0.9538297304510275, "grad_norm": 1.1638106219236741, "learning_rate": 1.1159973566784221e-07, "loss": 1.5079, "step": 7148 }, { "epoch": 0.9539631705364291, "grad_norm": 0.9586600372591042, "learning_rate": 1.1095677657314008e-07, "loss": 1.5023, "step": 7149 }, { "epoch": 0.9540966106218308, "grad_norm": 0.9231194918030783, "learning_rate": 1.1031566463912014e-07, "loss": 1.5314, "step": 7150 }, { "epoch": 0.9542300507072324, "grad_norm": 0.9169704635554706, "learning_rate": 1.0967639998553659e-07, "loss": 1.5137, "step": 7151 }, { "epoch": 0.9543634907926342, "grad_norm": 0.9623890401190696, "learning_rate": 1.0903898273179503e-07, "loss": 1.54, "step": 7152 }, { "epoch": 0.9544969308780358, "grad_norm": 0.9269400628043825, "learning_rate": 1.0840341299695911e-07, "loss": 1.5609, "step": 7153 }, { "epoch": 0.9546303709634374, "grad_norm": 0.9465474447152321, "learning_rate": 1.0776969089974609e-07, "loss": 1.5059, "step": 7154 }, { "epoch": 0.9547638110488391, "grad_norm": 0.9356339177793419, "learning_rate": 1.0713781655852684e-07, "loss": 1.545, "step": 7155 }, { "epoch": 0.9548972511342407, "grad_norm": 0.927542167441195, "learning_rate": 1.0650779009132917e-07, "loss": 1.5608, "step": 7156 }, { "epoch": 0.9550306912196423, "grad_norm": 0.9369805873387638, "learning_rate": 1.0587961161583448e-07, "loss": 1.5508, "step": 7157 }, { "epoch": 0.9551641313050441, "grad_norm": 0.9523970569734105, "learning_rate": 1.0525328124938006e-07, "loss": 1.5117, "step": 7158 }, { "epoch": 0.9552975713904457, "grad_norm": 0.9097601640170604, "learning_rate": 1.0462879910895674e-07, "loss": 1.5232, "step": 7159 }, { "epoch": 0.9554310114758473, "grad_norm": 0.9589791998825555, "learning_rate": 1.0400616531121011e-07, "loss": 1.56, "step": 7160 }, { "epoch": 0.955564451561249, "grad_norm": 0.9522319609125601, "learning_rate": 1.033853799724427e-07, "loss": 1.543, "step": 7161 }, { "epoch": 0.9556978916466506, "grad_norm": 0.9285567819238145, "learning_rate": 1.0276644320860729e-07, "loss": 1.5558, "step": 7162 }, { "epoch": 0.9558313317320523, "grad_norm": 0.9595527770779047, "learning_rate": 1.021493551353181e-07, "loss": 1.5226, "step": 7163 }, { "epoch": 0.955964771817454, "grad_norm": 0.9018522623887555, "learning_rate": 1.0153411586783734e-07, "loss": 1.5229, "step": 7164 }, { "epoch": 0.9560982119028556, "grad_norm": 1.0712384937201727, "learning_rate": 1.0092072552108534e-07, "loss": 1.5517, "step": 7165 }, { "epoch": 0.9562316519882573, "grad_norm": 0.9341382301028408, "learning_rate": 1.0030918420963598e-07, "loss": 1.5233, "step": 7166 }, { "epoch": 0.9563650920736589, "grad_norm": 0.9928816977811972, "learning_rate": 9.969949204772011e-08, "loss": 1.5388, "step": 7167 }, { "epoch": 0.9564985321590606, "grad_norm": 0.9410197074503386, "learning_rate": 9.909164914921886e-08, "loss": 1.6122, "step": 7168 }, { "epoch": 0.9566319722444623, "grad_norm": 0.9028807413403246, "learning_rate": 9.848565562767143e-08, "loss": 1.4786, "step": 7169 }, { "epoch": 0.9567654123298639, "grad_norm": 0.9567117405537122, "learning_rate": 9.788151159627168e-08, "loss": 1.5112, "step": 7170 }, { "epoch": 0.9568988524152655, "grad_norm": 0.9811327272197673, "learning_rate": 9.727921716786492e-08, "loss": 1.5945, "step": 7171 }, { "epoch": 0.9570322925006672, "grad_norm": 1.0524690809980066, "learning_rate": 9.667877245495338e-08, "loss": 1.5584, "step": 7172 }, { "epoch": 0.9571657325860689, "grad_norm": 0.9613936267762925, "learning_rate": 9.608017756969512e-08, "loss": 1.5532, "step": 7173 }, { "epoch": 0.9572991726714705, "grad_norm": 1.2156726555516593, "learning_rate": 9.548343262389736e-08, "loss": 1.6236, "step": 7174 }, { "epoch": 0.9574326127568722, "grad_norm": 0.9579182785549096, "learning_rate": 9.488853772902762e-08, "loss": 1.5155, "step": 7175 }, { "epoch": 0.9575660528422738, "grad_norm": 0.9311509207700649, "learning_rate": 9.429549299620589e-08, "loss": 1.5794, "step": 7176 }, { "epoch": 0.9576994929276754, "grad_norm": 0.9409890634401489, "learning_rate": 9.370429853620578e-08, "loss": 1.5597, "step": 7177 }, { "epoch": 0.9578329330130771, "grad_norm": 0.9199706623701136, "learning_rate": 9.311495445945451e-08, "loss": 1.5511, "step": 7178 }, { "epoch": 0.9579663730984788, "grad_norm": 0.9160829798550142, "learning_rate": 9.252746087603626e-08, "loss": 1.5512, "step": 7179 }, { "epoch": 0.9580998131838805, "grad_norm": 0.9742308156882673, "learning_rate": 9.194181789568657e-08, "loss": 1.5763, "step": 7180 }, { "epoch": 0.9582332532692821, "grad_norm": 1.6586965890912353, "learning_rate": 9.135802562779794e-08, "loss": 1.4843, "step": 7181 }, { "epoch": 0.9583666933546837, "grad_norm": 0.9929783009078508, "learning_rate": 9.077608418141648e-08, "loss": 1.565, "step": 7182 }, { "epoch": 0.9585001334400854, "grad_norm": 0.9342398271698058, "learning_rate": 9.019599366524079e-08, "loss": 1.524, "step": 7183 }, { "epoch": 0.9586335735254871, "grad_norm": 0.9169837288541282, "learning_rate": 8.961775418762752e-08, "loss": 1.495, "step": 7184 }, { "epoch": 0.9587670136108887, "grad_norm": 0.9084724578571916, "learning_rate": 8.904136585658251e-08, "loss": 1.5988, "step": 7185 }, { "epoch": 0.9589004536962904, "grad_norm": 0.9504512059920357, "learning_rate": 8.846682877977075e-08, "loss": 1.5612, "step": 7186 }, { "epoch": 0.959033893781692, "grad_norm": 0.9505995914507539, "learning_rate": 8.78941430645075e-08, "loss": 1.5368, "step": 7187 }, { "epoch": 0.9591673338670936, "grad_norm": 1.1809298932125372, "learning_rate": 8.732330881776608e-08, "loss": 1.5281, "step": 7188 }, { "epoch": 0.9593007739524954, "grad_norm": 0.9314187782139279, "learning_rate": 8.675432614617008e-08, "loss": 1.5489, "step": 7189 }, { "epoch": 0.959434214037897, "grad_norm": 0.9232377132180151, "learning_rate": 8.618719515599894e-08, "loss": 1.5449, "step": 7190 }, { "epoch": 0.9595676541232986, "grad_norm": 1.0270042691413004, "learning_rate": 8.56219159531868e-08, "loss": 1.5986, "step": 7191 }, { "epoch": 0.9597010942087003, "grad_norm": 1.2061551149492193, "learning_rate": 8.505848864332145e-08, "loss": 1.5768, "step": 7192 }, { "epoch": 0.9598345342941019, "grad_norm": 0.9340146167897232, "learning_rate": 8.449691333164423e-08, "loss": 1.5385, "step": 7193 }, { "epoch": 0.9599679743795037, "grad_norm": 0.9296173201446742, "learning_rate": 8.393719012305124e-08, "loss": 1.5808, "step": 7194 }, { "epoch": 0.9601014144649053, "grad_norm": 0.9802388493036821, "learning_rate": 8.337931912209329e-08, "loss": 1.6037, "step": 7195 }, { "epoch": 0.9602348545503069, "grad_norm": 0.9331120869630394, "learning_rate": 8.28233004329737e-08, "loss": 1.5445, "step": 7196 }, { "epoch": 0.9603682946357086, "grad_norm": 1.025665909891443, "learning_rate": 8.22691341595494e-08, "loss": 1.5029, "step": 7197 }, { "epoch": 0.9605017347211102, "grad_norm": 0.9441651844649627, "learning_rate": 8.171682040533314e-08, "loss": 1.5366, "step": 7198 }, { "epoch": 0.9606351748065118, "grad_norm": 0.914204298926644, "learning_rate": 8.116635927349126e-08, "loss": 1.4671, "step": 7199 }, { "epoch": 0.9607686148919136, "grad_norm": 0.9547090251419438, "learning_rate": 8.061775086684376e-08, "loss": 1.5321, "step": 7200 }, { "epoch": 0.9609020549773152, "grad_norm": 0.9231643897440952, "learning_rate": 8.007099528786311e-08, "loss": 1.4749, "step": 7201 }, { "epoch": 0.9610354950627168, "grad_norm": 0.9330739495247855, "learning_rate": 7.952609263867872e-08, "loss": 1.549, "step": 7202 }, { "epoch": 0.9611689351481185, "grad_norm": 0.9436536688017126, "learning_rate": 7.89830430210714e-08, "loss": 1.573, "step": 7203 }, { "epoch": 0.9613023752335201, "grad_norm": 0.9289138338185312, "learning_rate": 7.844184653647669e-08, "loss": 1.4821, "step": 7204 }, { "epoch": 0.9614358153189217, "grad_norm": 1.0587966606284311, "learning_rate": 7.79025032859837e-08, "loss": 1.5745, "step": 7205 }, { "epoch": 0.9615692554043235, "grad_norm": 0.9391981736849577, "learning_rate": 7.736501337033519e-08, "loss": 1.5196, "step": 7206 }, { "epoch": 0.9617026954897251, "grad_norm": 0.921984477255766, "learning_rate": 7.682937688992975e-08, "loss": 1.5816, "step": 7207 }, { "epoch": 0.9618361355751268, "grad_norm": 0.9210876386975504, "learning_rate": 7.629559394481622e-08, "loss": 1.5665, "step": 7208 }, { "epoch": 0.9619695756605284, "grad_norm": 0.9320013031423707, "learning_rate": 7.576366463470042e-08, "loss": 1.4974, "step": 7209 }, { "epoch": 0.96210301574593, "grad_norm": 1.108953281361308, "learning_rate": 7.523358905894063e-08, "loss": 1.548, "step": 7210 }, { "epoch": 0.9622364558313318, "grad_norm": 0.9286055208909001, "learning_rate": 7.470536731654876e-08, "loss": 1.4809, "step": 7211 }, { "epoch": 0.9623698959167334, "grad_norm": 0.9397318541414119, "learning_rate": 7.417899950619035e-08, "loss": 1.5441, "step": 7212 }, { "epoch": 0.962503336002135, "grad_norm": 0.928266538779687, "learning_rate": 7.36544857261845e-08, "loss": 1.5575, "step": 7213 }, { "epoch": 0.9626367760875367, "grad_norm": 1.1602227605302893, "learning_rate": 7.31318260745062e-08, "loss": 1.4993, "step": 7214 }, { "epoch": 0.9627702161729383, "grad_norm": 0.933054423819918, "learning_rate": 7.261102064878067e-08, "loss": 1.5281, "step": 7215 }, { "epoch": 0.96290365625834, "grad_norm": 0.984048837363307, "learning_rate": 7.2092069546289e-08, "loss": 1.5852, "step": 7216 }, { "epoch": 0.9630370963437417, "grad_norm": 0.9736412117865043, "learning_rate": 7.157497286396475e-08, "loss": 1.5516, "step": 7217 }, { "epoch": 0.9631705364291433, "grad_norm": 1.0788935314073798, "learning_rate": 7.105973069839622e-08, "loss": 1.5497, "step": 7218 }, { "epoch": 0.963303976514545, "grad_norm": 1.0829067853857433, "learning_rate": 7.054634314582531e-08, "loss": 1.5787, "step": 7219 }, { "epoch": 0.9634374165999466, "grad_norm": 0.9424299932130654, "learning_rate": 7.003481030214642e-08, "loss": 1.5995, "step": 7220 }, { "epoch": 0.9635708566853483, "grad_norm": 0.9587712887267895, "learning_rate": 6.952513226290758e-08, "loss": 1.5399, "step": 7221 }, { "epoch": 0.96370429677075, "grad_norm": 0.9129861391767959, "learning_rate": 6.90173091233115e-08, "loss": 1.5154, "step": 7222 }, { "epoch": 0.9638377368561516, "grad_norm": 1.0939988766367443, "learning_rate": 6.851134097821344e-08, "loss": 1.5632, "step": 7223 }, { "epoch": 0.9639711769415532, "grad_norm": 0.936664559385459, "learning_rate": 6.800722792212333e-08, "loss": 1.5212, "step": 7224 }, { "epoch": 0.9641046170269549, "grad_norm": 0.93098974114669, "learning_rate": 6.750497004920253e-08, "loss": 1.5135, "step": 7225 }, { "epoch": 0.9642380571123566, "grad_norm": 0.9927276595661103, "learning_rate": 6.700456745326822e-08, "loss": 1.5372, "step": 7226 }, { "epoch": 0.9643714971977582, "grad_norm": 1.0843133033461316, "learning_rate": 6.650602022778785e-08, "loss": 1.5053, "step": 7227 }, { "epoch": 0.9645049372831599, "grad_norm": 1.1442758247625016, "learning_rate": 6.600932846588692e-08, "loss": 1.5681, "step": 7228 }, { "epoch": 0.9646383773685615, "grad_norm": 0.9544653994484005, "learning_rate": 6.551449226034124e-08, "loss": 1.4815, "step": 7229 }, { "epoch": 0.9647718174539631, "grad_norm": 0.9771673417993357, "learning_rate": 6.502151170357906e-08, "loss": 1.5094, "step": 7230 }, { "epoch": 0.9649052575393648, "grad_norm": 0.9952586049134462, "learning_rate": 6.453038688768454e-08, "loss": 1.5057, "step": 7231 }, { "epoch": 0.9650386976247665, "grad_norm": 0.9468439711909263, "learning_rate": 6.404111790439427e-08, "loss": 1.5695, "step": 7232 }, { "epoch": 0.9651721377101682, "grad_norm": 0.9741329945289054, "learning_rate": 6.355370484509848e-08, "loss": 1.5713, "step": 7233 }, { "epoch": 0.9653055777955698, "grad_norm": 1.18170513807492, "learning_rate": 6.306814780083992e-08, "loss": 1.521, "step": 7234 }, { "epoch": 0.9654390178809714, "grad_norm": 0.9705328509871132, "learning_rate": 6.258444686231491e-08, "loss": 1.5803, "step": 7235 }, { "epoch": 0.9655724579663731, "grad_norm": 0.8989950439543595, "learning_rate": 6.21026021198734e-08, "loss": 1.4914, "step": 7236 }, { "epoch": 0.9657058980517748, "grad_norm": 0.9190782535619162, "learning_rate": 6.162261366351785e-08, "loss": 1.5458, "step": 7237 }, { "epoch": 0.9658393381371764, "grad_norm": 0.9325432801015195, "learning_rate": 6.114448158290653e-08, "loss": 1.557, "step": 7238 }, { "epoch": 0.9659727782225781, "grad_norm": 0.9129405790473425, "learning_rate": 6.066820596734801e-08, "loss": 1.5548, "step": 7239 }, { "epoch": 0.9661062183079797, "grad_norm": 0.9077011984641612, "learning_rate": 6.019378690580447e-08, "loss": 1.515, "step": 7240 }, { "epoch": 0.9662396583933813, "grad_norm": 0.9878409436375739, "learning_rate": 5.972122448689278e-08, "loss": 1.5372, "step": 7241 }, { "epoch": 0.966373098478783, "grad_norm": 0.9403367481562539, "learning_rate": 5.925051879888233e-08, "loss": 1.531, "step": 7242 }, { "epoch": 0.9665065385641847, "grad_norm": 0.9133740011830598, "learning_rate": 5.878166992969503e-08, "loss": 1.5515, "step": 7243 }, { "epoch": 0.9666399786495863, "grad_norm": 0.9468373367676625, "learning_rate": 5.8314677966906376e-08, "loss": 1.5626, "step": 7244 }, { "epoch": 0.966773418734988, "grad_norm": 0.9583047387897976, "learning_rate": 5.784954299774548e-08, "loss": 1.5667, "step": 7245 }, { "epoch": 0.9669068588203896, "grad_norm": 1.1076391186400782, "learning_rate": 5.738626510909506e-08, "loss": 1.5306, "step": 7246 }, { "epoch": 0.9670402989057914, "grad_norm": 0.9531885401281327, "learning_rate": 5.692484438748924e-08, "loss": 1.5195, "step": 7247 }, { "epoch": 0.967173738991193, "grad_norm": 0.9286226294276955, "learning_rate": 5.646528091911574e-08, "loss": 1.5468, "step": 7248 }, { "epoch": 0.9673071790765946, "grad_norm": 1.0932763529633334, "learning_rate": 5.6007574789817e-08, "loss": 1.5583, "step": 7249 }, { "epoch": 0.9674406191619963, "grad_norm": 0.9368950168319131, "learning_rate": 5.5551726085086854e-08, "loss": 1.5233, "step": 7250 }, { "epoch": 0.9675740592473979, "grad_norm": 1.0046496115780676, "learning_rate": 5.509773489007164e-08, "loss": 1.5453, "step": 7251 }, { "epoch": 0.9677074993327995, "grad_norm": 0.9110736411180317, "learning_rate": 5.4645601289572414e-08, "loss": 1.5221, "step": 7252 }, { "epoch": 0.9678409394182013, "grad_norm": 0.9490066324920002, "learning_rate": 5.419532536804384e-08, "loss": 1.5655, "step": 7253 }, { "epoch": 0.9679743795036029, "grad_norm": 0.962378240370102, "learning_rate": 5.3746907209590856e-08, "loss": 1.5207, "step": 7254 }, { "epoch": 0.9681078195890045, "grad_norm": 1.1335586421572892, "learning_rate": 5.3300346897973143e-08, "loss": 1.5354, "step": 7255 }, { "epoch": 0.9682412596744062, "grad_norm": 0.9713944583777968, "learning_rate": 5.285564451660285e-08, "loss": 1.5339, "step": 7256 }, { "epoch": 0.9683746997598078, "grad_norm": 0.9407589092657578, "learning_rate": 5.2412800148546884e-08, "loss": 1.5333, "step": 7257 }, { "epoch": 0.9685081398452094, "grad_norm": 0.9498219588447119, "learning_rate": 5.197181387652128e-08, "loss": 1.5341, "step": 7258 }, { "epoch": 0.9686415799306112, "grad_norm": 1.184152865294345, "learning_rate": 5.153268578289794e-08, "loss": 1.5766, "step": 7259 }, { "epoch": 0.9687750200160128, "grad_norm": 0.9171966164362452, "learning_rate": 5.109541594970235e-08, "loss": 1.4931, "step": 7260 }, { "epoch": 0.9689084601014145, "grad_norm": 0.9294958426402554, "learning_rate": 5.06600044586103e-08, "loss": 1.507, "step": 7261 }, { "epoch": 0.9690419001868161, "grad_norm": 1.0547850975106445, "learning_rate": 5.022645139095117e-08, "loss": 1.568, "step": 7262 }, { "epoch": 0.9691753402722177, "grad_norm": 0.9867765801059861, "learning_rate": 4.979475682770907e-08, "loss": 1.516, "step": 7263 }, { "epoch": 0.9693087803576195, "grad_norm": 0.9697836144717429, "learning_rate": 4.936492084951949e-08, "loss": 1.5803, "step": 7264 }, { "epoch": 0.9694422204430211, "grad_norm": 0.9720384959061538, "learning_rate": 4.893694353666934e-08, "loss": 1.5052, "step": 7265 }, { "epoch": 0.9695756605284227, "grad_norm": 0.9188699165623265, "learning_rate": 4.851082496910242e-08, "loss": 1.5465, "step": 7266 }, { "epoch": 0.9697091006138244, "grad_norm": 0.9508384028938736, "learning_rate": 4.808656522641064e-08, "loss": 1.5879, "step": 7267 }, { "epoch": 0.969842540699226, "grad_norm": 1.2180128273543447, "learning_rate": 4.766416438784172e-08, "loss": 1.5716, "step": 7268 }, { "epoch": 0.9699759807846277, "grad_norm": 0.9353158222871983, "learning_rate": 4.7243622532294756e-08, "loss": 1.5542, "step": 7269 }, { "epoch": 0.9701094208700294, "grad_norm": 0.9324250384056547, "learning_rate": 4.682493973832358e-08, "loss": 1.5415, "step": 7270 }, { "epoch": 0.970242860955431, "grad_norm": 0.9495787614825245, "learning_rate": 4.6408116084132313e-08, "loss": 1.5307, "step": 7271 }, { "epoch": 0.9703763010408326, "grad_norm": 0.9474456637216822, "learning_rate": 4.599315164757867e-08, "loss": 1.5638, "step": 7272 }, { "epoch": 0.9705097411262343, "grad_norm": 0.9360048271462087, "learning_rate": 4.558004650617398e-08, "loss": 1.5106, "step": 7273 }, { "epoch": 0.970643181211636, "grad_norm": 1.0394175365220302, "learning_rate": 4.5168800737080965e-08, "loss": 1.5319, "step": 7274 }, { "epoch": 0.9707766212970377, "grad_norm": 0.9512409507257436, "learning_rate": 4.4759414417117064e-08, "loss": 1.542, "step": 7275 }, { "epoch": 0.9709100613824393, "grad_norm": 0.938780333252122, "learning_rate": 4.435188762274778e-08, "loss": 1.5228, "step": 7276 }, { "epoch": 0.9710435014678409, "grad_norm": 0.9585863277465944, "learning_rate": 4.3946220430098885e-08, "loss": 1.5299, "step": 7277 }, { "epoch": 0.9711769415532426, "grad_norm": 1.2292729065904253, "learning_rate": 4.354241291494088e-08, "loss": 1.5671, "step": 7278 }, { "epoch": 0.9713103816386442, "grad_norm": 0.9701786083988774, "learning_rate": 4.314046515270121e-08, "loss": 1.5487, "step": 7279 }, { "epoch": 0.9714438217240459, "grad_norm": 0.9290988392315854, "learning_rate": 4.2740377218459805e-08, "loss": 1.5489, "step": 7280 }, { "epoch": 0.9715772618094476, "grad_norm": 0.9516211408003198, "learning_rate": 4.234214918694912e-08, "loss": 1.5158, "step": 7281 }, { "epoch": 0.9717107018948492, "grad_norm": 0.9216274492882804, "learning_rate": 4.194578113255188e-08, "loss": 1.4825, "step": 7282 }, { "epoch": 0.9718441419802508, "grad_norm": 0.956433000469751, "learning_rate": 4.1551273129306624e-08, "loss": 1.5503, "step": 7283 }, { "epoch": 0.9719775820656525, "grad_norm": 0.959454062575314, "learning_rate": 4.11586252509022e-08, "loss": 1.5901, "step": 7284 }, { "epoch": 0.9721110221510542, "grad_norm": 0.9623469747948837, "learning_rate": 4.076783757068103e-08, "loss": 1.5469, "step": 7285 }, { "epoch": 0.9722444622364559, "grad_norm": 0.941165654507255, "learning_rate": 4.037891016163697e-08, "loss": 1.5266, "step": 7286 }, { "epoch": 0.9723779023218575, "grad_norm": 0.953573546159197, "learning_rate": 3.999184309641857e-08, "loss": 1.5544, "step": 7287 }, { "epoch": 0.9725113424072591, "grad_norm": 1.0783246600687082, "learning_rate": 3.960663644732465e-08, "loss": 1.515, "step": 7288 }, { "epoch": 0.9726447824926608, "grad_norm": 0.9455437522062007, "learning_rate": 3.922329028630767e-08, "loss": 1.62, "step": 7289 }, { "epoch": 0.9727782225780625, "grad_norm": 0.933995861984615, "learning_rate": 3.8841804684972564e-08, "loss": 1.6147, "step": 7290 }, { "epoch": 0.9729116626634641, "grad_norm": 0.9369601765686556, "learning_rate": 3.846217971457677e-08, "loss": 1.4899, "step": 7291 }, { "epoch": 0.9730451027488658, "grad_norm": 0.9638712887226217, "learning_rate": 3.808441544602914e-08, "loss": 1.572, "step": 7292 }, { "epoch": 0.9731785428342674, "grad_norm": 0.9358530817042976, "learning_rate": 3.7708511949891e-08, "loss": 1.5785, "step": 7293 }, { "epoch": 0.973311982919669, "grad_norm": 0.956879412152412, "learning_rate": 3.7334469296378406e-08, "loss": 1.5398, "step": 7294 }, { "epoch": 0.9734454230050708, "grad_norm": 0.9452032315669289, "learning_rate": 3.69622875553588e-08, "loss": 1.563, "step": 7295 }, { "epoch": 0.9735788630904724, "grad_norm": 0.9496476555370976, "learning_rate": 3.659196679634991e-08, "loss": 1.5428, "step": 7296 }, { "epoch": 0.973712303175874, "grad_norm": 0.9327306321435094, "learning_rate": 3.622350708852307e-08, "loss": 1.5386, "step": 7297 }, { "epoch": 0.9738457432612757, "grad_norm": 0.9712502348220395, "learning_rate": 3.5856908500704335e-08, "loss": 1.5161, "step": 7298 }, { "epoch": 0.9739791833466773, "grad_norm": 0.9257919013138982, "learning_rate": 3.5492171101368935e-08, "loss": 1.5558, "step": 7299 }, { "epoch": 0.974112623432079, "grad_norm": 0.9385635102986536, "learning_rate": 3.512929495864459e-08, "loss": 1.5224, "step": 7300 }, { "epoch": 0.9742460635174807, "grad_norm": 0.9404378377963817, "learning_rate": 3.476828014031486e-08, "loss": 1.5744, "step": 7301 }, { "epoch": 0.9743795036028823, "grad_norm": 0.9341637296301126, "learning_rate": 3.440912671381136e-08, "loss": 1.557, "step": 7302 }, { "epoch": 0.974512943688284, "grad_norm": 0.9208183908614412, "learning_rate": 3.4051834746221534e-08, "loss": 1.5211, "step": 7303 }, { "epoch": 0.9746463837736856, "grad_norm": 0.9118064472195799, "learning_rate": 3.369640430428089e-08, "loss": 1.5383, "step": 7304 }, { "epoch": 0.9747798238590872, "grad_norm": 0.9329162779176879, "learning_rate": 3.3342835454382996e-08, "loss": 1.5513, "step": 7305 }, { "epoch": 0.974913263944489, "grad_norm": 0.951014370362824, "learning_rate": 3.299112826256723e-08, "loss": 1.5102, "step": 7306 }, { "epoch": 0.9750467040298906, "grad_norm": 1.047900440973857, "learning_rate": 3.264128279453105e-08, "loss": 1.554, "step": 7307 }, { "epoch": 0.9751801441152922, "grad_norm": 0.9197324429445751, "learning_rate": 3.229329911561996e-08, "loss": 1.5731, "step": 7308 }, { "epoch": 0.9753135842006939, "grad_norm": 0.9333452848398622, "learning_rate": 3.1947177290834184e-08, "loss": 1.588, "step": 7309 }, { "epoch": 0.9754470242860955, "grad_norm": 0.9777880188410225, "learning_rate": 3.160291738482535e-08, "loss": 1.5162, "step": 7310 }, { "epoch": 0.9755804643714971, "grad_norm": 0.9366198386385366, "learning_rate": 3.1260519461896456e-08, "loss": 1.5737, "step": 7311 }, { "epoch": 0.9757139044568989, "grad_norm": 0.9239853504065914, "learning_rate": 3.091998358600523e-08, "loss": 1.554, "step": 7312 }, { "epoch": 0.9758473445423005, "grad_norm": 0.9193926283503289, "learning_rate": 3.0581309820757466e-08, "loss": 1.4998, "step": 7313 }, { "epoch": 0.9759807846277022, "grad_norm": 0.9795340493304713, "learning_rate": 3.0244498229415885e-08, "loss": 1.4955, "step": 7314 }, { "epoch": 0.9761142247131038, "grad_norm": 0.9333034891767017, "learning_rate": 2.990954887489239e-08, "loss": 1.4969, "step": 7315 }, { "epoch": 0.9762476647985054, "grad_norm": 0.9282493967471739, "learning_rate": 2.957646181975027e-08, "loss": 1.5149, "step": 7316 }, { "epoch": 0.9763811048839072, "grad_norm": 0.9838269293925297, "learning_rate": 2.9245237126208638e-08, "loss": 1.5284, "step": 7317 }, { "epoch": 0.9765145449693088, "grad_norm": 0.9463715264539991, "learning_rate": 2.8915874856134672e-08, "loss": 1.5611, "step": 7318 }, { "epoch": 0.9766479850547104, "grad_norm": 0.9355403594679113, "learning_rate": 2.858837507105028e-08, "loss": 1.5054, "step": 7319 }, { "epoch": 0.9767814251401121, "grad_norm": 1.2260606411830046, "learning_rate": 2.826273783212874e-08, "loss": 1.5631, "step": 7320 }, { "epoch": 0.9769148652255137, "grad_norm": 0.9215276937501206, "learning_rate": 2.7938963200195844e-08, "loss": 1.5625, "step": 7321 }, { "epoch": 0.9770483053109154, "grad_norm": 1.0251253524014017, "learning_rate": 2.7617051235727666e-08, "loss": 1.5516, "step": 7322 }, { "epoch": 0.9771817453963171, "grad_norm": 0.9213818791054645, "learning_rate": 2.7297001998854987e-08, "loss": 1.5389, "step": 7323 }, { "epoch": 0.9773151854817187, "grad_norm": 0.941181131336436, "learning_rate": 2.697881554935888e-08, "loss": 1.5423, "step": 7324 }, { "epoch": 0.9774486255671203, "grad_norm": 0.8999309030256885, "learning_rate": 2.666249194667292e-08, "loss": 1.5542, "step": 7325 }, { "epoch": 0.977582065652522, "grad_norm": 0.9331555186069251, "learning_rate": 2.6348031249882057e-08, "loss": 1.5703, "step": 7326 }, { "epoch": 0.9777155057379237, "grad_norm": 1.0508215046605072, "learning_rate": 2.603543351772486e-08, "loss": 1.5462, "step": 7327 }, { "epoch": 0.9778489458233254, "grad_norm": 0.9500724069103249, "learning_rate": 2.5724698808591297e-08, "loss": 1.5018, "step": 7328 }, { "epoch": 0.977982385908727, "grad_norm": 0.950196772138831, "learning_rate": 2.541582718052271e-08, "loss": 1.4982, "step": 7329 }, { "epoch": 0.9781158259941286, "grad_norm": 1.022399182945929, "learning_rate": 2.5108818691212956e-08, "loss": 1.5155, "step": 7330 }, { "epoch": 0.9782492660795303, "grad_norm": 1.092331980943047, "learning_rate": 2.4803673398006157e-08, "loss": 1.5725, "step": 7331 }, { "epoch": 0.978382706164932, "grad_norm": 0.9255819560432912, "learning_rate": 2.4500391357902277e-08, "loss": 1.4885, "step": 7332 }, { "epoch": 0.9785161462503336, "grad_norm": 0.9513513483680666, "learning_rate": 2.4198972627549335e-08, "loss": 1.5104, "step": 7333 }, { "epoch": 0.9786495863357353, "grad_norm": 0.8903283569870852, "learning_rate": 2.389941726325007e-08, "loss": 1.5539, "step": 7334 }, { "epoch": 0.9787830264211369, "grad_norm": 0.9338400501650106, "learning_rate": 2.3601725320957503e-08, "loss": 1.4993, "step": 7335 }, { "epoch": 0.9789164665065385, "grad_norm": 0.971758384623749, "learning_rate": 2.3305896856277154e-08, "loss": 1.5658, "step": 7336 }, { "epoch": 0.9790499065919402, "grad_norm": 0.9672566853309181, "learning_rate": 2.3011931924465936e-08, "loss": 1.5645, "step": 7337 }, { "epoch": 0.9791833466773419, "grad_norm": 0.9218261068570299, "learning_rate": 2.2719830580434366e-08, "loss": 1.5638, "step": 7338 }, { "epoch": 0.9793167867627435, "grad_norm": 0.9367852380054548, "learning_rate": 2.2429592878742134e-08, "loss": 1.547, "step": 7339 }, { "epoch": 0.9794502268481452, "grad_norm": 0.9339712727726341, "learning_rate": 2.2141218873602544e-08, "loss": 1.514, "step": 7340 }, { "epoch": 0.9795836669335468, "grad_norm": 0.9417814446241446, "learning_rate": 2.1854708618882504e-08, "loss": 1.5222, "step": 7341 }, { "epoch": 0.9797171070189485, "grad_norm": 0.9339447722012021, "learning_rate": 2.1570062168095873e-08, "loss": 1.5335, "step": 7342 }, { "epoch": 0.9798505471043502, "grad_norm": 1.0402602333871376, "learning_rate": 2.1287279574414566e-08, "loss": 1.534, "step": 7343 }, { "epoch": 0.9799839871897518, "grad_norm": 0.9326106042217657, "learning_rate": 2.100636089065633e-08, "loss": 1.4977, "step": 7344 }, { "epoch": 0.9801174272751535, "grad_norm": 0.930957256971068, "learning_rate": 2.0727306169294747e-08, "loss": 1.5236, "step": 7345 }, { "epoch": 0.9802508673605551, "grad_norm": 0.9473415741877985, "learning_rate": 2.0450115462454788e-08, "loss": 1.5171, "step": 7346 }, { "epoch": 0.9803843074459567, "grad_norm": 0.9215591618457778, "learning_rate": 2.0174788821911706e-08, "loss": 1.4988, "step": 7347 }, { "epoch": 0.9805177475313585, "grad_norm": 0.9501836671635087, "learning_rate": 1.990132629909325e-08, "loss": 1.5363, "step": 7348 }, { "epoch": 0.9806511876167601, "grad_norm": 1.0095366173547495, "learning_rate": 1.9629727945079668e-08, "loss": 1.5768, "step": 7349 }, { "epoch": 0.9807846277021617, "grad_norm": 0.9307006869728814, "learning_rate": 1.9359993810601495e-08, "loss": 1.5595, "step": 7350 }, { "epoch": 0.9809180677875634, "grad_norm": 0.9742637399272271, "learning_rate": 1.9092123946042873e-08, "loss": 1.5153, "step": 7351 }, { "epoch": 0.981051507872965, "grad_norm": 0.9314930322710189, "learning_rate": 1.882611840143822e-08, "loss": 1.5104, "step": 7352 }, { "epoch": 0.9811849479583667, "grad_norm": 0.9381519532395032, "learning_rate": 1.856197722647557e-08, "loss": 1.5689, "step": 7353 }, { "epoch": 0.9813183880437684, "grad_norm": 0.9039239250179172, "learning_rate": 1.8299700470492122e-08, "loss": 1.5105, "step": 7354 }, { "epoch": 0.98145182812917, "grad_norm": 0.9396159105126344, "learning_rate": 1.8039288182478686e-08, "loss": 1.5139, "step": 7355 }, { "epoch": 0.9815852682145717, "grad_norm": 0.9951647404292933, "learning_rate": 1.778074041107747e-08, "loss": 1.5556, "step": 7356 }, { "epoch": 0.9817187082999733, "grad_norm": 0.970298389786309, "learning_rate": 1.7524057204582058e-08, "loss": 1.5558, "step": 7357 }, { "epoch": 0.9818521483853749, "grad_norm": 0.9653437553551486, "learning_rate": 1.7269238610938544e-08, "loss": 1.5561, "step": 7358 }, { "epoch": 0.9819855884707767, "grad_norm": 1.006023062423882, "learning_rate": 1.701628467774219e-08, "loss": 1.5283, "step": 7359 }, { "epoch": 0.9821190285561783, "grad_norm": 0.9396565250832867, "learning_rate": 1.6765195452245198e-08, "loss": 1.5434, "step": 7360 }, { "epoch": 0.9822524686415799, "grad_norm": 0.9608296788404236, "learning_rate": 1.6515970981344498e-08, "loss": 1.4794, "step": 7361 }, { "epoch": 0.9823859087269816, "grad_norm": 1.0298643297103718, "learning_rate": 1.6268611311595072e-08, "loss": 1.5356, "step": 7362 }, { "epoch": 0.9825193488123832, "grad_norm": 0.9122880384105183, "learning_rate": 1.6023116489199962e-08, "loss": 1.4973, "step": 7363 }, { "epoch": 0.9826527888977848, "grad_norm": 0.9280142474276555, "learning_rate": 1.5779486560014713e-08, "loss": 1.5525, "step": 7364 }, { "epoch": 0.9827862289831866, "grad_norm": 0.9191238933559271, "learning_rate": 1.553772156954736e-08, "loss": 1.4899, "step": 7365 }, { "epoch": 0.9829196690685882, "grad_norm": 0.9177971381786734, "learning_rate": 1.529782156295512e-08, "loss": 1.5066, "step": 7366 }, { "epoch": 0.9830531091539899, "grad_norm": 0.9212731057030706, "learning_rate": 1.505978658505103e-08, "loss": 1.502, "step": 7367 }, { "epoch": 0.9831865492393915, "grad_norm": 0.9287034571182928, "learning_rate": 1.4823616680295083e-08, "loss": 1.5333, "step": 7368 }, { "epoch": 0.9833199893247931, "grad_norm": 0.9141592195619862, "learning_rate": 1.4589311892801994e-08, "loss": 1.5547, "step": 7369 }, { "epoch": 0.9834534294101949, "grad_norm": 0.9139700337452249, "learning_rate": 1.4356872266337862e-08, "loss": 1.556, "step": 7370 }, { "epoch": 0.9835868694955965, "grad_norm": 0.9454395337463968, "learning_rate": 1.4126297844317959e-08, "loss": 1.5366, "step": 7371 }, { "epoch": 0.9837203095809981, "grad_norm": 0.9532365221783823, "learning_rate": 1.389758866981339e-08, "loss": 1.5516, "step": 7372 }, { "epoch": 0.9838537496663998, "grad_norm": 0.9276600231944385, "learning_rate": 1.367074478554331e-08, "loss": 1.549, "step": 7373 }, { "epoch": 0.9839871897518014, "grad_norm": 0.9614937926329741, "learning_rate": 1.3445766233878277e-08, "loss": 1.5544, "step": 7374 }, { "epoch": 0.984120629837203, "grad_norm": 0.8899635478946575, "learning_rate": 1.322265305684356e-08, "loss": 1.4937, "step": 7375 }, { "epoch": 0.9842540699226048, "grad_norm": 0.9288487499950765, "learning_rate": 1.3001405296113601e-08, "loss": 1.5814, "step": 7376 }, { "epoch": 0.9843875100080064, "grad_norm": 1.1647701919689324, "learning_rate": 1.2782022993015342e-08, "loss": 1.5296, "step": 7377 }, { "epoch": 0.984520950093408, "grad_norm": 0.9358750657202466, "learning_rate": 1.2564506188526004e-08, "loss": 1.5041, "step": 7378 }, { "epoch": 0.9846543901788097, "grad_norm": 0.9197611351339202, "learning_rate": 1.2348854923275311e-08, "loss": 1.5391, "step": 7379 }, { "epoch": 0.9847878302642114, "grad_norm": 0.9827592983158957, "learning_rate": 1.2135069237545483e-08, "loss": 1.535, "step": 7380 }, { "epoch": 0.9849212703496131, "grad_norm": 0.9615691598090853, "learning_rate": 1.1923149171267911e-08, "loss": 1.6116, "step": 7381 }, { "epoch": 0.9850547104350147, "grad_norm": 0.9273458595588824, "learning_rate": 1.171309476402871e-08, "loss": 1.5576, "step": 7382 }, { "epoch": 0.9851881505204163, "grad_norm": 0.9460589830522615, "learning_rate": 1.1504906055060938e-08, "loss": 1.5578, "step": 7383 }, { "epoch": 0.985321590605818, "grad_norm": 0.9954193989279615, "learning_rate": 1.1298583083254599e-08, "loss": 1.5602, "step": 7384 }, { "epoch": 0.9854550306912196, "grad_norm": 0.9445437464464703, "learning_rate": 1.1094125887146644e-08, "loss": 1.5294, "step": 7385 }, { "epoch": 0.9855884707766213, "grad_norm": 0.9104360078993541, "learning_rate": 1.0891534504928747e-08, "loss": 1.5428, "step": 7386 }, { "epoch": 0.985721910862023, "grad_norm": 1.0236669925899005, "learning_rate": 1.0690808974441747e-08, "loss": 1.6108, "step": 7387 }, { "epoch": 0.9858553509474246, "grad_norm": 0.9071583750469212, "learning_rate": 1.0491949333178986e-08, "loss": 1.5168, "step": 7388 }, { "epoch": 0.9859887910328262, "grad_norm": 0.9927493294549666, "learning_rate": 1.0294955618285196e-08, "loss": 1.5921, "step": 7389 }, { "epoch": 0.9861222311182279, "grad_norm": 0.9363046349406777, "learning_rate": 1.00998278665565e-08, "loss": 1.5694, "step": 7390 }, { "epoch": 0.9862556712036296, "grad_norm": 0.9135114422937861, "learning_rate": 9.906566114440408e-09, "loss": 1.5794, "step": 7391 }, { "epoch": 0.9863891112890312, "grad_norm": 0.9437284819277648, "learning_rate": 9.715170398036933e-09, "loss": 1.584, "step": 7392 }, { "epoch": 0.9865225513744329, "grad_norm": 0.9211484661856915, "learning_rate": 9.525640753095256e-09, "loss": 1.5472, "step": 7393 }, { "epoch": 0.9866559914598345, "grad_norm": 0.9176256061269126, "learning_rate": 9.337977215018169e-09, "loss": 1.4833, "step": 7394 }, { "epoch": 0.9867894315452362, "grad_norm": 0.9564873181171374, "learning_rate": 9.152179818859853e-09, "loss": 1.5476, "step": 7395 }, { "epoch": 0.9869228716306379, "grad_norm": 0.89959690847276, "learning_rate": 8.968248599323659e-09, "loss": 1.525, "step": 7396 }, { "epoch": 0.9870563117160395, "grad_norm": 0.919080132940536, "learning_rate": 8.78618359076544e-09, "loss": 1.5491, "step": 7397 }, { "epoch": 0.9871897518014412, "grad_norm": 1.4794996154790685, "learning_rate": 8.605984827195767e-09, "loss": 1.5364, "step": 7398 }, { "epoch": 0.9873231918868428, "grad_norm": 1.0985804202382334, "learning_rate": 8.427652342271053e-09, "loss": 1.529, "step": 7399 }, { "epoch": 0.9874566319722444, "grad_norm": 1.1736650971200993, "learning_rate": 8.251186169301318e-09, "loss": 1.5596, "step": 7400 }, { "epoch": 0.9875900720576462, "grad_norm": 1.0303346794805686, "learning_rate": 8.076586341251303e-09, "loss": 1.4777, "step": 7401 }, { "epoch": 0.9877235121430478, "grad_norm": 0.9872701203867904, "learning_rate": 7.903852890732699e-09, "loss": 1.5144, "step": 7402 }, { "epoch": 0.9878569522284494, "grad_norm": 0.9850487540902245, "learning_rate": 7.73298585000859e-09, "loss": 1.5604, "step": 7403 }, { "epoch": 0.9879903923138511, "grad_norm": 0.9471651528476187, "learning_rate": 7.563985250997884e-09, "loss": 1.5555, "step": 7404 }, { "epoch": 0.9881238323992527, "grad_norm": 0.9420598022378026, "learning_rate": 7.3968511252664466e-09, "loss": 1.517, "step": 7405 }, { "epoch": 0.9882572724846543, "grad_norm": 0.9419967634572181, "learning_rate": 7.231583504032636e-09, "loss": 1.5475, "step": 7406 }, { "epoch": 0.9883907125700561, "grad_norm": 1.0474390058108154, "learning_rate": 7.0681824181673134e-09, "loss": 1.5696, "step": 7407 }, { "epoch": 0.9885241526554577, "grad_norm": 1.036920220314024, "learning_rate": 6.906647898191621e-09, "loss": 1.554, "step": 7408 }, { "epoch": 0.9886575927408594, "grad_norm": 0.928746553820107, "learning_rate": 6.7469799742780895e-09, "loss": 1.5312, "step": 7409 }, { "epoch": 0.988791032826261, "grad_norm": 0.9153121582186817, "learning_rate": 6.589178676251751e-09, "loss": 1.528, "step": 7410 }, { "epoch": 0.9889244729116626, "grad_norm": 1.0836635609150753, "learning_rate": 6.433244033587916e-09, "loss": 1.5573, "step": 7411 }, { "epoch": 0.9890579129970644, "grad_norm": 1.221340256835362, "learning_rate": 6.279176075412175e-09, "loss": 1.5926, "step": 7412 }, { "epoch": 0.989191353082466, "grad_norm": 0.9091108579676047, "learning_rate": 6.12697483050484e-09, "loss": 1.5529, "step": 7413 }, { "epoch": 0.9893247931678676, "grad_norm": 0.92677410973629, "learning_rate": 5.976640327293171e-09, "loss": 1.541, "step": 7414 }, { "epoch": 0.9894582332532693, "grad_norm": 0.9383472475782342, "learning_rate": 5.828172593858039e-09, "loss": 1.4556, "step": 7415 }, { "epoch": 0.9895916733386709, "grad_norm": 0.9238065180005837, "learning_rate": 5.681571657933927e-09, "loss": 1.5083, "step": 7416 }, { "epoch": 0.9897251134240725, "grad_norm": 0.9377466504722105, "learning_rate": 5.536837546902263e-09, "loss": 1.5493, "step": 7417 }, { "epoch": 0.9898585535094743, "grad_norm": 0.9335726191972902, "learning_rate": 5.393970287796979e-09, "loss": 1.571, "step": 7418 }, { "epoch": 0.9899919935948759, "grad_norm": 1.1669560380448265, "learning_rate": 5.2529699073067265e-09, "loss": 1.504, "step": 7419 }, { "epoch": 0.9901254336802775, "grad_norm": 0.9537386546748483, "learning_rate": 5.113836431765995e-09, "loss": 1.5505, "step": 7420 }, { "epoch": 0.9902588737656792, "grad_norm": 0.9165621627626547, "learning_rate": 4.976569887165106e-09, "loss": 1.5176, "step": 7421 }, { "epoch": 0.9903923138510808, "grad_norm": 0.9389326153560223, "learning_rate": 4.8411702991435494e-09, "loss": 1.5421, "step": 7422 }, { "epoch": 0.9905257539364826, "grad_norm": 0.9939334972185406, "learning_rate": 4.707637692992206e-09, "loss": 1.5195, "step": 7423 }, { "epoch": 0.9906591940218842, "grad_norm": 1.0278495243989596, "learning_rate": 4.575972093653347e-09, "loss": 1.5995, "step": 7424 }, { "epoch": 0.9907926341072858, "grad_norm": 0.9567686246995181, "learning_rate": 4.4461735257206315e-09, "loss": 1.459, "step": 7425 }, { "epoch": 0.9909260741926875, "grad_norm": 1.136055181982407, "learning_rate": 4.318242013439111e-09, "loss": 1.4569, "step": 7426 }, { "epoch": 0.9910595142780891, "grad_norm": 0.9458609375171427, "learning_rate": 4.1921775807041154e-09, "loss": 1.5101, "step": 7427 }, { "epoch": 0.9911929543634908, "grad_norm": 0.9533736019573524, "learning_rate": 4.067980251064585e-09, "loss": 1.5205, "step": 7428 }, { "epoch": 0.9913263944488925, "grad_norm": 1.0479939592780765, "learning_rate": 3.94565004771863e-09, "loss": 1.6043, "step": 7429 }, { "epoch": 0.9914598345342941, "grad_norm": 0.9972412023234489, "learning_rate": 3.825186993515751e-09, "loss": 1.5548, "step": 7430 }, { "epoch": 0.9915932746196957, "grad_norm": 0.9272943886701682, "learning_rate": 3.7065911109568365e-09, "loss": 1.5379, "step": 7431 }, { "epoch": 0.9917267147050974, "grad_norm": 0.9473938913490474, "learning_rate": 3.589862422195278e-09, "loss": 1.5721, "step": 7432 }, { "epoch": 0.991860154790499, "grad_norm": 0.9185317252383915, "learning_rate": 3.4750009490336355e-09, "loss": 1.5664, "step": 7433 }, { "epoch": 0.9919935948759008, "grad_norm": 0.9791716481267135, "learning_rate": 3.3620067129269683e-09, "loss": 1.5587, "step": 7434 }, { "epoch": 0.9921270349613024, "grad_norm": 0.9173521049585661, "learning_rate": 3.2508797349817268e-09, "loss": 1.5304, "step": 7435 }, { "epoch": 0.992260475046704, "grad_norm": 0.9632980233243099, "learning_rate": 3.14162003595353e-09, "loss": 1.5745, "step": 7436 }, { "epoch": 0.9923939151321057, "grad_norm": 0.9581161698339242, "learning_rate": 3.034227636253828e-09, "loss": 1.5666, "step": 7437 }, { "epoch": 0.9925273552175073, "grad_norm": 0.9413492862074422, "learning_rate": 2.9287025559399108e-09, "loss": 1.5062, "step": 7438 }, { "epoch": 0.992660795302909, "grad_norm": 0.9747881941796354, "learning_rate": 2.8250448147237875e-09, "loss": 1.4891, "step": 7439 }, { "epoch": 0.9927942353883107, "grad_norm": 0.9824468800744949, "learning_rate": 2.723254431967748e-09, "loss": 1.5657, "step": 7440 }, { "epoch": 0.9929276754737123, "grad_norm": 0.9476470497339562, "learning_rate": 2.623331426683251e-09, "loss": 1.5411, "step": 7441 }, { "epoch": 0.9930611155591139, "grad_norm": 0.9089786350328128, "learning_rate": 2.525275817536477e-09, "loss": 1.573, "step": 7442 }, { "epoch": 0.9931945556445156, "grad_norm": 0.9170076888295607, "learning_rate": 2.429087622842774e-09, "loss": 1.5052, "step": 7443 }, { "epoch": 0.9933279957299173, "grad_norm": 0.9562919014528187, "learning_rate": 2.334766860568882e-09, "loss": 1.5247, "step": 7444 }, { "epoch": 0.9934614358153189, "grad_norm": 0.9327221876692189, "learning_rate": 2.2423135483329306e-09, "loss": 1.5501, "step": 7445 }, { "epoch": 0.9935948759007206, "grad_norm": 0.9382241408409218, "learning_rate": 2.151727703404438e-09, "loss": 1.5226, "step": 7446 }, { "epoch": 0.9937283159861222, "grad_norm": 0.9581003164477122, "learning_rate": 2.0630093427032038e-09, "loss": 1.5585, "step": 7447 }, { "epoch": 0.9938617560715239, "grad_norm": 1.049522255731219, "learning_rate": 1.9761584828004164e-09, "loss": 1.5329, "step": 7448 }, { "epoch": 0.9939951961569256, "grad_norm": 0.9641339166472345, "learning_rate": 1.8911751399197655e-09, "loss": 1.5166, "step": 7449 }, { "epoch": 0.9941286362423272, "grad_norm": 1.1001878062084773, "learning_rate": 1.808059329935219e-09, "loss": 1.5105, "step": 7450 }, { "epoch": 0.9942620763277289, "grad_norm": 0.9672405162002357, "learning_rate": 1.726811068371026e-09, "loss": 1.5858, "step": 7451 }, { "epoch": 0.9943955164131305, "grad_norm": 0.9497524896899447, "learning_rate": 1.6474303704039352e-09, "loss": 1.534, "step": 7452 }, { "epoch": 0.9945289564985321, "grad_norm": 0.9337846143023557, "learning_rate": 1.5699172508620852e-09, "loss": 1.4735, "step": 7453 }, { "epoch": 0.9946623965839338, "grad_norm": 1.083632648142817, "learning_rate": 1.4942717242227844e-09, "loss": 1.5346, "step": 7454 }, { "epoch": 0.9947958366693355, "grad_norm": 0.9438573488466988, "learning_rate": 1.4204938046158412e-09, "loss": 1.5054, "step": 7455 }, { "epoch": 0.9949292767547371, "grad_norm": 0.9556477000553659, "learning_rate": 1.3485835058224538e-09, "loss": 1.5491, "step": 7456 }, { "epoch": 0.9950627168401388, "grad_norm": 0.9113180378493281, "learning_rate": 1.278540841275211e-09, "loss": 1.4985, "step": 7457 }, { "epoch": 0.9951961569255404, "grad_norm": 0.9329564036235153, "learning_rate": 1.2103658240569805e-09, "loss": 1.5467, "step": 7458 }, { "epoch": 0.995329597010942, "grad_norm": 0.931682502757043, "learning_rate": 1.1440584669020205e-09, "loss": 1.5791, "step": 7459 }, { "epoch": 0.9954630370963438, "grad_norm": 1.1265746496215818, "learning_rate": 1.0796187821959792e-09, "loss": 1.589, "step": 7460 }, { "epoch": 0.9955964771817454, "grad_norm": 1.139830131670322, "learning_rate": 1.017046781973674e-09, "loss": 1.5046, "step": 7461 }, { "epoch": 0.9957299172671471, "grad_norm": 0.9401769463698738, "learning_rate": 9.563424779257535e-10, "loss": 1.4786, "step": 7462 }, { "epoch": 0.9958633573525487, "grad_norm": 1.0561437037692578, "learning_rate": 8.97505881388705e-10, "loss": 1.5136, "step": 7463 }, { "epoch": 0.9959967974379503, "grad_norm": 1.0139954761378014, "learning_rate": 8.405370033548466e-10, "loss": 1.5199, "step": 7464 }, { "epoch": 0.9961302375233521, "grad_norm": 0.9447397803000418, "learning_rate": 7.854358544623353e-10, "loss": 1.5971, "step": 7465 }, { "epoch": 0.9962636776087537, "grad_norm": 0.9299070974900873, "learning_rate": 7.322024450062693e-10, "loss": 1.5235, "step": 7466 }, { "epoch": 0.9963971176941553, "grad_norm": 0.9261836009002768, "learning_rate": 6.808367849286956e-10, "loss": 1.5404, "step": 7467 }, { "epoch": 0.996530557779557, "grad_norm": 0.9439837053013257, "learning_rate": 6.313388838230517e-10, "loss": 1.5434, "step": 7468 }, { "epoch": 0.9966639978649586, "grad_norm": 0.9180840699289173, "learning_rate": 5.83708750937495e-10, "loss": 1.4938, "step": 7469 }, { "epoch": 0.9967974379503602, "grad_norm": 0.9964338171196607, "learning_rate": 5.379463951671326e-10, "loss": 1.5414, "step": 7470 }, { "epoch": 0.996930878035762, "grad_norm": 0.967091118915389, "learning_rate": 4.940518250606818e-10, "loss": 1.5766, "step": 7471 }, { "epoch": 0.9970643181211636, "grad_norm": 0.9025190757694261, "learning_rate": 4.5202504881602936e-10, "loss": 1.5351, "step": 7472 }, { "epoch": 0.9971977582065652, "grad_norm": 0.92342728170414, "learning_rate": 4.118660742846725e-10, "loss": 1.546, "step": 7473 }, { "epoch": 0.9973311982919669, "grad_norm": 0.9285850741711503, "learning_rate": 3.735749089661678e-10, "loss": 1.5224, "step": 7474 }, { "epoch": 0.9974646383773685, "grad_norm": 0.9333801469713987, "learning_rate": 3.371515600147923e-10, "loss": 1.5543, "step": 7475 }, { "epoch": 0.9975980784627703, "grad_norm": 0.9326257599816116, "learning_rate": 3.0259603423288266e-10, "loss": 1.548, "step": 7476 }, { "epoch": 0.9977315185481719, "grad_norm": 0.9283677855379775, "learning_rate": 2.699083380741652e-10, "loss": 1.562, "step": 7477 }, { "epoch": 0.9978649586335735, "grad_norm": 0.9887750864167223, "learning_rate": 2.3908847764597674e-10, "loss": 1.5263, "step": 7478 }, { "epoch": 0.9979983987189752, "grad_norm": 1.0609376235918477, "learning_rate": 2.101364587048238e-10, "loss": 1.5183, "step": 7479 }, { "epoch": 0.9981318388043768, "grad_norm": 0.9276456917237319, "learning_rate": 1.8305228665860264e-10, "loss": 1.5592, "step": 7480 }, { "epoch": 0.9982652788897785, "grad_norm": 0.9511667430507186, "learning_rate": 1.578359665654894e-10, "loss": 1.6094, "step": 7481 }, { "epoch": 0.9983987189751802, "grad_norm": 0.935192544085142, "learning_rate": 1.3448750313616032e-10, "loss": 1.5063, "step": 7482 }, { "epoch": 0.9985321590605818, "grad_norm": 0.9411754750534707, "learning_rate": 1.130069007315715e-10, "loss": 1.5261, "step": 7483 }, { "epoch": 0.9986655991459834, "grad_norm": 1.1219517093294005, "learning_rate": 9.339416336517915e-11, "loss": 1.5611, "step": 7484 }, { "epoch": 0.9987990392313851, "grad_norm": 1.1069618633698497, "learning_rate": 7.56492946984988e-11, "loss": 1.5211, "step": 7485 }, { "epoch": 0.9989324793167867, "grad_norm": 0.9326395703982616, "learning_rate": 5.977229804776663e-11, "loss": 1.5316, "step": 7486 }, { "epoch": 0.9990659194021884, "grad_norm": 1.0224397864563275, "learning_rate": 4.576317637838834e-11, "loss": 1.5785, "step": 7487 }, { "epoch": 0.9991993594875901, "grad_norm": 1.2901485937141546, "learning_rate": 3.3621932306049375e-11, "loss": 1.5255, "step": 7488 }, { "epoch": 0.9993327995729917, "grad_norm": 0.9764240130637175, "learning_rate": 2.3348568098935376e-11, "loss": 1.5258, "step": 7489 }, { "epoch": 0.9994662396583934, "grad_norm": 0.9510632886911196, "learning_rate": 1.494308567662195e-11, "loss": 1.5212, "step": 7490 }, { "epoch": 0.999599679743795, "grad_norm": 0.9937550478187936, "learning_rate": 8.405486608964452e-12, "loss": 1.5589, "step": 7491 }, { "epoch": 0.9997331198291967, "grad_norm": 0.9234682778153688, "learning_rate": 3.735772117208214e-12, "loss": 1.501, "step": 7492 }, { "epoch": 0.9998665599145984, "grad_norm": 0.9297623176763948, "learning_rate": 9.339430728783073e-13, "loss": 1.5671, "step": 7493 }, { "epoch": 1.0, "grad_norm": 1.109193681967266, "learning_rate": 0.0, "loss": 1.5531, "step": 7494 }, { "epoch": 1.0, "step": 7494, "total_flos": 2.902622270823963e+19, "train_loss": 1.6068721134280217, "train_runtime": 34738.8495, "train_samples_per_second": 36.238, "train_steps_per_second": 0.216 } ], "logging_steps": 1.0, "max_steps": 7494, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.902622270823963e+19, "train_batch_size": 24, "trial_name": null, "trial_params": null }