{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998533079067038, "eval_steps": 100, "global_step": 3408, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.766990291262136e-07, "loss": 3.4245, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.5533980582524272e-06, "loss": 3.1989, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.330097087378641e-06, "loss": 3.2308, "step": 3 }, { "epoch": 0.0, "learning_rate": 3.1067961165048544e-06, "loss": 2.9591, "step": 4 }, { "epoch": 0.0, "learning_rate": 3.883495145631068e-06, "loss": 2.6008, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.660194174757282e-06, "loss": 2.5077, "step": 6 }, { "epoch": 0.0, "learning_rate": 5.436893203883496e-06, "loss": 2.3392, "step": 7 }, { "epoch": 0.0, "learning_rate": 6.213592233009709e-06, "loss": 2.2419, "step": 8 }, { "epoch": 0.0, "learning_rate": 6.990291262135923e-06, "loss": 2.0988, "step": 9 }, { "epoch": 0.0, "learning_rate": 7.766990291262136e-06, "loss": 2.0581, "step": 10 }, { "epoch": 0.0, "learning_rate": 8.54368932038835e-06, "loss": 1.9165, "step": 11 }, { "epoch": 0.0, "learning_rate": 9.320388349514565e-06, "loss": 2.0559, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.0097087378640778e-05, "loss": 1.8834, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.0873786407766991e-05, "loss": 1.8474, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.1650485436893204e-05, "loss": 1.8903, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.2427184466019418e-05, "loss": 1.8739, "step": 16 }, { "epoch": 0.0, "learning_rate": 1.3203883495145633e-05, "loss": 1.8753, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.3980582524271846e-05, "loss": 1.8335, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.475728155339806e-05, "loss": 1.848, "step": 19 }, { "epoch": 0.01, "learning_rate": 1.5533980582524273e-05, "loss": 1.7487, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.6310679611650486e-05, "loss": 1.8451, "step": 21 }, { "epoch": 0.01, "learning_rate": 1.70873786407767e-05, "loss": 1.742, "step": 22 }, { "epoch": 0.01, "learning_rate": 1.7864077669902916e-05, "loss": 1.8671, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.864077669902913e-05, "loss": 1.7642, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.9417475728155343e-05, "loss": 1.7682, "step": 25 }, { "epoch": 0.01, "learning_rate": 2.0194174757281556e-05, "loss": 1.7926, "step": 26 }, { "epoch": 0.01, "learning_rate": 2.097087378640777e-05, "loss": 1.7951, "step": 27 }, { "epoch": 0.01, "learning_rate": 2.1747572815533982e-05, "loss": 1.7663, "step": 28 }, { "epoch": 0.01, "learning_rate": 2.2524271844660196e-05, "loss": 1.7114, "step": 29 }, { "epoch": 0.01, "learning_rate": 2.330097087378641e-05, "loss": 1.7552, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.4077669902912622e-05, "loss": 1.7745, "step": 31 }, { "epoch": 0.01, "learning_rate": 2.4854368932038836e-05, "loss": 1.7002, "step": 32 }, { "epoch": 0.01, "learning_rate": 2.5631067961165052e-05, "loss": 1.6888, "step": 33 }, { "epoch": 0.01, "learning_rate": 2.6407766990291266e-05, "loss": 1.7667, "step": 34 }, { "epoch": 0.01, "learning_rate": 2.718446601941748e-05, "loss": 1.6512, "step": 35 }, { "epoch": 0.01, "learning_rate": 2.7961165048543692e-05, "loss": 1.6581, "step": 36 }, { "epoch": 0.01, "learning_rate": 2.8737864077669905e-05, "loss": 1.7076, "step": 37 }, { "epoch": 0.01, "learning_rate": 2.951456310679612e-05, "loss": 1.7243, "step": 38 }, { "epoch": 0.01, "learning_rate": 3.0291262135922332e-05, "loss": 1.6582, "step": 39 }, { "epoch": 0.01, "learning_rate": 3.1067961165048545e-05, "loss": 1.692, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.184466019417476e-05, "loss": 1.6946, "step": 41 }, { "epoch": 0.01, "learning_rate": 3.262135922330097e-05, "loss": 1.7463, "step": 42 }, { "epoch": 0.01, "learning_rate": 3.339805825242719e-05, "loss": 1.6669, "step": 43 }, { "epoch": 0.01, "learning_rate": 3.41747572815534e-05, "loss": 1.6517, "step": 44 }, { "epoch": 0.01, "learning_rate": 3.4951456310679615e-05, "loss": 1.6559, "step": 45 }, { "epoch": 0.01, "learning_rate": 3.572815533980583e-05, "loss": 1.692, "step": 46 }, { "epoch": 0.01, "learning_rate": 3.650485436893204e-05, "loss": 1.6341, "step": 47 }, { "epoch": 0.01, "learning_rate": 3.728155339805826e-05, "loss": 1.6815, "step": 48 }, { "epoch": 0.01, "learning_rate": 3.805825242718447e-05, "loss": 1.7242, "step": 49 }, { "epoch": 0.01, "learning_rate": 3.8834951456310685e-05, "loss": 1.6661, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.9611650485436895e-05, "loss": 1.7046, "step": 51 }, { "epoch": 0.02, "learning_rate": 4.038834951456311e-05, "loss": 1.683, "step": 52 }, { "epoch": 0.02, "learning_rate": 4.116504854368932e-05, "loss": 1.682, "step": 53 }, { "epoch": 0.02, "learning_rate": 4.194174757281554e-05, "loss": 1.6719, "step": 54 }, { "epoch": 0.02, "learning_rate": 4.271844660194175e-05, "loss": 1.6538, "step": 55 }, { "epoch": 0.02, "learning_rate": 4.3495145631067965e-05, "loss": 1.6646, "step": 56 }, { "epoch": 0.02, "learning_rate": 4.4271844660194175e-05, "loss": 1.6805, "step": 57 }, { "epoch": 0.02, "learning_rate": 4.504854368932039e-05, "loss": 1.6531, "step": 58 }, { "epoch": 0.02, "learning_rate": 4.58252427184466e-05, "loss": 1.6359, "step": 59 }, { "epoch": 0.02, "learning_rate": 4.660194174757282e-05, "loss": 1.6408, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.737864077669903e-05, "loss": 1.6991, "step": 61 }, { "epoch": 0.02, "learning_rate": 4.8155339805825245e-05, "loss": 1.5819, "step": 62 }, { "epoch": 0.02, "learning_rate": 4.8932038834951454e-05, "loss": 1.6714, "step": 63 }, { "epoch": 0.02, "learning_rate": 4.970873786407767e-05, "loss": 1.6898, "step": 64 }, { "epoch": 0.02, "learning_rate": 5.0485436893203895e-05, "loss": 1.6105, "step": 65 }, { "epoch": 0.02, "learning_rate": 5.1262135922330105e-05, "loss": 1.6153, "step": 66 }, { "epoch": 0.02, "learning_rate": 5.203883495145632e-05, "loss": 1.708, "step": 67 }, { "epoch": 0.02, "learning_rate": 5.281553398058253e-05, "loss": 1.6854, "step": 68 }, { "epoch": 0.02, "learning_rate": 5.359223300970875e-05, "loss": 1.6446, "step": 69 }, { "epoch": 0.02, "learning_rate": 5.436893203883496e-05, "loss": 1.5945, "step": 70 }, { "epoch": 0.02, "learning_rate": 5.5145631067961174e-05, "loss": 1.6984, "step": 71 }, { "epoch": 0.02, "learning_rate": 5.5922330097087384e-05, "loss": 1.6914, "step": 72 }, { "epoch": 0.02, "learning_rate": 5.66990291262136e-05, "loss": 1.693, "step": 73 }, { "epoch": 0.02, "learning_rate": 5.747572815533981e-05, "loss": 1.642, "step": 74 }, { "epoch": 0.02, "learning_rate": 5.825242718446603e-05, "loss": 1.61, "step": 75 }, { "epoch": 0.02, "learning_rate": 5.902912621359224e-05, "loss": 1.7104, "step": 76 }, { "epoch": 0.02, "learning_rate": 5.9805825242718454e-05, "loss": 1.6252, "step": 77 }, { "epoch": 0.02, "learning_rate": 6.0582524271844664e-05, "loss": 1.6818, "step": 78 }, { "epoch": 0.02, "learning_rate": 6.135922330097087e-05, "loss": 1.6772, "step": 79 }, { "epoch": 0.02, "learning_rate": 6.213592233009709e-05, "loss": 1.699, "step": 80 }, { "epoch": 0.02, "learning_rate": 6.291262135922331e-05, "loss": 1.6542, "step": 81 }, { "epoch": 0.02, "learning_rate": 6.368932038834952e-05, "loss": 1.6459, "step": 82 }, { "epoch": 0.02, "learning_rate": 6.446601941747573e-05, "loss": 1.6477, "step": 83 }, { "epoch": 0.02, "learning_rate": 6.524271844660194e-05, "loss": 1.6493, "step": 84 }, { "epoch": 0.02, "learning_rate": 6.601941747572816e-05, "loss": 1.6539, "step": 85 }, { "epoch": 0.03, "learning_rate": 6.679611650485438e-05, "loss": 1.6467, "step": 86 }, { "epoch": 0.03, "learning_rate": 6.757281553398058e-05, "loss": 1.7471, "step": 87 }, { "epoch": 0.03, "learning_rate": 6.83495145631068e-05, "loss": 1.638, "step": 88 }, { "epoch": 0.03, "learning_rate": 6.912621359223301e-05, "loss": 1.6656, "step": 89 }, { "epoch": 0.03, "learning_rate": 6.990291262135923e-05, "loss": 1.6356, "step": 90 }, { "epoch": 0.03, "learning_rate": 7.067961165048545e-05, "loss": 1.7034, "step": 91 }, { "epoch": 0.03, "learning_rate": 7.145631067961166e-05, "loss": 1.7101, "step": 92 }, { "epoch": 0.03, "learning_rate": 7.223300970873787e-05, "loss": 1.6649, "step": 93 }, { "epoch": 0.03, "learning_rate": 7.300970873786408e-05, "loss": 1.7123, "step": 94 }, { "epoch": 0.03, "learning_rate": 7.37864077669903e-05, "loss": 1.6884, "step": 95 }, { "epoch": 0.03, "learning_rate": 7.456310679611652e-05, "loss": 1.7021, "step": 96 }, { "epoch": 0.03, "learning_rate": 7.533980582524272e-05, "loss": 1.6138, "step": 97 }, { "epoch": 0.03, "learning_rate": 7.611650485436894e-05, "loss": 1.7511, "step": 98 }, { "epoch": 0.03, "learning_rate": 7.689320388349515e-05, "loss": 1.6645, "step": 99 }, { "epoch": 0.03, "learning_rate": 7.766990291262137e-05, "loss": 1.6774, "step": 100 }, { "epoch": 0.03, "eval_loss": 0.18088601529598236, "eval_runtime": 25.5251, "eval_samples_per_second": 78.354, "eval_steps_per_second": 0.627, "step": 100 }, { "epoch": 0.03, "learning_rate": 7.844660194174757e-05, "loss": 1.642, "step": 101 }, { "epoch": 0.03, "learning_rate": 7.922330097087379e-05, "loss": 1.7632, "step": 102 }, { "epoch": 0.03, "learning_rate": 8e-05, "loss": 1.7431, "step": 103 }, { "epoch": 0.03, "learning_rate": 7.999998192880882e-05, "loss": 1.6428, "step": 104 }, { "epoch": 0.03, "learning_rate": 7.999992771525157e-05, "loss": 1.675, "step": 105 }, { "epoch": 0.03, "learning_rate": 7.999983735937726e-05, "loss": 1.685, "step": 106 }, { "epoch": 0.03, "learning_rate": 7.999971086126752e-05, "loss": 1.6642, "step": 107 }, { "epoch": 0.03, "learning_rate": 7.999954822103665e-05, "loss": 1.6388, "step": 108 }, { "epoch": 0.03, "learning_rate": 7.999934943883161e-05, "loss": 1.711, "step": 109 }, { "epoch": 0.03, "learning_rate": 7.9999114514832e-05, "loss": 1.683, "step": 110 }, { "epoch": 0.03, "learning_rate": 7.999884344925012e-05, "loss": 1.6607, "step": 111 }, { "epoch": 0.03, "learning_rate": 7.999853624233086e-05, "loss": 1.7063, "step": 112 }, { "epoch": 0.03, "learning_rate": 7.999819289435179e-05, "loss": 1.6994, "step": 113 }, { "epoch": 0.03, "learning_rate": 7.999781340562318e-05, "loss": 1.6633, "step": 114 }, { "epoch": 0.03, "learning_rate": 7.999739777648792e-05, "loss": 1.656, "step": 115 }, { "epoch": 0.03, "learning_rate": 7.999694600732152e-05, "loss": 1.6876, "step": 116 }, { "epoch": 0.03, "learning_rate": 7.999645809853222e-05, "loss": 1.7541, "step": 117 }, { "epoch": 0.03, "learning_rate": 7.999593405056084e-05, "loss": 1.65, "step": 118 }, { "epoch": 0.03, "learning_rate": 7.999537386388092e-05, "loss": 1.6745, "step": 119 }, { "epoch": 0.04, "learning_rate": 7.999477753899861e-05, "loss": 1.7118, "step": 120 }, { "epoch": 0.04, "learning_rate": 7.999414507645272e-05, "loss": 1.732, "step": 121 }, { "epoch": 0.04, "learning_rate": 7.99934764768147e-05, "loss": 1.6339, "step": 122 }, { "epoch": 0.04, "learning_rate": 7.999277174068872e-05, "loss": 1.62, "step": 123 }, { "epoch": 0.04, "learning_rate": 7.999203086871149e-05, "loss": 1.6943, "step": 124 }, { "epoch": 0.04, "learning_rate": 7.999125386155248e-05, "loss": 1.6972, "step": 125 }, { "epoch": 0.04, "learning_rate": 7.999044071991375e-05, "loss": 1.6469, "step": 126 }, { "epoch": 0.04, "learning_rate": 7.998959144453001e-05, "loss": 1.6318, "step": 127 }, { "epoch": 0.04, "learning_rate": 7.998870603616864e-05, "loss": 1.7085, "step": 128 }, { "epoch": 0.04, "learning_rate": 7.998778449562965e-05, "loss": 1.6849, "step": 129 }, { "epoch": 0.04, "learning_rate": 7.998682682374572e-05, "loss": 1.6529, "step": 130 }, { "epoch": 0.04, "learning_rate": 7.998583302138217e-05, "loss": 1.6256, "step": 131 }, { "epoch": 0.04, "learning_rate": 7.998480308943692e-05, "loss": 1.6608, "step": 132 }, { "epoch": 0.04, "learning_rate": 7.998373702884062e-05, "loss": 1.6488, "step": 133 }, { "epoch": 0.04, "learning_rate": 7.998263484055649e-05, "loss": 1.6508, "step": 134 }, { "epoch": 0.04, "learning_rate": 7.998149652558045e-05, "loss": 1.6393, "step": 135 }, { "epoch": 0.04, "learning_rate": 7.998032208494101e-05, "loss": 1.6887, "step": 136 }, { "epoch": 0.04, "learning_rate": 7.997911151969936e-05, "loss": 1.6903, "step": 137 }, { "epoch": 0.04, "learning_rate": 7.997786483094931e-05, "loss": 1.6407, "step": 138 }, { "epoch": 0.04, "learning_rate": 7.997658201981732e-05, "loss": 1.6433, "step": 139 }, { "epoch": 0.04, "learning_rate": 7.997526308746249e-05, "loss": 1.6846, "step": 140 }, { "epoch": 0.04, "learning_rate": 7.997390803507655e-05, "loss": 1.6547, "step": 141 }, { "epoch": 0.04, "learning_rate": 7.997251686388387e-05, "loss": 1.666, "step": 142 }, { "epoch": 0.04, "learning_rate": 7.997108957514146e-05, "loss": 1.6367, "step": 143 }, { "epoch": 0.04, "learning_rate": 7.996962617013897e-05, "loss": 1.6775, "step": 144 }, { "epoch": 0.04, "learning_rate": 7.996812665019865e-05, "loss": 1.6688, "step": 145 }, { "epoch": 0.04, "learning_rate": 7.996659101667542e-05, "loss": 1.6512, "step": 146 }, { "epoch": 0.04, "learning_rate": 7.996501927095682e-05, "loss": 1.6536, "step": 147 }, { "epoch": 0.04, "learning_rate": 7.9963411414463e-05, "loss": 1.615, "step": 148 }, { "epoch": 0.04, "learning_rate": 7.996176744864678e-05, "loss": 1.6131, "step": 149 }, { "epoch": 0.04, "learning_rate": 7.996008737499354e-05, "loss": 1.6378, "step": 150 }, { "epoch": 0.04, "learning_rate": 7.995837119502137e-05, "loss": 1.6612, "step": 151 }, { "epoch": 0.04, "learning_rate": 7.995661891028093e-05, "loss": 1.5754, "step": 152 }, { "epoch": 0.04, "learning_rate": 7.99548305223555e-05, "loss": 1.634, "step": 153 }, { "epoch": 0.05, "learning_rate": 7.995300603286099e-05, "loss": 1.6674, "step": 154 }, { "epoch": 0.05, "learning_rate": 7.995114544344596e-05, "loss": 1.6133, "step": 155 }, { "epoch": 0.05, "learning_rate": 7.994924875579154e-05, "loss": 1.662, "step": 156 }, { "epoch": 0.05, "learning_rate": 7.994731597161152e-05, "loss": 1.6249, "step": 157 }, { "epoch": 0.05, "learning_rate": 7.994534709265226e-05, "loss": 1.6465, "step": 158 }, { "epoch": 0.05, "learning_rate": 7.994334212069278e-05, "loss": 1.6165, "step": 159 }, { "epoch": 0.05, "learning_rate": 7.99413010575447e-05, "loss": 1.6191, "step": 160 }, { "epoch": 0.05, "learning_rate": 7.993922390505221e-05, "loss": 1.5617, "step": 161 }, { "epoch": 0.05, "learning_rate": 7.993711066509218e-05, "loss": 1.6773, "step": 162 }, { "epoch": 0.05, "learning_rate": 7.993496133957401e-05, "loss": 1.6366, "step": 163 }, { "epoch": 0.05, "learning_rate": 7.993277593043977e-05, "loss": 1.6121, "step": 164 }, { "epoch": 0.05, "learning_rate": 7.99305544396641e-05, "loss": 1.6534, "step": 165 }, { "epoch": 0.05, "learning_rate": 7.992829686925425e-05, "loss": 1.6495, "step": 166 }, { "epoch": 0.05, "learning_rate": 7.992600322125006e-05, "loss": 1.6239, "step": 167 }, { "epoch": 0.05, "learning_rate": 7.992367349772398e-05, "loss": 1.6081, "step": 168 }, { "epoch": 0.05, "learning_rate": 7.992130770078107e-05, "loss": 1.6434, "step": 169 }, { "epoch": 0.05, "learning_rate": 7.991890583255895e-05, "loss": 1.6174, "step": 170 }, { "epoch": 0.05, "learning_rate": 7.991646789522786e-05, "loss": 1.6288, "step": 171 }, { "epoch": 0.05, "learning_rate": 7.991399389099062e-05, "loss": 1.5779, "step": 172 }, { "epoch": 0.05, "learning_rate": 7.991148382208265e-05, "loss": 1.6689, "step": 173 }, { "epoch": 0.05, "learning_rate": 7.990893769077192e-05, "loss": 1.5706, "step": 174 }, { "epoch": 0.05, "learning_rate": 7.990635549935905e-05, "loss": 1.6077, "step": 175 }, { "epoch": 0.05, "learning_rate": 7.990373725017716e-05, "loss": 1.6109, "step": 176 }, { "epoch": 0.05, "learning_rate": 7.990108294559202e-05, "loss": 1.6423, "step": 177 }, { "epoch": 0.05, "learning_rate": 7.989839258800196e-05, "loss": 1.6463, "step": 178 }, { "epoch": 0.05, "learning_rate": 7.989566617983786e-05, "loss": 1.6117, "step": 179 }, { "epoch": 0.05, "learning_rate": 7.989290372356319e-05, "loss": 1.6587, "step": 180 }, { "epoch": 0.05, "learning_rate": 7.989010522167401e-05, "loss": 1.6205, "step": 181 }, { "epoch": 0.05, "learning_rate": 7.988727067669893e-05, "loss": 1.5736, "step": 182 }, { "epoch": 0.05, "learning_rate": 7.988440009119911e-05, "loss": 1.6257, "step": 183 }, { "epoch": 0.05, "learning_rate": 7.988149346776833e-05, "loss": 1.6637, "step": 184 }, { "epoch": 0.05, "learning_rate": 7.987855080903286e-05, "loss": 1.563, "step": 185 }, { "epoch": 0.05, "learning_rate": 7.987557211765161e-05, "loss": 1.5749, "step": 186 }, { "epoch": 0.05, "learning_rate": 7.987255739631596e-05, "loss": 1.5997, "step": 187 }, { "epoch": 0.06, "learning_rate": 7.986950664774992e-05, "loss": 1.5899, "step": 188 }, { "epoch": 0.06, "learning_rate": 7.986641987471e-05, "loss": 1.6201, "step": 189 }, { "epoch": 0.06, "learning_rate": 7.986329707998532e-05, "loss": 1.5987, "step": 190 }, { "epoch": 0.06, "learning_rate": 7.986013826639747e-05, "loss": 1.6092, "step": 191 }, { "epoch": 0.06, "learning_rate": 7.985694343680065e-05, "loss": 1.6173, "step": 192 }, { "epoch": 0.06, "learning_rate": 7.985371259408157e-05, "loss": 1.5692, "step": 193 }, { "epoch": 0.06, "learning_rate": 7.985044574115951e-05, "loss": 1.5751, "step": 194 }, { "epoch": 0.06, "learning_rate": 7.984714288098623e-05, "loss": 1.596, "step": 195 }, { "epoch": 0.06, "learning_rate": 7.984380401654608e-05, "loss": 1.6791, "step": 196 }, { "epoch": 0.06, "learning_rate": 7.984042915085592e-05, "loss": 1.608, "step": 197 }, { "epoch": 0.06, "learning_rate": 7.983701828696515e-05, "loss": 1.5983, "step": 198 }, { "epoch": 0.06, "learning_rate": 7.983357142795569e-05, "loss": 1.586, "step": 199 }, { "epoch": 0.06, "learning_rate": 7.983008857694196e-05, "loss": 1.5633, "step": 200 }, { "epoch": 0.06, "eval_loss": 0.17678602039813995, "eval_runtime": 24.7483, "eval_samples_per_second": 80.814, "eval_steps_per_second": 0.647, "step": 200 }, { "epoch": 0.06, "learning_rate": 7.982656973707095e-05, "loss": 1.5357, "step": 201 }, { "epoch": 0.06, "learning_rate": 7.982301491152213e-05, "loss": 1.5821, "step": 202 }, { "epoch": 0.06, "learning_rate": 7.98194241035075e-05, "loss": 1.5463, "step": 203 }, { "epoch": 0.06, "learning_rate": 7.981579731627155e-05, "loss": 1.5106, "step": 204 }, { "epoch": 0.06, "learning_rate": 7.981213455309131e-05, "loss": 1.5734, "step": 205 }, { "epoch": 0.06, "learning_rate": 7.980843581727633e-05, "loss": 1.5619, "step": 206 }, { "epoch": 0.06, "learning_rate": 7.98047011121686e-05, "loss": 1.5589, "step": 207 }, { "epoch": 0.06, "learning_rate": 7.980093044114269e-05, "loss": 1.543, "step": 208 }, { "epoch": 0.06, "learning_rate": 7.979712380760557e-05, "loss": 1.508, "step": 209 }, { "epoch": 0.06, "learning_rate": 7.97932812149968e-05, "loss": 1.5922, "step": 210 }, { "epoch": 0.06, "learning_rate": 7.97894026667884e-05, "loss": 1.5705, "step": 211 }, { "epoch": 0.06, "learning_rate": 7.978548816648484e-05, "loss": 1.5767, "step": 212 }, { "epoch": 0.06, "learning_rate": 7.978153771762311e-05, "loss": 1.5894, "step": 213 }, { "epoch": 0.06, "learning_rate": 7.977755132377269e-05, "loss": 1.592, "step": 214 }, { "epoch": 0.06, "learning_rate": 7.977352898853551e-05, "loss": 1.5524, "step": 215 }, { "epoch": 0.06, "learning_rate": 7.9769470715546e-05, "loss": 1.5463, "step": 216 }, { "epoch": 0.06, "learning_rate": 7.976537650847104e-05, "loss": 1.4902, "step": 217 }, { "epoch": 0.06, "learning_rate": 7.976124637101e-05, "loss": 1.5343, "step": 218 }, { "epoch": 0.06, "learning_rate": 7.975708030689471e-05, "loss": 1.5716, "step": 219 }, { "epoch": 0.06, "learning_rate": 7.975287831988944e-05, "loss": 1.5758, "step": 220 }, { "epoch": 0.06, "learning_rate": 7.974864041379095e-05, "loss": 1.5188, "step": 221 }, { "epoch": 0.07, "learning_rate": 7.974436659242843e-05, "loss": 1.5565, "step": 222 }, { "epoch": 0.07, "learning_rate": 7.974005685966354e-05, "loss": 1.508, "step": 223 }, { "epoch": 0.07, "learning_rate": 7.973571121939037e-05, "loss": 1.5189, "step": 224 }, { "epoch": 0.07, "learning_rate": 7.973132967553549e-05, "loss": 1.4511, "step": 225 }, { "epoch": 0.07, "learning_rate": 7.972691223205784e-05, "loss": 1.5976, "step": 226 }, { "epoch": 0.07, "learning_rate": 7.97224588929489e-05, "loss": 1.5234, "step": 227 }, { "epoch": 0.07, "learning_rate": 7.971796966223248e-05, "loss": 1.5315, "step": 228 }, { "epoch": 0.07, "learning_rate": 7.97134445439649e-05, "loss": 1.5066, "step": 229 }, { "epoch": 0.07, "learning_rate": 7.970888354223485e-05, "loss": 1.5541, "step": 230 }, { "epoch": 0.07, "learning_rate": 7.970428666116348e-05, "loss": 1.5383, "step": 231 }, { "epoch": 0.07, "learning_rate": 7.969965390490434e-05, "loss": 1.5633, "step": 232 }, { "epoch": 0.07, "learning_rate": 7.969498527764341e-05, "loss": 1.5309, "step": 233 }, { "epoch": 0.07, "learning_rate": 7.969028078359905e-05, "loss": 1.5301, "step": 234 }, { "epoch": 0.07, "learning_rate": 7.968554042702208e-05, "loss": 1.5394, "step": 235 }, { "epoch": 0.07, "learning_rate": 7.968076421219568e-05, "loss": 1.4884, "step": 236 }, { "epoch": 0.07, "learning_rate": 7.967595214343544e-05, "loss": 1.5066, "step": 237 }, { "epoch": 0.07, "learning_rate": 7.967110422508936e-05, "loss": 1.455, "step": 238 }, { "epoch": 0.07, "learning_rate": 7.966622046153783e-05, "loss": 1.5569, "step": 239 }, { "epoch": 0.07, "learning_rate": 7.966130085719359e-05, "loss": 1.4966, "step": 240 }, { "epoch": 0.07, "learning_rate": 7.965634541650182e-05, "loss": 1.5731, "step": 241 }, { "epoch": 0.07, "learning_rate": 7.965135414394007e-05, "loss": 1.4655, "step": 242 }, { "epoch": 0.07, "learning_rate": 7.964632704401823e-05, "loss": 1.4646, "step": 243 }, { "epoch": 0.07, "learning_rate": 7.964126412127858e-05, "loss": 1.5041, "step": 244 }, { "epoch": 0.07, "learning_rate": 7.963616538029578e-05, "loss": 1.5773, "step": 245 }, { "epoch": 0.07, "learning_rate": 7.963103082567685e-05, "loss": 1.5009, "step": 246 }, { "epoch": 0.07, "learning_rate": 7.962586046206117e-05, "loss": 1.4912, "step": 247 }, { "epoch": 0.07, "learning_rate": 7.962065429412046e-05, "loss": 1.5504, "step": 248 }, { "epoch": 0.07, "learning_rate": 7.961541232655879e-05, "loss": 1.4792, "step": 249 }, { "epoch": 0.07, "learning_rate": 7.961013456411262e-05, "loss": 1.5081, "step": 250 }, { "epoch": 0.07, "learning_rate": 7.960482101155072e-05, "loss": 1.472, "step": 251 }, { "epoch": 0.07, "learning_rate": 7.959947167367417e-05, "loss": 1.5423, "step": 252 }, { "epoch": 0.07, "learning_rate": 7.959408655531646e-05, "loss": 1.4912, "step": 253 }, { "epoch": 0.07, "learning_rate": 7.958866566134332e-05, "loss": 1.5218, "step": 254 }, { "epoch": 0.07, "learning_rate": 7.958320899665287e-05, "loss": 1.5307, "step": 255 }, { "epoch": 0.08, "learning_rate": 7.957771656617555e-05, "loss": 1.5089, "step": 256 }, { "epoch": 0.08, "learning_rate": 7.957218837487408e-05, "loss": 1.5274, "step": 257 }, { "epoch": 0.08, "learning_rate": 7.956662442774351e-05, "loss": 1.4868, "step": 258 }, { "epoch": 0.08, "learning_rate": 7.95610247298112e-05, "loss": 1.5192, "step": 259 }, { "epoch": 0.08, "learning_rate": 7.95553892861368e-05, "loss": 1.5363, "step": 260 }, { "epoch": 0.08, "learning_rate": 7.954971810181229e-05, "loss": 1.4791, "step": 261 }, { "epoch": 0.08, "learning_rate": 7.95440111819619e-05, "loss": 1.5371, "step": 262 }, { "epoch": 0.08, "learning_rate": 7.953826853174218e-05, "loss": 1.4994, "step": 263 }, { "epoch": 0.08, "learning_rate": 7.953249015634198e-05, "loss": 1.5022, "step": 264 }, { "epoch": 0.08, "learning_rate": 7.952667606098237e-05, "loss": 1.4916, "step": 265 }, { "epoch": 0.08, "learning_rate": 7.952082625091676e-05, "loss": 1.4575, "step": 266 }, { "epoch": 0.08, "learning_rate": 7.951494073143078e-05, "loss": 1.4916, "step": 267 }, { "epoch": 0.08, "learning_rate": 7.950901950784236e-05, "loss": 1.4733, "step": 268 }, { "epoch": 0.08, "learning_rate": 7.950306258550168e-05, "loss": 1.4767, "step": 269 }, { "epoch": 0.08, "learning_rate": 7.949706996979115e-05, "loss": 1.5004, "step": 270 }, { "epoch": 0.08, "learning_rate": 7.949104166612551e-05, "loss": 1.5292, "step": 271 }, { "epoch": 0.08, "learning_rate": 7.948497767995163e-05, "loss": 1.4958, "step": 272 }, { "epoch": 0.08, "learning_rate": 7.947887801674872e-05, "loss": 1.4727, "step": 273 }, { "epoch": 0.08, "learning_rate": 7.947274268202817e-05, "loss": 1.5495, "step": 274 }, { "epoch": 0.08, "learning_rate": 7.946657168133363e-05, "loss": 1.4684, "step": 275 }, { "epoch": 0.08, "learning_rate": 7.946036502024098e-05, "loss": 1.4778, "step": 276 }, { "epoch": 0.08, "learning_rate": 7.945412270435826e-05, "loss": 1.4795, "step": 277 }, { "epoch": 0.08, "learning_rate": 7.944784473932583e-05, "loss": 1.517, "step": 278 }, { "epoch": 0.08, "learning_rate": 7.944153113081618e-05, "loss": 1.4645, "step": 279 }, { "epoch": 0.08, "learning_rate": 7.943518188453403e-05, "loss": 1.5227, "step": 280 }, { "epoch": 0.08, "learning_rate": 7.94287970062163e-05, "loss": 1.5244, "step": 281 }, { "epoch": 0.08, "learning_rate": 7.942237650163213e-05, "loss": 1.5063, "step": 282 }, { "epoch": 0.08, "learning_rate": 7.941592037658279e-05, "loss": 1.4413, "step": 283 }, { "epoch": 0.08, "learning_rate": 7.940942863690181e-05, "loss": 1.5263, "step": 284 }, { "epoch": 0.08, "learning_rate": 7.940290128845485e-05, "loss": 1.4906, "step": 285 }, { "epoch": 0.08, "learning_rate": 7.939633833713975e-05, "loss": 1.5155, "step": 286 }, { "epoch": 0.08, "learning_rate": 7.938973978888654e-05, "loss": 1.5025, "step": 287 }, { "epoch": 0.08, "learning_rate": 7.93831056496574e-05, "loss": 1.4087, "step": 288 }, { "epoch": 0.08, "learning_rate": 7.937643592544665e-05, "loss": 1.5133, "step": 289 }, { "epoch": 0.09, "learning_rate": 7.936973062228081e-05, "loss": 1.5485, "step": 290 }, { "epoch": 0.09, "learning_rate": 7.93629897462185e-05, "loss": 1.4673, "step": 291 }, { "epoch": 0.09, "learning_rate": 7.935621330335052e-05, "loss": 1.4585, "step": 292 }, { "epoch": 0.09, "learning_rate": 7.934940129979979e-05, "loss": 1.5047, "step": 293 }, { "epoch": 0.09, "learning_rate": 7.934255374172134e-05, "loss": 1.5027, "step": 294 }, { "epoch": 0.09, "learning_rate": 7.933567063530236e-05, "loss": 1.4654, "step": 295 }, { "epoch": 0.09, "learning_rate": 7.932875198676215e-05, "loss": 1.4796, "step": 296 }, { "epoch": 0.09, "learning_rate": 7.932179780235212e-05, "loss": 1.5474, "step": 297 }, { "epoch": 0.09, "learning_rate": 7.931480808835577e-05, "loss": 1.4751, "step": 298 }, { "epoch": 0.09, "learning_rate": 7.930778285108876e-05, "loss": 1.4563, "step": 299 }, { "epoch": 0.09, "learning_rate": 7.93007220968988e-05, "loss": 1.4894, "step": 300 }, { "epoch": 0.09, "eval_loss": 0.16949589550495148, "eval_runtime": 24.7827, "eval_samples_per_second": 80.701, "eval_steps_per_second": 0.646, "step": 300 }, { "epoch": 0.09, "learning_rate": 7.929362583216566e-05, "loss": 1.5054, "step": 301 }, { "epoch": 0.09, "learning_rate": 7.92864940633013e-05, "loss": 1.4639, "step": 302 }, { "epoch": 0.09, "learning_rate": 7.927932679674964e-05, "loss": 1.4206, "step": 303 }, { "epoch": 0.09, "learning_rate": 7.927212403898677e-05, "loss": 1.4521, "step": 304 }, { "epoch": 0.09, "learning_rate": 7.926488579652081e-05, "loss": 1.5274, "step": 305 }, { "epoch": 0.09, "learning_rate": 7.925761207589193e-05, "loss": 1.4917, "step": 306 }, { "epoch": 0.09, "learning_rate": 7.925030288367238e-05, "loss": 1.4868, "step": 307 }, { "epoch": 0.09, "learning_rate": 7.924295822646643e-05, "loss": 1.5159, "step": 308 }, { "epoch": 0.09, "learning_rate": 7.923557811091044e-05, "loss": 1.5021, "step": 309 }, { "epoch": 0.09, "learning_rate": 7.922816254367277e-05, "loss": 1.4571, "step": 310 }, { "epoch": 0.09, "learning_rate": 7.922071153145384e-05, "loss": 1.4389, "step": 311 }, { "epoch": 0.09, "learning_rate": 7.921322508098605e-05, "loss": 1.5344, "step": 312 }, { "epoch": 0.09, "learning_rate": 7.92057031990339e-05, "loss": 1.4675, "step": 313 }, { "epoch": 0.09, "learning_rate": 7.919814589239382e-05, "loss": 1.4391, "step": 314 }, { "epoch": 0.09, "learning_rate": 7.919055316789431e-05, "loss": 1.483, "step": 315 }, { "epoch": 0.09, "learning_rate": 7.918292503239584e-05, "loss": 1.4582, "step": 316 }, { "epoch": 0.09, "learning_rate": 7.91752614927909e-05, "loss": 1.4664, "step": 317 }, { "epoch": 0.09, "learning_rate": 7.91675625560039e-05, "loss": 1.4992, "step": 318 }, { "epoch": 0.09, "learning_rate": 7.915982822899136e-05, "loss": 1.4297, "step": 319 }, { "epoch": 0.09, "learning_rate": 7.915205851874168e-05, "loss": 1.5146, "step": 320 }, { "epoch": 0.09, "learning_rate": 7.914425343227523e-05, "loss": 1.4252, "step": 321 }, { "epoch": 0.09, "learning_rate": 7.91364129766444e-05, "loss": 1.3957, "step": 322 }, { "epoch": 0.09, "learning_rate": 7.91285371589335e-05, "loss": 1.4833, "step": 323 }, { "epoch": 0.1, "learning_rate": 7.912062598625879e-05, "loss": 1.4306, "step": 324 }, { "epoch": 0.1, "learning_rate": 7.91126794657685e-05, "loss": 1.4544, "step": 325 }, { "epoch": 0.1, "learning_rate": 7.910469760464277e-05, "loss": 1.442, "step": 326 }, { "epoch": 0.1, "learning_rate": 7.909668041009372e-05, "loss": 1.5333, "step": 327 }, { "epoch": 0.1, "learning_rate": 7.908862788936532e-05, "loss": 1.4602, "step": 328 }, { "epoch": 0.1, "learning_rate": 7.908054004973352e-05, "loss": 1.4749, "step": 329 }, { "epoch": 0.1, "learning_rate": 7.907241689850617e-05, "loss": 1.4597, "step": 330 }, { "epoch": 0.1, "learning_rate": 7.906425844302302e-05, "loss": 1.5203, "step": 331 }, { "epoch": 0.1, "learning_rate": 7.905606469065572e-05, "loss": 1.4625, "step": 332 }, { "epoch": 0.1, "learning_rate": 7.904783564880779e-05, "loss": 1.4267, "step": 333 }, { "epoch": 0.1, "learning_rate": 7.90395713249147e-05, "loss": 1.4752, "step": 334 }, { "epoch": 0.1, "learning_rate": 7.903127172644374e-05, "loss": 1.5467, "step": 335 }, { "epoch": 0.1, "learning_rate": 7.902293686089407e-05, "loss": 1.4317, "step": 336 }, { "epoch": 0.1, "learning_rate": 7.901456673579677e-05, "loss": 1.4016, "step": 337 }, { "epoch": 0.1, "learning_rate": 7.900616135871474e-05, "loss": 1.5466, "step": 338 }, { "epoch": 0.1, "learning_rate": 7.899772073724272e-05, "loss": 1.4207, "step": 339 }, { "epoch": 0.1, "learning_rate": 7.898924487900734e-05, "loss": 1.4678, "step": 340 }, { "epoch": 0.1, "learning_rate": 7.898073379166703e-05, "loss": 1.458, "step": 341 }, { "epoch": 0.1, "learning_rate": 7.897218748291206e-05, "loss": 1.5305, "step": 342 }, { "epoch": 0.1, "learning_rate": 7.896360596046453e-05, "loss": 1.4003, "step": 343 }, { "epoch": 0.1, "learning_rate": 7.895498923207836e-05, "loss": 1.505, "step": 344 }, { "epoch": 0.1, "learning_rate": 7.894633730553928e-05, "loss": 1.4764, "step": 345 }, { "epoch": 0.1, "learning_rate": 7.893765018866482e-05, "loss": 1.4739, "step": 346 }, { "epoch": 0.1, "learning_rate": 7.89289278893043e-05, "loss": 1.4403, "step": 347 }, { "epoch": 0.1, "learning_rate": 7.892017041533886e-05, "loss": 1.4891, "step": 348 }, { "epoch": 0.1, "learning_rate": 7.891137777468136e-05, "loss": 1.4556, "step": 349 }, { "epoch": 0.1, "learning_rate": 7.89025499752765e-05, "loss": 1.5635, "step": 350 }, { "epoch": 0.1, "learning_rate": 7.889368702510073e-05, "loss": 1.5482, "step": 351 }, { "epoch": 0.1, "learning_rate": 7.888478893216223e-05, "loss": 1.4714, "step": 352 }, { "epoch": 0.1, "learning_rate": 7.887585570450098e-05, "loss": 1.5668, "step": 353 }, { "epoch": 0.1, "learning_rate": 7.886688735018866e-05, "loss": 1.6016, "step": 354 }, { "epoch": 0.1, "learning_rate": 7.885788387732874e-05, "loss": 1.6025, "step": 355 }, { "epoch": 0.1, "learning_rate": 7.884884529405636e-05, "loss": 1.5715, "step": 356 }, { "epoch": 0.1, "learning_rate": 7.883977160853847e-05, "loss": 1.6736, "step": 357 }, { "epoch": 0.11, "learning_rate": 7.883066282897362e-05, "loss": 1.61, "step": 358 }, { "epoch": 0.11, "learning_rate": 7.882151896359218e-05, "loss": 1.5429, "step": 359 }, { "epoch": 0.11, "learning_rate": 7.881234002065617e-05, "loss": 1.5019, "step": 360 }, { "epoch": 0.11, "learning_rate": 7.880312600845928e-05, "loss": 1.4937, "step": 361 }, { "epoch": 0.11, "learning_rate": 7.879387693532697e-05, "loss": 1.4798, "step": 362 }, { "epoch": 0.11, "learning_rate": 7.878459280961629e-05, "loss": 1.4661, "step": 363 }, { "epoch": 0.11, "learning_rate": 7.877527363971602e-05, "loss": 1.529, "step": 364 }, { "epoch": 0.11, "learning_rate": 7.876591943404658e-05, "loss": 1.5507, "step": 365 }, { "epoch": 0.11, "learning_rate": 7.875653020106006e-05, "loss": 1.499, "step": 366 }, { "epoch": 0.11, "learning_rate": 7.874710594924017e-05, "loss": 1.4649, "step": 367 }, { "epoch": 0.11, "learning_rate": 7.873764668710228e-05, "loss": 1.5681, "step": 368 }, { "epoch": 0.11, "learning_rate": 7.872815242319343e-05, "loss": 1.4876, "step": 369 }, { "epoch": 0.11, "learning_rate": 7.871862316609224e-05, "loss": 1.5506, "step": 370 }, { "epoch": 0.11, "learning_rate": 7.870905892440895e-05, "loss": 1.5359, "step": 371 }, { "epoch": 0.11, "learning_rate": 7.869945970678541e-05, "loss": 1.5306, "step": 372 }, { "epoch": 0.11, "learning_rate": 7.868982552189514e-05, "loss": 1.489, "step": 373 }, { "epoch": 0.11, "learning_rate": 7.868015637844313e-05, "loss": 1.5582, "step": 374 }, { "epoch": 0.11, "learning_rate": 7.867045228516607e-05, "loss": 1.5321, "step": 375 }, { "epoch": 0.11, "learning_rate": 7.866071325083217e-05, "loss": 1.5471, "step": 376 }, { "epoch": 0.11, "learning_rate": 7.865093928424123e-05, "loss": 1.4685, "step": 377 }, { "epoch": 0.11, "learning_rate": 7.864113039422464e-05, "loss": 1.5347, "step": 378 }, { "epoch": 0.11, "learning_rate": 7.863128658964527e-05, "loss": 1.5319, "step": 379 }, { "epoch": 0.11, "learning_rate": 7.862140787939761e-05, "loss": 1.5242, "step": 380 }, { "epoch": 0.11, "learning_rate": 7.861149427240765e-05, "loss": 1.4791, "step": 381 }, { "epoch": 0.11, "learning_rate": 7.860154577763293e-05, "loss": 1.4532, "step": 382 }, { "epoch": 0.11, "learning_rate": 7.859156240406252e-05, "loss": 1.4927, "step": 383 }, { "epoch": 0.11, "learning_rate": 7.858154416071697e-05, "loss": 1.5222, "step": 384 }, { "epoch": 0.11, "learning_rate": 7.857149105664839e-05, "loss": 1.4811, "step": 385 }, { "epoch": 0.11, "learning_rate": 7.856140310094033e-05, "loss": 1.5013, "step": 386 }, { "epoch": 0.11, "learning_rate": 7.855128030270786e-05, "loss": 1.4733, "step": 387 }, { "epoch": 0.11, "learning_rate": 7.854112267109756e-05, "loss": 1.5002, "step": 388 }, { "epoch": 0.11, "learning_rate": 7.853093021528742e-05, "loss": 1.4687, "step": 389 }, { "epoch": 0.11, "learning_rate": 7.852070294448693e-05, "loss": 1.5572, "step": 390 }, { "epoch": 0.11, "learning_rate": 7.851044086793709e-05, "loss": 1.5006, "step": 391 }, { "epoch": 0.12, "learning_rate": 7.850014399491024e-05, "loss": 1.5307, "step": 392 }, { "epoch": 0.12, "learning_rate": 7.848981233471024e-05, "loss": 1.4129, "step": 393 }, { "epoch": 0.12, "learning_rate": 7.847944589667236e-05, "loss": 1.5944, "step": 394 }, { "epoch": 0.12, "learning_rate": 7.846904469016328e-05, "loss": 1.5001, "step": 395 }, { "epoch": 0.12, "learning_rate": 7.845860872458116e-05, "loss": 1.4848, "step": 396 }, { "epoch": 0.12, "learning_rate": 7.844813800935544e-05, "loss": 1.4631, "step": 397 }, { "epoch": 0.12, "learning_rate": 7.843763255394711e-05, "loss": 1.5068, "step": 398 }, { "epoch": 0.12, "learning_rate": 7.842709236784842e-05, "loss": 1.4764, "step": 399 }, { "epoch": 0.12, "learning_rate": 7.841651746058308e-05, "loss": 1.5033, "step": 400 }, { "epoch": 0.12, "eval_loss": 0.1591062694787979, "eval_runtime": 24.8751, "eval_samples_per_second": 80.402, "eval_steps_per_second": 0.643, "step": 400 }, { "epoch": 0.12, "learning_rate": 7.840590784170614e-05, "loss": 1.5365, "step": 401 }, { "epoch": 0.12, "learning_rate": 7.839526352080403e-05, "loss": 1.5073, "step": 402 }, { "epoch": 0.12, "learning_rate": 7.838458450749452e-05, "loss": 1.429, "step": 403 }, { "epoch": 0.12, "learning_rate": 7.837387081142675e-05, "loss": 1.3831, "step": 404 }, { "epoch": 0.12, "learning_rate": 7.836312244228115e-05, "loss": 1.4639, "step": 405 }, { "epoch": 0.12, "learning_rate": 7.835233940976955e-05, "loss": 1.5145, "step": 406 }, { "epoch": 0.12, "learning_rate": 7.834152172363505e-05, "loss": 1.4166, "step": 407 }, { "epoch": 0.12, "learning_rate": 7.833066939365206e-05, "loss": 1.3999, "step": 408 }, { "epoch": 0.12, "learning_rate": 7.831978242962632e-05, "loss": 1.514, "step": 409 }, { "epoch": 0.12, "learning_rate": 7.830886084139483e-05, "loss": 1.4822, "step": 410 }, { "epoch": 0.12, "learning_rate": 7.829790463882593e-05, "loss": 1.4782, "step": 411 }, { "epoch": 0.12, "learning_rate": 7.828691383181918e-05, "loss": 1.5076, "step": 412 }, { "epoch": 0.12, "learning_rate": 7.827588843030543e-05, "loss": 1.4506, "step": 413 }, { "epoch": 0.12, "learning_rate": 7.826482844424679e-05, "loss": 1.4893, "step": 414 }, { "epoch": 0.12, "learning_rate": 7.825373388363661e-05, "loss": 1.4542, "step": 415 }, { "epoch": 0.12, "learning_rate": 7.824260475849949e-05, "loss": 1.4955, "step": 416 }, { "epoch": 0.12, "learning_rate": 7.823144107889126e-05, "loss": 1.5095, "step": 417 }, { "epoch": 0.12, "learning_rate": 7.822024285489896e-05, "loss": 1.5355, "step": 418 }, { "epoch": 0.12, "learning_rate": 7.820901009664089e-05, "loss": 1.4864, "step": 419 }, { "epoch": 0.12, "learning_rate": 7.819774281426645e-05, "loss": 1.4763, "step": 420 }, { "epoch": 0.12, "learning_rate": 7.818644101795635e-05, "loss": 1.4513, "step": 421 }, { "epoch": 0.12, "learning_rate": 7.817510471792243e-05, "loss": 1.4477, "step": 422 }, { "epoch": 0.12, "learning_rate": 7.81637339244077e-05, "loss": 1.4506, "step": 423 }, { "epoch": 0.12, "learning_rate": 7.815232864768636e-05, "loss": 1.4667, "step": 424 }, { "epoch": 0.12, "learning_rate": 7.814088889806374e-05, "loss": 1.509, "step": 425 }, { "epoch": 0.12, "learning_rate": 7.812941468587636e-05, "loss": 1.5293, "step": 426 }, { "epoch": 0.13, "learning_rate": 7.811790602149182e-05, "loss": 1.4266, "step": 427 }, { "epoch": 0.13, "learning_rate": 7.810636291530893e-05, "loss": 1.4618, "step": 428 }, { "epoch": 0.13, "learning_rate": 7.809478537775753e-05, "loss": 1.4584, "step": 429 }, { "epoch": 0.13, "learning_rate": 7.808317341929863e-05, "loss": 1.544, "step": 430 }, { "epoch": 0.13, "learning_rate": 7.807152705042435e-05, "loss": 1.4677, "step": 431 }, { "epoch": 0.13, "learning_rate": 7.805984628165785e-05, "loss": 1.43, "step": 432 }, { "epoch": 0.13, "learning_rate": 7.804813112355339e-05, "loss": 1.4429, "step": 433 }, { "epoch": 0.13, "learning_rate": 7.803638158669635e-05, "loss": 1.4925, "step": 434 }, { "epoch": 0.13, "learning_rate": 7.80245976817031e-05, "loss": 1.455, "step": 435 }, { "epoch": 0.13, "learning_rate": 7.801277941922114e-05, "loss": 1.4856, "step": 436 }, { "epoch": 0.13, "learning_rate": 7.800092680992893e-05, "loss": 1.436, "step": 437 }, { "epoch": 0.13, "learning_rate": 7.798903986453603e-05, "loss": 1.4624, "step": 438 }, { "epoch": 0.13, "learning_rate": 7.797711859378302e-05, "loss": 1.4458, "step": 439 }, { "epoch": 0.13, "learning_rate": 7.796516300844144e-05, "loss": 1.4932, "step": 440 }, { "epoch": 0.13, "learning_rate": 7.79531731193139e-05, "loss": 1.4725, "step": 441 }, { "epoch": 0.13, "learning_rate": 7.794114893723398e-05, "loss": 1.4608, "step": 442 }, { "epoch": 0.13, "learning_rate": 7.792909047306623e-05, "loss": 1.5006, "step": 443 }, { "epoch": 0.13, "learning_rate": 7.79169977377062e-05, "loss": 1.4568, "step": 444 }, { "epoch": 0.13, "learning_rate": 7.79048707420804e-05, "loss": 1.4616, "step": 445 }, { "epoch": 0.13, "learning_rate": 7.789270949714629e-05, "loss": 1.4262, "step": 446 }, { "epoch": 0.13, "learning_rate": 7.788051401389226e-05, "loss": 1.5236, "step": 447 }, { "epoch": 0.13, "learning_rate": 7.786828430333769e-05, "loss": 1.4592, "step": 448 }, { "epoch": 0.13, "learning_rate": 7.785602037653283e-05, "loss": 1.4754, "step": 449 }, { "epoch": 0.13, "learning_rate": 7.784372224455886e-05, "loss": 1.4002, "step": 450 }, { "epoch": 0.13, "learning_rate": 7.78313899185279e-05, "loss": 1.4484, "step": 451 }, { "epoch": 0.13, "learning_rate": 7.781902340958291e-05, "loss": 1.448, "step": 452 }, { "epoch": 0.13, "learning_rate": 7.78066227288978e-05, "loss": 1.335, "step": 453 }, { "epoch": 0.13, "learning_rate": 7.779418788767732e-05, "loss": 1.476, "step": 454 }, { "epoch": 0.13, "learning_rate": 7.778171889715706e-05, "loss": 1.5081, "step": 455 }, { "epoch": 0.13, "learning_rate": 7.776921576860351e-05, "loss": 1.4232, "step": 456 }, { "epoch": 0.13, "learning_rate": 7.775667851331402e-05, "loss": 1.4606, "step": 457 }, { "epoch": 0.13, "learning_rate": 7.77441071426167e-05, "loss": 1.4605, "step": 458 }, { "epoch": 0.13, "learning_rate": 7.773150166787057e-05, "loss": 1.4318, "step": 459 }, { "epoch": 0.13, "learning_rate": 7.771886210046541e-05, "loss": 1.4087, "step": 460 }, { "epoch": 0.14, "learning_rate": 7.770618845182182e-05, "loss": 1.4923, "step": 461 }, { "epoch": 0.14, "learning_rate": 7.76934807333912e-05, "loss": 1.4581, "step": 462 }, { "epoch": 0.14, "learning_rate": 7.768073895665573e-05, "loss": 1.4013, "step": 463 }, { "epoch": 0.14, "learning_rate": 7.766796313312836e-05, "loss": 1.4862, "step": 464 }, { "epoch": 0.14, "learning_rate": 7.765515327435282e-05, "loss": 1.457, "step": 465 }, { "epoch": 0.14, "learning_rate": 7.764230939190358e-05, "loss": 1.4757, "step": 466 }, { "epoch": 0.14, "learning_rate": 7.762943149738583e-05, "loss": 1.458, "step": 467 }, { "epoch": 0.14, "learning_rate": 7.761651960243554e-05, "loss": 1.4541, "step": 468 }, { "epoch": 0.14, "learning_rate": 7.760357371871935e-05, "loss": 1.4809, "step": 469 }, { "epoch": 0.14, "learning_rate": 7.759059385793466e-05, "loss": 1.4987, "step": 470 }, { "epoch": 0.14, "learning_rate": 7.757758003180956e-05, "loss": 1.4491, "step": 471 }, { "epoch": 0.14, "learning_rate": 7.756453225210279e-05, "loss": 1.39, "step": 472 }, { "epoch": 0.14, "learning_rate": 7.755145053060378e-05, "loss": 1.4782, "step": 473 }, { "epoch": 0.14, "learning_rate": 7.753833487913269e-05, "loss": 1.4619, "step": 474 }, { "epoch": 0.14, "learning_rate": 7.752518530954027e-05, "loss": 1.4314, "step": 475 }, { "epoch": 0.14, "learning_rate": 7.751200183370792e-05, "loss": 1.4804, "step": 476 }, { "epoch": 0.14, "learning_rate": 7.749878446354775e-05, "loss": 1.5127, "step": 477 }, { "epoch": 0.14, "learning_rate": 7.748553321100238e-05, "loss": 1.4101, "step": 478 }, { "epoch": 0.14, "learning_rate": 7.747224808804514e-05, "loss": 1.4358, "step": 479 }, { "epoch": 0.14, "learning_rate": 7.745892910667991e-05, "loss": 1.4502, "step": 480 }, { "epoch": 0.14, "learning_rate": 7.744557627894122e-05, "loss": 1.4834, "step": 481 }, { "epoch": 0.14, "learning_rate": 7.74321896168941e-05, "loss": 1.4163, "step": 482 }, { "epoch": 0.14, "learning_rate": 7.741876913263422e-05, "loss": 1.4211, "step": 483 }, { "epoch": 0.14, "learning_rate": 7.740531483828779e-05, "loss": 1.4433, "step": 484 }, { "epoch": 0.14, "learning_rate": 7.739182674601155e-05, "loss": 1.5171, "step": 485 }, { "epoch": 0.14, "learning_rate": 7.737830486799283e-05, "loss": 1.4501, "step": 486 }, { "epoch": 0.14, "learning_rate": 7.73647492164494e-05, "loss": 1.4429, "step": 487 }, { "epoch": 0.14, "learning_rate": 7.735115980362964e-05, "loss": 1.4322, "step": 488 }, { "epoch": 0.14, "learning_rate": 7.733753664181238e-05, "loss": 1.5037, "step": 489 }, { "epoch": 0.14, "learning_rate": 7.732387974330695e-05, "loss": 1.3909, "step": 490 }, { "epoch": 0.14, "learning_rate": 7.731018912045317e-05, "loss": 1.3948, "step": 491 }, { "epoch": 0.14, "learning_rate": 7.729646478562135e-05, "loss": 1.4809, "step": 492 }, { "epoch": 0.14, "learning_rate": 7.728270675121224e-05, "loss": 1.4446, "step": 493 }, { "epoch": 0.14, "learning_rate": 7.726891502965703e-05, "loss": 1.4451, "step": 494 }, { "epoch": 0.15, "learning_rate": 7.725508963341738e-05, "loss": 1.4505, "step": 495 }, { "epoch": 0.15, "learning_rate": 7.724123057498535e-05, "loss": 1.4686, "step": 496 }, { "epoch": 0.15, "learning_rate": 7.722733786688342e-05, "loss": 1.4688, "step": 497 }, { "epoch": 0.15, "learning_rate": 7.721341152166448e-05, "loss": 1.4049, "step": 498 }, { "epoch": 0.15, "learning_rate": 7.719945155191182e-05, "loss": 1.4108, "step": 499 }, { "epoch": 0.15, "learning_rate": 7.718545797023911e-05, "loss": 1.4831, "step": 500 }, { "epoch": 0.15, "eval_loss": 0.15852396190166473, "eval_runtime": 24.8605, "eval_samples_per_second": 80.449, "eval_steps_per_second": 0.644, "step": 500 }, { "epoch": 0.15, "learning_rate": 7.717143078929036e-05, "loss": 1.3533, "step": 501 }, { "epoch": 0.15, "learning_rate": 7.715737002174e-05, "loss": 1.4558, "step": 502 }, { "epoch": 0.15, "learning_rate": 7.714327568029272e-05, "loss": 1.4789, "step": 503 }, { "epoch": 0.15, "learning_rate": 7.712914777768363e-05, "loss": 1.4575, "step": 504 }, { "epoch": 0.15, "learning_rate": 7.711498632667815e-05, "loss": 1.444, "step": 505 }, { "epoch": 0.15, "learning_rate": 7.710079134007195e-05, "loss": 1.4473, "step": 506 }, { "epoch": 0.15, "learning_rate": 7.708656283069108e-05, "loss": 1.4423, "step": 507 }, { "epoch": 0.15, "learning_rate": 7.707230081139184e-05, "loss": 1.4372, "step": 508 }, { "epoch": 0.15, "learning_rate": 7.705800529506078e-05, "loss": 1.4089, "step": 509 }, { "epoch": 0.15, "learning_rate": 7.70436762946148e-05, "loss": 1.4312, "step": 510 }, { "epoch": 0.15, "learning_rate": 7.702931382300099e-05, "loss": 1.4583, "step": 511 }, { "epoch": 0.15, "learning_rate": 7.701491789319668e-05, "loss": 1.4665, "step": 512 }, { "epoch": 0.15, "learning_rate": 7.700048851820946e-05, "loss": 1.4332, "step": 513 }, { "epoch": 0.15, "learning_rate": 7.698602571107715e-05, "loss": 1.4066, "step": 514 }, { "epoch": 0.15, "learning_rate": 7.697152948486771e-05, "loss": 1.4632, "step": 515 }, { "epoch": 0.15, "learning_rate": 7.695699985267938e-05, "loss": 1.4606, "step": 516 }, { "epoch": 0.15, "learning_rate": 7.694243682764054e-05, "loss": 1.4023, "step": 517 }, { "epoch": 0.15, "learning_rate": 7.692784042290976e-05, "loss": 1.3851, "step": 518 }, { "epoch": 0.15, "learning_rate": 7.691321065167575e-05, "loss": 1.5098, "step": 519 }, { "epoch": 0.15, "learning_rate": 7.689854752715738e-05, "loss": 1.4204, "step": 520 }, { "epoch": 0.15, "learning_rate": 7.688385106260367e-05, "loss": 1.4667, "step": 521 }, { "epoch": 0.15, "learning_rate": 7.686912127129373e-05, "loss": 1.3986, "step": 522 }, { "epoch": 0.15, "learning_rate": 7.685435816653681e-05, "loss": 1.4379, "step": 523 }, { "epoch": 0.15, "learning_rate": 7.683956176167227e-05, "loss": 1.4095, "step": 524 }, { "epoch": 0.15, "learning_rate": 7.682473207006952e-05, "loss": 1.3978, "step": 525 }, { "epoch": 0.15, "learning_rate": 7.680986910512808e-05, "loss": 1.4799, "step": 526 }, { "epoch": 0.15, "learning_rate": 7.679497288027752e-05, "loss": 1.3983, "step": 527 }, { "epoch": 0.15, "learning_rate": 7.678004340897747e-05, "loss": 1.4198, "step": 528 }, { "epoch": 0.16, "learning_rate": 7.676508070471761e-05, "loss": 1.4455, "step": 529 }, { "epoch": 0.16, "learning_rate": 7.675008478101761e-05, "loss": 1.4528, "step": 530 }, { "epoch": 0.16, "learning_rate": 7.673505565142721e-05, "loss": 1.3844, "step": 531 }, { "epoch": 0.16, "learning_rate": 7.671999332952609e-05, "loss": 1.5005, "step": 532 }, { "epoch": 0.16, "learning_rate": 7.670489782892396e-05, "loss": 1.3887, "step": 533 }, { "epoch": 0.16, "learning_rate": 7.668976916326055e-05, "loss": 1.507, "step": 534 }, { "epoch": 0.16, "learning_rate": 7.667460734620545e-05, "loss": 1.4026, "step": 535 }, { "epoch": 0.16, "learning_rate": 7.665941239145829e-05, "loss": 1.4413, "step": 536 }, { "epoch": 0.16, "learning_rate": 7.664418431274861e-05, "loss": 1.4717, "step": 537 }, { "epoch": 0.16, "learning_rate": 7.662892312383592e-05, "loss": 1.4155, "step": 538 }, { "epoch": 0.16, "learning_rate": 7.661362883850955e-05, "loss": 1.3942, "step": 539 }, { "epoch": 0.16, "learning_rate": 7.659830147058885e-05, "loss": 1.4519, "step": 540 }, { "epoch": 0.16, "learning_rate": 7.658294103392299e-05, "loss": 1.4495, "step": 541 }, { "epoch": 0.16, "learning_rate": 7.656754754239105e-05, "loss": 1.4263, "step": 542 }, { "epoch": 0.16, "learning_rate": 7.655212100990195e-05, "loss": 1.4372, "step": 543 }, { "epoch": 0.16, "learning_rate": 7.65366614503945e-05, "loss": 1.416, "step": 544 }, { "epoch": 0.16, "learning_rate": 7.652116887783731e-05, "loss": 1.4372, "step": 545 }, { "epoch": 0.16, "learning_rate": 7.650564330622886e-05, "loss": 1.422, "step": 546 }, { "epoch": 0.16, "learning_rate": 7.649008474959743e-05, "loss": 1.4067, "step": 547 }, { "epoch": 0.16, "learning_rate": 7.647449322200108e-05, "loss": 1.4077, "step": 548 }, { "epoch": 0.16, "learning_rate": 7.645886873752771e-05, "loss": 1.406, "step": 549 }, { "epoch": 0.16, "learning_rate": 7.644321131029497e-05, "loss": 1.3987, "step": 550 }, { "epoch": 0.16, "learning_rate": 7.642752095445027e-05, "loss": 1.4265, "step": 551 }, { "epoch": 0.16, "learning_rate": 7.641179768417076e-05, "loss": 1.4959, "step": 552 }, { "epoch": 0.16, "learning_rate": 7.639604151366339e-05, "loss": 1.3929, "step": 553 }, { "epoch": 0.16, "learning_rate": 7.638025245716478e-05, "loss": 1.3676, "step": 554 }, { "epoch": 0.16, "learning_rate": 7.636443052894126e-05, "loss": 1.3983, "step": 555 }, { "epoch": 0.16, "learning_rate": 7.634857574328892e-05, "loss": 1.4391, "step": 556 }, { "epoch": 0.16, "learning_rate": 7.633268811453351e-05, "loss": 1.4234, "step": 557 }, { "epoch": 0.16, "learning_rate": 7.631676765703042e-05, "loss": 1.4307, "step": 558 }, { "epoch": 0.16, "learning_rate": 7.630081438516474e-05, "loss": 1.4127, "step": 559 }, { "epoch": 0.16, "learning_rate": 7.628482831335121e-05, "loss": 1.4341, "step": 560 }, { "epoch": 0.16, "learning_rate": 7.626880945603418e-05, "loss": 1.3882, "step": 561 }, { "epoch": 0.16, "learning_rate": 7.625275782768765e-05, "loss": 1.3935, "step": 562 }, { "epoch": 0.17, "learning_rate": 7.623667344281522e-05, "loss": 1.4045, "step": 563 }, { "epoch": 0.17, "learning_rate": 7.62205563159501e-05, "loss": 1.439, "step": 564 }, { "epoch": 0.17, "learning_rate": 7.620440646165508e-05, "loss": 1.3988, "step": 565 }, { "epoch": 0.17, "learning_rate": 7.61882238945225e-05, "loss": 1.434, "step": 566 }, { "epoch": 0.17, "learning_rate": 7.617200862917427e-05, "loss": 1.4462, "step": 567 }, { "epoch": 0.17, "learning_rate": 7.615576068026187e-05, "loss": 1.4164, "step": 568 }, { "epoch": 0.17, "learning_rate": 7.613948006246626e-05, "loss": 1.391, "step": 569 }, { "epoch": 0.17, "learning_rate": 7.612316679049796e-05, "loss": 1.4269, "step": 570 }, { "epoch": 0.17, "learning_rate": 7.610682087909699e-05, "loss": 1.501, "step": 571 }, { "epoch": 0.17, "learning_rate": 7.609044234303286e-05, "loss": 1.3566, "step": 572 }, { "epoch": 0.17, "learning_rate": 7.607403119710453e-05, "loss": 1.4112, "step": 573 }, { "epoch": 0.17, "learning_rate": 7.605758745614046e-05, "loss": 1.3702, "step": 574 }, { "epoch": 0.17, "learning_rate": 7.604111113499856e-05, "loss": 1.47, "step": 575 }, { "epoch": 0.17, "learning_rate": 7.602460224856615e-05, "loss": 1.3678, "step": 576 }, { "epoch": 0.17, "learning_rate": 7.600806081176e-05, "loss": 1.3715, "step": 577 }, { "epoch": 0.17, "learning_rate": 7.599148683952628e-05, "loss": 1.4038, "step": 578 }, { "epoch": 0.17, "learning_rate": 7.597488034684058e-05, "loss": 1.3995, "step": 579 }, { "epoch": 0.17, "learning_rate": 7.595824134870782e-05, "loss": 1.4314, "step": 580 }, { "epoch": 0.17, "learning_rate": 7.594156986016235e-05, "loss": 1.4516, "step": 581 }, { "epoch": 0.17, "learning_rate": 7.592486589626784e-05, "loss": 1.4193, "step": 582 }, { "epoch": 0.17, "learning_rate": 7.590812947211733e-05, "loss": 1.3927, "step": 583 }, { "epoch": 0.17, "learning_rate": 7.589136060283318e-05, "loss": 1.4211, "step": 584 }, { "epoch": 0.17, "learning_rate": 7.587455930356704e-05, "loss": 1.4091, "step": 585 }, { "epoch": 0.17, "learning_rate": 7.585772558949989e-05, "loss": 1.485, "step": 586 }, { "epoch": 0.17, "learning_rate": 7.584085947584202e-05, "loss": 1.4084, "step": 587 }, { "epoch": 0.17, "learning_rate": 7.582396097783294e-05, "loss": 1.4219, "step": 588 }, { "epoch": 0.17, "learning_rate": 7.580703011074147e-05, "loss": 1.4331, "step": 589 }, { "epoch": 0.17, "learning_rate": 7.579006688986563e-05, "loss": 1.4127, "step": 590 }, { "epoch": 0.17, "learning_rate": 7.57730713305327e-05, "loss": 1.4424, "step": 591 }, { "epoch": 0.17, "learning_rate": 7.575604344809921e-05, "loss": 1.4335, "step": 592 }, { "epoch": 0.17, "learning_rate": 7.573898325795086e-05, "loss": 1.4293, "step": 593 }, { "epoch": 0.17, "learning_rate": 7.572189077550253e-05, "loss": 1.4775, "step": 594 }, { "epoch": 0.17, "learning_rate": 7.57047660161983e-05, "loss": 1.3645, "step": 595 }, { "epoch": 0.17, "learning_rate": 7.568760899551142e-05, "loss": 1.4359, "step": 596 }, { "epoch": 0.18, "learning_rate": 7.567041972894428e-05, "loss": 1.4445, "step": 597 }, { "epoch": 0.18, "learning_rate": 7.565319823202838e-05, "loss": 1.4056, "step": 598 }, { "epoch": 0.18, "learning_rate": 7.563594452032441e-05, "loss": 1.399, "step": 599 }, { "epoch": 0.18, "learning_rate": 7.561865860942209e-05, "loss": 1.3824, "step": 600 }, { "epoch": 0.18, "eval_loss": 0.14922283589839935, "eval_runtime": 24.4837, "eval_samples_per_second": 81.687, "eval_steps_per_second": 0.653, "step": 600 }, { "epoch": 0.18, "learning_rate": 7.560134051494031e-05, "loss": 1.4521, "step": 601 }, { "epoch": 0.18, "learning_rate": 7.558399025252695e-05, "loss": 1.4126, "step": 602 }, { "epoch": 0.18, "learning_rate": 7.556660783785904e-05, "loss": 1.4326, "step": 603 }, { "epoch": 0.18, "learning_rate": 7.554919328664262e-05, "loss": 1.3813, "step": 604 }, { "epoch": 0.18, "learning_rate": 7.553174661461276e-05, "loss": 1.4193, "step": 605 }, { "epoch": 0.18, "learning_rate": 7.551426783753359e-05, "loss": 1.3702, "step": 606 }, { "epoch": 0.18, "learning_rate": 7.549675697119822e-05, "loss": 1.4451, "step": 607 }, { "epoch": 0.18, "learning_rate": 7.547921403142874e-05, "loss": 1.4015, "step": 608 }, { "epoch": 0.18, "learning_rate": 7.546163903407627e-05, "loss": 1.4294, "step": 609 }, { "epoch": 0.18, "learning_rate": 7.544403199502084e-05, "loss": 1.3551, "step": 610 }, { "epoch": 0.18, "learning_rate": 7.542639293017148e-05, "loss": 1.389, "step": 611 }, { "epoch": 0.18, "learning_rate": 7.540872185546614e-05, "loss": 1.4239, "step": 612 }, { "epoch": 0.18, "learning_rate": 7.539101878687165e-05, "loss": 1.4485, "step": 613 }, { "epoch": 0.18, "learning_rate": 7.537328374038383e-05, "loss": 1.4248, "step": 614 }, { "epoch": 0.18, "learning_rate": 7.535551673202734e-05, "loss": 1.3853, "step": 615 }, { "epoch": 0.18, "learning_rate": 7.533771777785572e-05, "loss": 1.4486, "step": 616 }, { "epoch": 0.18, "learning_rate": 7.531988689395137e-05, "loss": 1.3802, "step": 617 }, { "epoch": 0.18, "learning_rate": 7.53020240964256e-05, "loss": 1.4025, "step": 618 }, { "epoch": 0.18, "learning_rate": 7.528412940141847e-05, "loss": 1.4193, "step": 619 }, { "epoch": 0.18, "learning_rate": 7.526620282509891e-05, "loss": 1.4409, "step": 620 }, { "epoch": 0.18, "learning_rate": 7.524824438366468e-05, "loss": 1.4012, "step": 621 }, { "epoch": 0.18, "learning_rate": 7.523025409334227e-05, "loss": 1.3932, "step": 622 }, { "epoch": 0.18, "learning_rate": 7.5212231970387e-05, "loss": 1.4654, "step": 623 }, { "epoch": 0.18, "learning_rate": 7.51941780310829e-05, "loss": 1.4722, "step": 624 }, { "epoch": 0.18, "learning_rate": 7.517609229174281e-05, "loss": 1.4342, "step": 625 }, { "epoch": 0.18, "learning_rate": 7.515797476870827e-05, "loss": 1.3967, "step": 626 }, { "epoch": 0.18, "learning_rate": 7.513982547834952e-05, "loss": 1.4339, "step": 627 }, { "epoch": 0.18, "learning_rate": 7.512164443706555e-05, "loss": 1.4651, "step": 628 }, { "epoch": 0.18, "learning_rate": 7.510343166128399e-05, "loss": 1.4477, "step": 629 }, { "epoch": 0.18, "learning_rate": 7.508518716746118e-05, "loss": 1.4284, "step": 630 }, { "epoch": 0.19, "learning_rate": 7.506691097208211e-05, "loss": 1.4734, "step": 631 }, { "epoch": 0.19, "learning_rate": 7.504860309166042e-05, "loss": 1.3807, "step": 632 }, { "epoch": 0.19, "learning_rate": 7.503026354273834e-05, "loss": 1.414, "step": 633 }, { "epoch": 0.19, "learning_rate": 7.501189234188677e-05, "loss": 1.398, "step": 634 }, { "epoch": 0.19, "learning_rate": 7.499348950570518e-05, "loss": 1.4523, "step": 635 }, { "epoch": 0.19, "learning_rate": 7.497505505082161e-05, "loss": 1.3659, "step": 636 }, { "epoch": 0.19, "learning_rate": 7.495658899389271e-05, "loss": 1.4372, "step": 637 }, { "epoch": 0.19, "learning_rate": 7.493809135160367e-05, "loss": 1.381, "step": 638 }, { "epoch": 0.19, "learning_rate": 7.491956214066818e-05, "loss": 1.4524, "step": 639 }, { "epoch": 0.19, "learning_rate": 7.490100137782851e-05, "loss": 1.3724, "step": 640 }, { "epoch": 0.19, "learning_rate": 7.48824090798554e-05, "loss": 1.4382, "step": 641 }, { "epoch": 0.19, "learning_rate": 7.486378526354812e-05, "loss": 1.4203, "step": 642 }, { "epoch": 0.19, "learning_rate": 7.484512994573438e-05, "loss": 1.3743, "step": 643 }, { "epoch": 0.19, "learning_rate": 7.482644314327036e-05, "loss": 1.419, "step": 644 }, { "epoch": 0.19, "learning_rate": 7.480772487304072e-05, "loss": 1.4332, "step": 645 }, { "epoch": 0.19, "learning_rate": 7.478897515195852e-05, "loss": 1.4577, "step": 646 }, { "epoch": 0.19, "learning_rate": 7.477019399696528e-05, "loss": 1.3823, "step": 647 }, { "epoch": 0.19, "learning_rate": 7.475138142503083e-05, "loss": 1.4017, "step": 648 }, { "epoch": 0.19, "learning_rate": 7.473253745315352e-05, "loss": 1.4017, "step": 649 }, { "epoch": 0.19, "learning_rate": 7.471366209835994e-05, "loss": 1.4157, "step": 650 }, { "epoch": 0.19, "learning_rate": 7.469475537770513e-05, "loss": 1.373, "step": 651 }, { "epoch": 0.19, "learning_rate": 7.467581730827244e-05, "loss": 1.4197, "step": 652 }, { "epoch": 0.19, "learning_rate": 7.465684790717354e-05, "loss": 1.3704, "step": 653 }, { "epoch": 0.19, "learning_rate": 7.46378471915484e-05, "loss": 1.3811, "step": 654 }, { "epoch": 0.19, "learning_rate": 7.46188151785653e-05, "loss": 1.3896, "step": 655 }, { "epoch": 0.19, "learning_rate": 7.459975188542082e-05, "loss": 1.38, "step": 656 }, { "epoch": 0.19, "learning_rate": 7.458065732933976e-05, "loss": 1.4459, "step": 657 }, { "epoch": 0.19, "learning_rate": 7.45615315275752e-05, "loss": 1.4214, "step": 658 }, { "epoch": 0.19, "learning_rate": 7.454237449740843e-05, "loss": 1.3855, "step": 659 }, { "epoch": 0.19, "learning_rate": 7.452318625614898e-05, "loss": 1.3763, "step": 660 }, { "epoch": 0.19, "learning_rate": 7.450396682113453e-05, "loss": 1.4183, "step": 661 }, { "epoch": 0.19, "learning_rate": 7.448471620973106e-05, "loss": 1.4542, "step": 662 }, { "epoch": 0.19, "learning_rate": 7.446543443933258e-05, "loss": 1.3802, "step": 663 }, { "epoch": 0.19, "learning_rate": 7.444612152736135e-05, "loss": 1.4174, "step": 664 }, { "epoch": 0.2, "learning_rate": 7.44267774912677e-05, "loss": 1.4064, "step": 665 }, { "epoch": 0.2, "learning_rate": 7.440740234853017e-05, "loss": 1.4343, "step": 666 }, { "epoch": 0.2, "learning_rate": 7.438799611665531e-05, "loss": 1.3878, "step": 667 }, { "epoch": 0.2, "learning_rate": 7.436855881317784e-05, "loss": 1.3862, "step": 668 }, { "epoch": 0.2, "learning_rate": 7.43490904556605e-05, "loss": 1.4194, "step": 669 }, { "epoch": 0.2, "learning_rate": 7.432959106169412e-05, "loss": 1.3693, "step": 670 }, { "epoch": 0.2, "learning_rate": 7.431006064889755e-05, "loss": 1.3839, "step": 671 }, { "epoch": 0.2, "learning_rate": 7.42904992349177e-05, "loss": 1.4427, "step": 672 }, { "epoch": 0.2, "learning_rate": 7.427090683742947e-05, "loss": 1.4084, "step": 673 }, { "epoch": 0.2, "learning_rate": 7.425128347413574e-05, "loss": 1.4009, "step": 674 }, { "epoch": 0.2, "learning_rate": 7.42316291627674e-05, "loss": 1.4115, "step": 675 }, { "epoch": 0.2, "learning_rate": 7.42119439210833e-05, "loss": 1.4218, "step": 676 }, { "epoch": 0.2, "learning_rate": 7.419222776687022e-05, "loss": 1.4334, "step": 677 }, { "epoch": 0.2, "learning_rate": 7.417248071794288e-05, "loss": 1.4145, "step": 678 }, { "epoch": 0.2, "learning_rate": 7.415270279214392e-05, "loss": 1.3816, "step": 679 }, { "epoch": 0.2, "learning_rate": 7.413289400734385e-05, "loss": 1.413, "step": 680 }, { "epoch": 0.2, "learning_rate": 7.411305438144111e-05, "loss": 1.3879, "step": 681 }, { "epoch": 0.2, "learning_rate": 7.409318393236199e-05, "loss": 1.3713, "step": 682 }, { "epoch": 0.2, "learning_rate": 7.40732826780606e-05, "loss": 1.4331, "step": 683 }, { "epoch": 0.2, "learning_rate": 7.405335063651893e-05, "loss": 1.4204, "step": 684 }, { "epoch": 0.2, "learning_rate": 7.403338782574674e-05, "loss": 1.4173, "step": 685 }, { "epoch": 0.2, "learning_rate": 7.401339426378165e-05, "loss": 1.3759, "step": 686 }, { "epoch": 0.2, "learning_rate": 7.399336996868902e-05, "loss": 1.4106, "step": 687 }, { "epoch": 0.2, "learning_rate": 7.397331495856199e-05, "loss": 1.3805, "step": 688 }, { "epoch": 0.2, "learning_rate": 7.395322925152145e-05, "loss": 1.4235, "step": 689 }, { "epoch": 0.2, "learning_rate": 7.393311286571607e-05, "loss": 1.3466, "step": 690 }, { "epoch": 0.2, "learning_rate": 7.391296581932216e-05, "loss": 1.4122, "step": 691 }, { "epoch": 0.2, "learning_rate": 7.389278813054378e-05, "loss": 1.3957, "step": 692 }, { "epoch": 0.2, "learning_rate": 7.38725798176127e-05, "loss": 1.3724, "step": 693 }, { "epoch": 0.2, "learning_rate": 7.385234089878831e-05, "loss": 1.4261, "step": 694 }, { "epoch": 0.2, "learning_rate": 7.383207139235769e-05, "loss": 1.4567, "step": 695 }, { "epoch": 0.2, "learning_rate": 7.381177131663555e-05, "loss": 1.4201, "step": 696 }, { "epoch": 0.2, "learning_rate": 7.37914406899642e-05, "loss": 1.4183, "step": 697 }, { "epoch": 0.2, "learning_rate": 7.37710795307136e-05, "loss": 1.3535, "step": 698 }, { "epoch": 0.21, "learning_rate": 7.375068785728124e-05, "loss": 1.4394, "step": 699 }, { "epoch": 0.21, "learning_rate": 7.373026568809224e-05, "loss": 1.4229, "step": 700 }, { "epoch": 0.21, "eval_loss": 0.14763599634170532, "eval_runtime": 25.4163, "eval_samples_per_second": 78.69, "eval_steps_per_second": 0.63, "step": 700 }, { "epoch": 0.21, "learning_rate": 7.370981304159921e-05, "loss": 1.3756, "step": 701 }, { "epoch": 0.21, "learning_rate": 7.368932993628237e-05, "loss": 1.4517, "step": 702 }, { "epoch": 0.21, "learning_rate": 7.36688163906494e-05, "loss": 1.3883, "step": 703 }, { "epoch": 0.21, "learning_rate": 7.364827242323554e-05, "loss": 1.4049, "step": 704 }, { "epoch": 0.21, "learning_rate": 7.362769805260345e-05, "loss": 1.3641, "step": 705 }, { "epoch": 0.21, "learning_rate": 7.360709329734332e-05, "loss": 1.4187, "step": 706 }, { "epoch": 0.21, "learning_rate": 7.358645817607279e-05, "loss": 1.3369, "step": 707 }, { "epoch": 0.21, "learning_rate": 7.356579270743689e-05, "loss": 1.3943, "step": 708 }, { "epoch": 0.21, "learning_rate": 7.354509691010813e-05, "loss": 1.3244, "step": 709 }, { "epoch": 0.21, "learning_rate": 7.352437080278637e-05, "loss": 1.429, "step": 710 }, { "epoch": 0.21, "learning_rate": 7.35036144041989e-05, "loss": 1.3726, "step": 711 }, { "epoch": 0.21, "learning_rate": 7.348282773310035e-05, "loss": 1.4076, "step": 712 }, { "epoch": 0.21, "learning_rate": 7.346201080827272e-05, "loss": 1.3623, "step": 713 }, { "epoch": 0.21, "learning_rate": 7.344116364852534e-05, "loss": 1.4287, "step": 714 }, { "epoch": 0.21, "learning_rate": 7.342028627269485e-05, "loss": 1.369, "step": 715 }, { "epoch": 0.21, "learning_rate": 7.339937869964524e-05, "loss": 1.3475, "step": 716 }, { "epoch": 0.21, "learning_rate": 7.337844094826771e-05, "loss": 1.4277, "step": 717 }, { "epoch": 0.21, "learning_rate": 7.335747303748079e-05, "loss": 1.4137, "step": 718 }, { "epoch": 0.21, "learning_rate": 7.333647498623022e-05, "loss": 1.4134, "step": 719 }, { "epoch": 0.21, "learning_rate": 7.331544681348898e-05, "loss": 1.3573, "step": 720 }, { "epoch": 0.21, "learning_rate": 7.32943885382573e-05, "loss": 1.4284, "step": 721 }, { "epoch": 0.21, "learning_rate": 7.327330017956259e-05, "loss": 1.4261, "step": 722 }, { "epoch": 0.21, "learning_rate": 7.325218175645942e-05, "loss": 1.4344, "step": 723 }, { "epoch": 0.21, "learning_rate": 7.323103328802954e-05, "loss": 1.3898, "step": 724 }, { "epoch": 0.21, "learning_rate": 7.320985479338187e-05, "loss": 1.4311, "step": 725 }, { "epoch": 0.21, "learning_rate": 7.318864629165242e-05, "loss": 1.3822, "step": 726 }, { "epoch": 0.21, "learning_rate": 7.316740780200434e-05, "loss": 1.3852, "step": 727 }, { "epoch": 0.21, "learning_rate": 7.314613934362788e-05, "loss": 1.3979, "step": 728 }, { "epoch": 0.21, "learning_rate": 7.312484093574035e-05, "loss": 1.3993, "step": 729 }, { "epoch": 0.21, "learning_rate": 7.310351259758614e-05, "loss": 1.3642, "step": 730 }, { "epoch": 0.21, "learning_rate": 7.308215434843668e-05, "loss": 1.3717, "step": 731 }, { "epoch": 0.21, "learning_rate": 7.30607662075904e-05, "loss": 1.3792, "step": 732 }, { "epoch": 0.22, "learning_rate": 7.303934819437277e-05, "loss": 1.4255, "step": 733 }, { "epoch": 0.22, "learning_rate": 7.301790032813622e-05, "loss": 1.3832, "step": 734 }, { "epoch": 0.22, "learning_rate": 7.299642262826019e-05, "loss": 1.4055, "step": 735 }, { "epoch": 0.22, "learning_rate": 7.297491511415109e-05, "loss": 1.384, "step": 736 }, { "epoch": 0.22, "learning_rate": 7.29533778052422e-05, "loss": 1.4092, "step": 737 }, { "epoch": 0.22, "learning_rate": 7.293181072099377e-05, "loss": 1.4142, "step": 738 }, { "epoch": 0.22, "learning_rate": 7.291021388089295e-05, "loss": 1.3888, "step": 739 }, { "epoch": 0.22, "learning_rate": 7.288858730445378e-05, "loss": 1.3622, "step": 740 }, { "epoch": 0.22, "learning_rate": 7.286693101121714e-05, "loss": 1.3975, "step": 741 }, { "epoch": 0.22, "learning_rate": 7.28452450207508e-05, "loss": 1.3726, "step": 742 }, { "epoch": 0.22, "learning_rate": 7.282352935264934e-05, "loss": 1.419, "step": 743 }, { "epoch": 0.22, "learning_rate": 7.280178402653415e-05, "loss": 1.4451, "step": 744 }, { "epoch": 0.22, "learning_rate": 7.278000906205343e-05, "loss": 1.393, "step": 745 }, { "epoch": 0.22, "learning_rate": 7.275820447888216e-05, "loss": 1.3612, "step": 746 }, { "epoch": 0.22, "learning_rate": 7.273637029672208e-05, "loss": 1.341, "step": 747 }, { "epoch": 0.22, "learning_rate": 7.271450653530167e-05, "loss": 1.4138, "step": 748 }, { "epoch": 0.22, "learning_rate": 7.269261321437616e-05, "loss": 1.3845, "step": 749 }, { "epoch": 0.22, "learning_rate": 7.267069035372742e-05, "loss": 1.3907, "step": 750 }, { "epoch": 0.22, "learning_rate": 7.264873797316412e-05, "loss": 1.4135, "step": 751 }, { "epoch": 0.22, "learning_rate": 7.262675609252151e-05, "loss": 1.3303, "step": 752 }, { "epoch": 0.22, "learning_rate": 7.260474473166154e-05, "loss": 1.4013, "step": 753 }, { "epoch": 0.22, "learning_rate": 7.258270391047279e-05, "loss": 1.3951, "step": 754 }, { "epoch": 0.22, "learning_rate": 7.256063364887043e-05, "loss": 1.3926, "step": 755 }, { "epoch": 0.22, "learning_rate": 7.253853396679628e-05, "loss": 1.329, "step": 756 }, { "epoch": 0.22, "learning_rate": 7.25164048842187e-05, "loss": 1.394, "step": 757 }, { "epoch": 0.22, "learning_rate": 7.249424642113266e-05, "loss": 1.3518, "step": 758 }, { "epoch": 0.22, "learning_rate": 7.247205859755962e-05, "loss": 1.3932, "step": 759 }, { "epoch": 0.22, "learning_rate": 7.244984143354763e-05, "loss": 1.3802, "step": 760 }, { "epoch": 0.22, "learning_rate": 7.24275949491712e-05, "loss": 1.3144, "step": 761 }, { "epoch": 0.22, "learning_rate": 7.240531916453136e-05, "loss": 1.4218, "step": 762 }, { "epoch": 0.22, "learning_rate": 7.238301409975561e-05, "loss": 1.3826, "step": 763 }, { "epoch": 0.22, "learning_rate": 7.236067977499791e-05, "loss": 1.4198, "step": 764 }, { "epoch": 0.22, "learning_rate": 7.233831621043864e-05, "loss": 1.3776, "step": 765 }, { "epoch": 0.22, "learning_rate": 7.231592342628462e-05, "loss": 1.3987, "step": 766 }, { "epoch": 0.23, "learning_rate": 7.229350144276906e-05, "loss": 1.382, "step": 767 }, { "epoch": 0.23, "learning_rate": 7.227105028015156e-05, "loss": 1.3476, "step": 768 }, { "epoch": 0.23, "learning_rate": 7.224856995871808e-05, "loss": 1.3935, "step": 769 }, { "epoch": 0.23, "learning_rate": 7.222606049878092e-05, "loss": 1.4203, "step": 770 }, { "epoch": 0.23, "learning_rate": 7.220352192067875e-05, "loss": 1.4174, "step": 771 }, { "epoch": 0.23, "learning_rate": 7.218095424477649e-05, "loss": 1.4153, "step": 772 }, { "epoch": 0.23, "learning_rate": 7.21583574914654e-05, "loss": 1.4617, "step": 773 }, { "epoch": 0.23, "learning_rate": 7.213573168116295e-05, "loss": 1.4448, "step": 774 }, { "epoch": 0.23, "learning_rate": 7.211307683431295e-05, "loss": 1.3778, "step": 775 }, { "epoch": 0.23, "learning_rate": 7.209039297138541e-05, "loss": 1.4113, "step": 776 }, { "epoch": 0.23, "learning_rate": 7.206768011287651e-05, "loss": 1.3898, "step": 777 }, { "epoch": 0.23, "learning_rate": 7.204493827930869e-05, "loss": 1.4325, "step": 778 }, { "epoch": 0.23, "learning_rate": 7.202216749123057e-05, "loss": 1.3944, "step": 779 }, { "epoch": 0.23, "learning_rate": 7.199936776921688e-05, "loss": 1.3671, "step": 780 }, { "epoch": 0.23, "learning_rate": 7.197653913386855e-05, "loss": 1.4266, "step": 781 }, { "epoch": 0.23, "learning_rate": 7.19536816058126e-05, "loss": 1.3937, "step": 782 }, { "epoch": 0.23, "learning_rate": 7.193079520570217e-05, "loss": 1.4037, "step": 783 }, { "epoch": 0.23, "learning_rate": 7.19078799542165e-05, "loss": 1.3865, "step": 784 }, { "epoch": 0.23, "learning_rate": 7.188493587206087e-05, "loss": 1.4308, "step": 785 }, { "epoch": 0.23, "learning_rate": 7.186196297996661e-05, "loss": 1.3721, "step": 786 }, { "epoch": 0.23, "learning_rate": 7.183896129869112e-05, "loss": 1.4385, "step": 787 }, { "epoch": 0.23, "learning_rate": 7.18159308490178e-05, "loss": 1.3767, "step": 788 }, { "epoch": 0.23, "learning_rate": 7.179287165175599e-05, "loss": 1.4287, "step": 789 }, { "epoch": 0.23, "learning_rate": 7.17697837277411e-05, "loss": 1.3299, "step": 790 }, { "epoch": 0.23, "learning_rate": 7.174666709783438e-05, "loss": 1.419, "step": 791 }, { "epoch": 0.23, "learning_rate": 7.172352178292315e-05, "loss": 1.3639, "step": 792 }, { "epoch": 0.23, "learning_rate": 7.170034780392055e-05, "loss": 1.3968, "step": 793 }, { "epoch": 0.23, "learning_rate": 7.167714518176564e-05, "loss": 1.4081, "step": 794 }, { "epoch": 0.23, "learning_rate": 7.16539139374234e-05, "loss": 1.3629, "step": 795 }, { "epoch": 0.23, "learning_rate": 7.163065409188461e-05, "loss": 1.4389, "step": 796 }, { "epoch": 0.23, "learning_rate": 7.160736566616595e-05, "loss": 1.3337, "step": 797 }, { "epoch": 0.23, "learning_rate": 7.158404868130988e-05, "loss": 1.3868, "step": 798 }, { "epoch": 0.23, "learning_rate": 7.156070315838468e-05, "loss": 1.4085, "step": 799 }, { "epoch": 0.23, "learning_rate": 7.153732911848446e-05, "loss": 1.4004, "step": 800 }, { "epoch": 0.23, "eval_loss": 0.13538677990436554, "eval_runtime": 25.0737, "eval_samples_per_second": 79.765, "eval_steps_per_second": 0.638, "step": 800 }, { "epoch": 0.24, "learning_rate": 7.151392658272902e-05, "loss": 1.3815, "step": 801 }, { "epoch": 0.24, "learning_rate": 7.149049557226397e-05, "loss": 1.3802, "step": 802 }, { "epoch": 0.24, "learning_rate": 7.14670361082606e-05, "loss": 1.3954, "step": 803 }, { "epoch": 0.24, "learning_rate": 7.144354821191595e-05, "loss": 1.417, "step": 804 }, { "epoch": 0.24, "learning_rate": 7.142003190445272e-05, "loss": 1.4042, "step": 805 }, { "epoch": 0.24, "learning_rate": 7.139648720711931e-05, "loss": 1.3799, "step": 806 }, { "epoch": 0.24, "learning_rate": 7.137291414118974e-05, "loss": 1.3941, "step": 807 }, { "epoch": 0.24, "learning_rate": 7.134931272796368e-05, "loss": 1.4179, "step": 808 }, { "epoch": 0.24, "learning_rate": 7.132568298876643e-05, "loss": 1.3838, "step": 809 }, { "epoch": 0.24, "learning_rate": 7.130202494494886e-05, "loss": 1.3578, "step": 810 }, { "epoch": 0.24, "learning_rate": 7.127833861788741e-05, "loss": 1.4084, "step": 811 }, { "epoch": 0.24, "learning_rate": 7.12546240289841e-05, "loss": 1.3892, "step": 812 }, { "epoch": 0.24, "learning_rate": 7.123088119966647e-05, "loss": 1.3437, "step": 813 }, { "epoch": 0.24, "learning_rate": 7.120711015138756e-05, "loss": 1.3919, "step": 814 }, { "epoch": 0.24, "learning_rate": 7.118331090562598e-05, "loss": 1.3928, "step": 815 }, { "epoch": 0.24, "learning_rate": 7.11594834838857e-05, "loss": 1.3909, "step": 816 }, { "epoch": 0.24, "learning_rate": 7.113562790769626e-05, "loss": 1.3852, "step": 817 }, { "epoch": 0.24, "learning_rate": 7.11117441986126e-05, "loss": 1.409, "step": 818 }, { "epoch": 0.24, "learning_rate": 7.108783237821503e-05, "loss": 1.4075, "step": 819 }, { "epoch": 0.24, "learning_rate": 7.106389246810934e-05, "loss": 1.3625, "step": 820 }, { "epoch": 0.24, "learning_rate": 7.103992448992665e-05, "loss": 1.3513, "step": 821 }, { "epoch": 0.24, "learning_rate": 7.101592846532346e-05, "loss": 1.4093, "step": 822 }, { "epoch": 0.24, "learning_rate": 7.099190441598161e-05, "loss": 1.385, "step": 823 }, { "epoch": 0.24, "learning_rate": 7.096785236360826e-05, "loss": 1.3637, "step": 824 }, { "epoch": 0.24, "learning_rate": 7.094377232993586e-05, "loss": 1.3566, "step": 825 }, { "epoch": 0.24, "learning_rate": 7.091966433672217e-05, "loss": 1.3794, "step": 826 }, { "epoch": 0.24, "learning_rate": 7.089552840575018e-05, "loss": 1.3432, "step": 827 }, { "epoch": 0.24, "learning_rate": 7.087136455882816e-05, "loss": 1.3394, "step": 828 }, { "epoch": 0.24, "learning_rate": 7.084717281778957e-05, "loss": 1.3437, "step": 829 }, { "epoch": 0.24, "learning_rate": 7.08229532044931e-05, "loss": 1.3786, "step": 830 }, { "epoch": 0.24, "learning_rate": 7.07987057408226e-05, "loss": 1.3717, "step": 831 }, { "epoch": 0.24, "learning_rate": 7.077443044868712e-05, "loss": 1.3552, "step": 832 }, { "epoch": 0.24, "learning_rate": 7.07501273500208e-05, "loss": 1.3643, "step": 833 }, { "epoch": 0.24, "learning_rate": 7.072579646678297e-05, "loss": 1.3818, "step": 834 }, { "epoch": 0.24, "learning_rate": 7.070143782095802e-05, "loss": 1.4252, "step": 835 }, { "epoch": 0.25, "learning_rate": 7.067705143455543e-05, "loss": 1.3912, "step": 836 }, { "epoch": 0.25, "learning_rate": 7.065263732960976e-05, "loss": 1.3889, "step": 837 }, { "epoch": 0.25, "learning_rate": 7.06281955281806e-05, "loss": 1.3984, "step": 838 }, { "epoch": 0.25, "learning_rate": 7.060372605235259e-05, "loss": 1.3513, "step": 839 }, { "epoch": 0.25, "learning_rate": 7.057922892423534e-05, "loss": 1.3649, "step": 840 }, { "epoch": 0.25, "learning_rate": 7.055470416596347e-05, "loss": 1.413, "step": 841 }, { "epoch": 0.25, "learning_rate": 7.053015179969656e-05, "loss": 1.4038, "step": 842 }, { "epoch": 0.25, "learning_rate": 7.050557184761915e-05, "loss": 1.381, "step": 843 }, { "epoch": 0.25, "learning_rate": 7.048096433194067e-05, "loss": 1.3958, "step": 844 }, { "epoch": 0.25, "learning_rate": 7.045632927489548e-05, "loss": 1.4419, "step": 845 }, { "epoch": 0.25, "learning_rate": 7.043166669874282e-05, "loss": 1.3879, "step": 846 }, { "epoch": 0.25, "learning_rate": 7.04069766257668e-05, "loss": 1.3386, "step": 847 }, { "epoch": 0.25, "learning_rate": 7.038225907827639e-05, "loss": 1.3618, "step": 848 }, { "epoch": 0.25, "learning_rate": 7.035751407860533e-05, "loss": 1.4196, "step": 849 }, { "epoch": 0.25, "learning_rate": 7.033274164911223e-05, "loss": 1.3625, "step": 850 }, { "epoch": 0.25, "learning_rate": 7.030794181218045e-05, "loss": 1.3966, "step": 851 }, { "epoch": 0.25, "learning_rate": 7.028311459021812e-05, "loss": 1.3846, "step": 852 }, { "epoch": 0.25, "learning_rate": 7.02582600056581e-05, "loss": 1.4307, "step": 853 }, { "epoch": 0.25, "learning_rate": 7.023337808095798e-05, "loss": 1.3722, "step": 854 }, { "epoch": 0.25, "learning_rate": 7.020846883860011e-05, "loss": 1.3685, "step": 855 }, { "epoch": 0.25, "learning_rate": 7.018353230109142e-05, "loss": 1.3929, "step": 856 }, { "epoch": 0.25, "learning_rate": 7.015856849096357e-05, "loss": 1.436, "step": 857 }, { "epoch": 0.25, "learning_rate": 7.013357743077289e-05, "loss": 1.3745, "step": 858 }, { "epoch": 0.25, "learning_rate": 7.010855914310023e-05, "loss": 1.3611, "step": 859 }, { "epoch": 0.25, "learning_rate": 7.008351365055116e-05, "loss": 1.3971, "step": 860 }, { "epoch": 0.25, "learning_rate": 7.00584409757557e-05, "loss": 1.3832, "step": 861 }, { "epoch": 0.25, "learning_rate": 7.00333411413686e-05, "loss": 1.3349, "step": 862 }, { "epoch": 0.25, "learning_rate": 7.000821417006898e-05, "loss": 1.3833, "step": 863 }, { "epoch": 0.25, "learning_rate": 6.998306008456059e-05, "loss": 1.4131, "step": 864 }, { "epoch": 0.25, "learning_rate": 6.995787890757162e-05, "loss": 1.3692, "step": 865 }, { "epoch": 0.25, "learning_rate": 6.993267066185481e-05, "loss": 1.3741, "step": 866 }, { "epoch": 0.25, "learning_rate": 6.990743537018726e-05, "loss": 1.3803, "step": 867 }, { "epoch": 0.25, "learning_rate": 6.988217305537058e-05, "loss": 1.4036, "step": 868 }, { "epoch": 0.25, "learning_rate": 6.985688374023079e-05, "loss": 1.3663, "step": 869 }, { "epoch": 0.26, "learning_rate": 6.983156744761826e-05, "loss": 1.3626, "step": 870 }, { "epoch": 0.26, "learning_rate": 6.98062242004078e-05, "loss": 1.414, "step": 871 }, { "epoch": 0.26, "learning_rate": 6.978085402149853e-05, "loss": 1.3694, "step": 872 }, { "epoch": 0.26, "learning_rate": 6.97554569338139e-05, "loss": 1.3627, "step": 873 }, { "epoch": 0.26, "learning_rate": 6.973003296030172e-05, "loss": 1.338, "step": 874 }, { "epoch": 0.26, "learning_rate": 6.970458212393405e-05, "loss": 1.3856, "step": 875 }, { "epoch": 0.26, "learning_rate": 6.967910444770725e-05, "loss": 1.4076, "step": 876 }, { "epoch": 0.26, "learning_rate": 6.96535999546419e-05, "loss": 1.3819, "step": 877 }, { "epoch": 0.26, "learning_rate": 6.962806866778284e-05, "loss": 1.3942, "step": 878 }, { "epoch": 0.26, "learning_rate": 6.960251061019909e-05, "loss": 1.3432, "step": 879 }, { "epoch": 0.26, "learning_rate": 6.95769258049839e-05, "loss": 1.4081, "step": 880 }, { "epoch": 0.26, "learning_rate": 6.955131427525466e-05, "loss": 1.3791, "step": 881 }, { "epoch": 0.26, "learning_rate": 6.95256760441529e-05, "loss": 1.3528, "step": 882 }, { "epoch": 0.26, "learning_rate": 6.950001113484432e-05, "loss": 1.4087, "step": 883 }, { "epoch": 0.26, "learning_rate": 6.947431957051865e-05, "loss": 1.3472, "step": 884 }, { "epoch": 0.26, "learning_rate": 6.944860137438978e-05, "loss": 1.3455, "step": 885 }, { "epoch": 0.26, "learning_rate": 6.942285656969563e-05, "loss": 1.3966, "step": 886 }, { "epoch": 0.26, "learning_rate": 6.939708517969815e-05, "loss": 1.3807, "step": 887 }, { "epoch": 0.26, "learning_rate": 6.937128722768333e-05, "loss": 1.3226, "step": 888 }, { "epoch": 0.26, "learning_rate": 6.934546273696116e-05, "loss": 1.3624, "step": 889 }, { "epoch": 0.26, "learning_rate": 6.931961173086561e-05, "loss": 1.3662, "step": 890 }, { "epoch": 0.26, "learning_rate": 6.92937342327546e-05, "loss": 1.3635, "step": 891 }, { "epoch": 0.26, "learning_rate": 6.926783026600997e-05, "loss": 1.3248, "step": 892 }, { "epoch": 0.26, "learning_rate": 6.924189985403753e-05, "loss": 1.3703, "step": 893 }, { "epoch": 0.26, "learning_rate": 6.921594302026693e-05, "loss": 1.4017, "step": 894 }, { "epoch": 0.26, "learning_rate": 6.918995978815174e-05, "loss": 1.3598, "step": 895 }, { "epoch": 0.26, "learning_rate": 6.916395018116932e-05, "loss": 1.3817, "step": 896 }, { "epoch": 0.26, "learning_rate": 6.913791422282094e-05, "loss": 1.3994, "step": 897 }, { "epoch": 0.26, "learning_rate": 6.91118519366316e-05, "loss": 1.4117, "step": 898 }, { "epoch": 0.26, "learning_rate": 6.908576334615016e-05, "loss": 1.4131, "step": 899 }, { "epoch": 0.26, "learning_rate": 6.90596484749492e-05, "loss": 1.3987, "step": 900 }, { "epoch": 0.26, "eval_loss": 0.1372963935136795, "eval_runtime": 25.0998, "eval_samples_per_second": 79.682, "eval_steps_per_second": 0.637, "step": 900 }, { "epoch": 0.26, "learning_rate": 6.903350734662505e-05, "loss": 1.3955, "step": 901 }, { "epoch": 0.26, "learning_rate": 6.90073399847978e-05, "loss": 1.3236, "step": 902 }, { "epoch": 0.26, "learning_rate": 6.898114641311122e-05, "loss": 1.4109, "step": 903 }, { "epoch": 0.27, "learning_rate": 6.895492665523274e-05, "loss": 1.3628, "step": 904 }, { "epoch": 0.27, "learning_rate": 6.892868073485348e-05, "loss": 1.4298, "step": 905 }, { "epoch": 0.27, "learning_rate": 6.89024086756882e-05, "loss": 1.3471, "step": 906 }, { "epoch": 0.27, "learning_rate": 6.887611050147528e-05, "loss": 1.3855, "step": 907 }, { "epoch": 0.27, "learning_rate": 6.884978623597665e-05, "loss": 1.3755, "step": 908 }, { "epoch": 0.27, "learning_rate": 6.88234359029779e-05, "loss": 1.4083, "step": 909 }, { "epoch": 0.27, "learning_rate": 6.879705952628809e-05, "loss": 1.3681, "step": 910 }, { "epoch": 0.27, "learning_rate": 6.877065712973986e-05, "loss": 1.3067, "step": 911 }, { "epoch": 0.27, "learning_rate": 6.874422873718934e-05, "loss": 1.3881, "step": 912 }, { "epoch": 0.27, "learning_rate": 6.871777437251617e-05, "loss": 1.3749, "step": 913 }, { "epoch": 0.27, "learning_rate": 6.869129405962342e-05, "loss": 1.3276, "step": 914 }, { "epoch": 0.27, "learning_rate": 6.866478782243766e-05, "loss": 1.3876, "step": 915 }, { "epoch": 0.27, "learning_rate": 6.863825568490885e-05, "loss": 1.4264, "step": 916 }, { "epoch": 0.27, "learning_rate": 6.861169767101034e-05, "loss": 1.3915, "step": 917 }, { "epoch": 0.27, "learning_rate": 6.858511380473887e-05, "loss": 1.383, "step": 918 }, { "epoch": 0.27, "learning_rate": 6.855850411011459e-05, "loss": 1.3867, "step": 919 }, { "epoch": 0.27, "learning_rate": 6.853186861118089e-05, "loss": 1.419, "step": 920 }, { "epoch": 0.27, "learning_rate": 6.850520733200456e-05, "loss": 1.3738, "step": 921 }, { "epoch": 0.27, "learning_rate": 6.847852029667565e-05, "loss": 1.3506, "step": 922 }, { "epoch": 0.27, "learning_rate": 6.845180752930749e-05, "loss": 1.3391, "step": 923 }, { "epoch": 0.27, "learning_rate": 6.842506905403664e-05, "loss": 1.3604, "step": 924 }, { "epoch": 0.27, "learning_rate": 6.839830489502293e-05, "loss": 1.44, "step": 925 }, { "epoch": 0.27, "learning_rate": 6.837151507644935e-05, "loss": 1.3542, "step": 926 }, { "epoch": 0.27, "learning_rate": 6.83446996225221e-05, "loss": 1.3602, "step": 927 }, { "epoch": 0.27, "learning_rate": 6.831785855747054e-05, "loss": 1.4267, "step": 928 }, { "epoch": 0.27, "learning_rate": 6.829099190554717e-05, "loss": 1.3565, "step": 929 }, { "epoch": 0.27, "learning_rate": 6.826409969102763e-05, "loss": 1.3914, "step": 930 }, { "epoch": 0.27, "learning_rate": 6.823718193821061e-05, "loss": 1.3666, "step": 931 }, { "epoch": 0.27, "learning_rate": 6.821023867141791e-05, "loss": 1.3441, "step": 932 }, { "epoch": 0.27, "learning_rate": 6.81832699149944e-05, "loss": 1.3724, "step": 933 }, { "epoch": 0.27, "learning_rate": 6.815627569330793e-05, "loss": 1.3677, "step": 934 }, { "epoch": 0.27, "learning_rate": 6.81292560307494e-05, "loss": 1.3859, "step": 935 }, { "epoch": 0.27, "learning_rate": 6.810221095173268e-05, "loss": 1.3665, "step": 936 }, { "epoch": 0.27, "learning_rate": 6.807514048069461e-05, "loss": 1.334, "step": 937 }, { "epoch": 0.28, "learning_rate": 6.804804464209498e-05, "loss": 1.4157, "step": 938 }, { "epoch": 0.28, "learning_rate": 6.802092346041647e-05, "loss": 1.383, "step": 939 }, { "epoch": 0.28, "learning_rate": 6.799377696016471e-05, "loss": 1.3366, "step": 940 }, { "epoch": 0.28, "learning_rate": 6.796660516586817e-05, "loss": 1.4138, "step": 941 }, { "epoch": 0.28, "learning_rate": 6.793940810207819e-05, "loss": 1.3729, "step": 942 }, { "epoch": 0.28, "learning_rate": 6.791218579336891e-05, "loss": 1.3978, "step": 943 }, { "epoch": 0.28, "learning_rate": 6.788493826433735e-05, "loss": 1.3338, "step": 944 }, { "epoch": 0.28, "learning_rate": 6.785766553960324e-05, "loss": 1.3656, "step": 945 }, { "epoch": 0.28, "learning_rate": 6.783036764380913e-05, "loss": 1.3965, "step": 946 }, { "epoch": 0.28, "learning_rate": 6.780304460162029e-05, "loss": 1.3199, "step": 947 }, { "epoch": 0.28, "learning_rate": 6.777569643772472e-05, "loss": 1.4047, "step": 948 }, { "epoch": 0.28, "learning_rate": 6.77483231768331e-05, "loss": 1.4125, "step": 949 }, { "epoch": 0.28, "learning_rate": 6.772092484367882e-05, "loss": 1.3662, "step": 950 }, { "epoch": 0.28, "learning_rate": 6.76935014630179e-05, "loss": 1.3421, "step": 951 }, { "epoch": 0.28, "learning_rate": 6.7666053059629e-05, "loss": 1.3097, "step": 952 }, { "epoch": 0.28, "learning_rate": 6.763857965831337e-05, "loss": 1.3788, "step": 953 }, { "epoch": 0.28, "learning_rate": 6.761108128389488e-05, "loss": 1.4091, "step": 954 }, { "epoch": 0.28, "learning_rate": 6.758355796121994e-05, "loss": 1.3692, "step": 955 }, { "epoch": 0.28, "learning_rate": 6.755600971515753e-05, "loss": 1.3709, "step": 956 }, { "epoch": 0.28, "learning_rate": 6.752843657059911e-05, "loss": 1.3784, "step": 957 }, { "epoch": 0.28, "learning_rate": 6.750083855245869e-05, "loss": 1.4132, "step": 958 }, { "epoch": 0.28, "learning_rate": 6.747321568567267e-05, "loss": 1.3661, "step": 959 }, { "epoch": 0.28, "learning_rate": 6.74455679952e-05, "loss": 1.3735, "step": 960 }, { "epoch": 0.28, "learning_rate": 6.7417895506022e-05, "loss": 1.3725, "step": 961 }, { "epoch": 0.28, "learning_rate": 6.739019824314242e-05, "loss": 1.3935, "step": 962 }, { "epoch": 0.28, "learning_rate": 6.736247623158738e-05, "loss": 1.3863, "step": 963 }, { "epoch": 0.28, "learning_rate": 6.733472949640534e-05, "loss": 1.3645, "step": 964 }, { "epoch": 0.28, "learning_rate": 6.730695806266719e-05, "loss": 1.3497, "step": 965 }, { "epoch": 0.28, "learning_rate": 6.727916195546602e-05, "loss": 1.3656, "step": 966 }, { "epoch": 0.28, "learning_rate": 6.72513411999173e-05, "loss": 1.303, "step": 967 }, { "epoch": 0.28, "learning_rate": 6.722349582115872e-05, "loss": 1.3516, "step": 968 }, { "epoch": 0.28, "learning_rate": 6.719562584435026e-05, "loss": 1.4204, "step": 969 }, { "epoch": 0.28, "learning_rate": 6.716773129467406e-05, "loss": 1.3299, "step": 970 }, { "epoch": 0.28, "learning_rate": 6.713981219733457e-05, "loss": 1.4038, "step": 971 }, { "epoch": 0.29, "learning_rate": 6.711186857755831e-05, "loss": 1.349, "step": 972 }, { "epoch": 0.29, "learning_rate": 6.708390046059402e-05, "loss": 1.3678, "step": 973 }, { "epoch": 0.29, "learning_rate": 6.705590787171256e-05, "loss": 1.3807, "step": 974 }, { "epoch": 0.29, "learning_rate": 6.70278908362069e-05, "loss": 1.383, "step": 975 }, { "epoch": 0.29, "learning_rate": 6.69998493793921e-05, "loss": 1.361, "step": 976 }, { "epoch": 0.29, "learning_rate": 6.697178352660528e-05, "loss": 1.377, "step": 977 }, { "epoch": 0.29, "learning_rate": 6.694369330320563e-05, "loss": 1.3704, "step": 978 }, { "epoch": 0.29, "learning_rate": 6.691557873457432e-05, "loss": 1.3005, "step": 979 }, { "epoch": 0.29, "learning_rate": 6.688743984611455e-05, "loss": 1.4302, "step": 980 }, { "epoch": 0.29, "learning_rate": 6.685927666325146e-05, "loss": 1.3991, "step": 981 }, { "epoch": 0.29, "learning_rate": 6.68310892114322e-05, "loss": 1.3321, "step": 982 }, { "epoch": 0.29, "learning_rate": 6.680287751612576e-05, "loss": 1.3784, "step": 983 }, { "epoch": 0.29, "learning_rate": 6.677464160282315e-05, "loss": 1.4145, "step": 984 }, { "epoch": 0.29, "learning_rate": 6.674638149703714e-05, "loss": 1.398, "step": 985 }, { "epoch": 0.29, "learning_rate": 6.671809722430247e-05, "loss": 1.3176, "step": 986 }, { "epoch": 0.29, "learning_rate": 6.668978881017562e-05, "loss": 1.3949, "step": 987 }, { "epoch": 0.29, "learning_rate": 6.666145628023495e-05, "loss": 1.3917, "step": 988 }, { "epoch": 0.29, "learning_rate": 6.66330996600806e-05, "loss": 1.321, "step": 989 }, { "epoch": 0.29, "learning_rate": 6.660471897533444e-05, "loss": 1.3186, "step": 990 }, { "epoch": 0.29, "learning_rate": 6.657631425164013e-05, "loss": 1.3523, "step": 991 }, { "epoch": 0.29, "learning_rate": 6.6547885514663e-05, "loss": 1.3991, "step": 992 }, { "epoch": 0.29, "learning_rate": 6.651943279009015e-05, "loss": 1.365, "step": 993 }, { "epoch": 0.29, "learning_rate": 6.649095610363028e-05, "loss": 1.3931, "step": 994 }, { "epoch": 0.29, "learning_rate": 6.646245548101379e-05, "loss": 1.331, "step": 995 }, { "epoch": 0.29, "learning_rate": 6.643393094799265e-05, "loss": 1.3461, "step": 996 }, { "epoch": 0.29, "learning_rate": 6.640538253034055e-05, "loss": 1.3782, "step": 997 }, { "epoch": 0.29, "learning_rate": 6.637681025385261e-05, "loss": 1.3777, "step": 998 }, { "epoch": 0.29, "learning_rate": 6.634821414434563e-05, "loss": 1.4046, "step": 999 }, { "epoch": 0.29, "learning_rate": 6.631959422765787e-05, "loss": 1.3816, "step": 1000 }, { "epoch": 0.29, "eval_loss": 0.1353466957807541, "eval_runtime": 25.1181, "eval_samples_per_second": 79.624, "eval_steps_per_second": 0.637, "step": 1000 }, { "epoch": 0.29, "learning_rate": 6.629095052964915e-05, "loss": 1.3722, "step": 1001 }, { "epoch": 0.29, "learning_rate": 6.626228307620074e-05, "loss": 1.3547, "step": 1002 }, { "epoch": 0.29, "learning_rate": 6.62335918932154e-05, "loss": 1.386, "step": 1003 }, { "epoch": 0.29, "learning_rate": 6.620487700661734e-05, "loss": 1.3744, "step": 1004 }, { "epoch": 0.29, "learning_rate": 6.617613844235214e-05, "loss": 1.3426, "step": 1005 }, { "epoch": 0.3, "learning_rate": 6.614737622638684e-05, "loss": 1.3546, "step": 1006 }, { "epoch": 0.3, "learning_rate": 6.611859038470977e-05, "loss": 1.3962, "step": 1007 }, { "epoch": 0.3, "learning_rate": 6.608978094333068e-05, "loss": 1.3445, "step": 1008 }, { "epoch": 0.3, "learning_rate": 6.606094792828062e-05, "loss": 1.3404, "step": 1009 }, { "epoch": 0.3, "learning_rate": 6.603209136561192e-05, "loss": 1.4451, "step": 1010 }, { "epoch": 0.3, "learning_rate": 6.600321128139822e-05, "loss": 1.3401, "step": 1011 }, { "epoch": 0.3, "learning_rate": 6.597430770173437e-05, "loss": 1.3405, "step": 1012 }, { "epoch": 0.3, "learning_rate": 6.594538065273652e-05, "loss": 1.3704, "step": 1013 }, { "epoch": 0.3, "learning_rate": 6.591643016054195e-05, "loss": 1.4086, "step": 1014 }, { "epoch": 0.3, "learning_rate": 6.588745625130914e-05, "loss": 1.3891, "step": 1015 }, { "epoch": 0.3, "learning_rate": 6.585845895121777e-05, "loss": 1.3765, "step": 1016 }, { "epoch": 0.3, "learning_rate": 6.582943828646862e-05, "loss": 1.3589, "step": 1017 }, { "epoch": 0.3, "learning_rate": 6.58003942832836e-05, "loss": 1.3866, "step": 1018 }, { "epoch": 0.3, "learning_rate": 6.577132696790568e-05, "loss": 1.3543, "step": 1019 }, { "epoch": 0.3, "learning_rate": 6.574223636659891e-05, "loss": 1.3239, "step": 1020 }, { "epoch": 0.3, "learning_rate": 6.571312250564838e-05, "loss": 1.2979, "step": 1021 }, { "epoch": 0.3, "learning_rate": 6.568398541136022e-05, "loss": 1.3911, "step": 1022 }, { "epoch": 0.3, "learning_rate": 6.56548251100615e-05, "loss": 1.333, "step": 1023 }, { "epoch": 0.3, "learning_rate": 6.562564162810031e-05, "loss": 1.3779, "step": 1024 }, { "epoch": 0.3, "learning_rate": 6.559643499184566e-05, "loss": 1.403, "step": 1025 }, { "epoch": 0.3, "learning_rate": 6.556720522768748e-05, "loss": 1.4145, "step": 1026 }, { "epoch": 0.3, "learning_rate": 6.553795236203661e-05, "loss": 1.3439, "step": 1027 }, { "epoch": 0.3, "learning_rate": 6.550867642132474e-05, "loss": 1.3679, "step": 1028 }, { "epoch": 0.3, "learning_rate": 6.547937743200445e-05, "loss": 1.4203, "step": 1029 }, { "epoch": 0.3, "learning_rate": 6.54500554205491e-05, "loss": 1.3372, "step": 1030 }, { "epoch": 0.3, "learning_rate": 6.542071041345288e-05, "loss": 1.3788, "step": 1031 }, { "epoch": 0.3, "learning_rate": 6.539134243723078e-05, "loss": 1.3547, "step": 1032 }, { "epoch": 0.3, "learning_rate": 6.536195151841847e-05, "loss": 1.4116, "step": 1033 }, { "epoch": 0.3, "learning_rate": 6.533253768357243e-05, "loss": 1.3164, "step": 1034 }, { "epoch": 0.3, "learning_rate": 6.530310095926979e-05, "loss": 1.3799, "step": 1035 }, { "epoch": 0.3, "learning_rate": 6.527364137210839e-05, "loss": 1.3404, "step": 1036 }, { "epoch": 0.3, "learning_rate": 6.524415894870674e-05, "loss": 1.3918, "step": 1037 }, { "epoch": 0.3, "learning_rate": 6.521465371570393e-05, "loss": 1.3864, "step": 1038 }, { "epoch": 0.3, "learning_rate": 6.518512569975972e-05, "loss": 1.3427, "step": 1039 }, { "epoch": 0.31, "learning_rate": 6.515557492755441e-05, "loss": 1.4154, "step": 1040 }, { "epoch": 0.31, "learning_rate": 6.512600142578892e-05, "loss": 1.4025, "step": 1041 }, { "epoch": 0.31, "learning_rate": 6.509640522118465e-05, "loss": 1.3397, "step": 1042 }, { "epoch": 0.31, "learning_rate": 6.506678634048353e-05, "loss": 1.38, "step": 1043 }, { "epoch": 0.31, "learning_rate": 6.503714481044798e-05, "loss": 1.41, "step": 1044 }, { "epoch": 0.31, "learning_rate": 6.500748065786088e-05, "loss": 1.3764, "step": 1045 }, { "epoch": 0.31, "learning_rate": 6.497779390952559e-05, "loss": 1.39, "step": 1046 }, { "epoch": 0.31, "learning_rate": 6.494808459226584e-05, "loss": 1.3411, "step": 1047 }, { "epoch": 0.31, "learning_rate": 6.491835273292574e-05, "loss": 1.3865, "step": 1048 }, { "epoch": 0.31, "learning_rate": 6.488859835836984e-05, "loss": 1.3903, "step": 1049 }, { "epoch": 0.31, "learning_rate": 6.485882149548296e-05, "loss": 1.422, "step": 1050 }, { "epoch": 0.31, "learning_rate": 6.482902217117028e-05, "loss": 1.3057, "step": 1051 }, { "epoch": 0.31, "learning_rate": 6.479920041235727e-05, "loss": 1.3637, "step": 1052 }, { "epoch": 0.31, "learning_rate": 6.476935624598966e-05, "loss": 1.4102, "step": 1053 }, { "epoch": 0.31, "learning_rate": 6.473948969903342e-05, "loss": 1.3363, "step": 1054 }, { "epoch": 0.31, "learning_rate": 6.470960079847477e-05, "loss": 1.3814, "step": 1055 }, { "epoch": 0.31, "learning_rate": 6.467968957132011e-05, "loss": 1.3407, "step": 1056 }, { "epoch": 0.31, "learning_rate": 6.4649756044596e-05, "loss": 1.4138, "step": 1057 }, { "epoch": 0.31, "learning_rate": 6.461980024534918e-05, "loss": 1.3256, "step": 1058 }, { "epoch": 0.31, "learning_rate": 6.458982220064649e-05, "loss": 1.3617, "step": 1059 }, { "epoch": 0.31, "learning_rate": 6.455982193757489e-05, "loss": 1.3438, "step": 1060 }, { "epoch": 0.31, "learning_rate": 6.452979948324139e-05, "loss": 1.3553, "step": 1061 }, { "epoch": 0.31, "learning_rate": 6.449975486477309e-05, "loss": 1.3779, "step": 1062 }, { "epoch": 0.31, "learning_rate": 6.446968810931707e-05, "loss": 1.3494, "step": 1063 }, { "epoch": 0.31, "learning_rate": 6.443959924404041e-05, "loss": 1.3816, "step": 1064 }, { "epoch": 0.31, "learning_rate": 6.440948829613025e-05, "loss": 1.3517, "step": 1065 }, { "epoch": 0.31, "learning_rate": 6.43793552927936e-05, "loss": 1.3375, "step": 1066 }, { "epoch": 0.31, "learning_rate": 6.434920026125741e-05, "loss": 1.4003, "step": 1067 }, { "epoch": 0.31, "learning_rate": 6.431902322876855e-05, "loss": 1.3346, "step": 1068 }, { "epoch": 0.31, "learning_rate": 6.428882422259379e-05, "loss": 1.3349, "step": 1069 }, { "epoch": 0.31, "learning_rate": 6.42586032700197e-05, "loss": 1.3448, "step": 1070 }, { "epoch": 0.31, "learning_rate": 6.422836039835273e-05, "loss": 1.3875, "step": 1071 }, { "epoch": 0.31, "learning_rate": 6.41980956349191e-05, "loss": 1.4008, "step": 1072 }, { "epoch": 0.31, "learning_rate": 6.416780900706484e-05, "loss": 1.3485, "step": 1073 }, { "epoch": 0.32, "learning_rate": 6.413750054215574e-05, "loss": 1.4057, "step": 1074 }, { "epoch": 0.32, "learning_rate": 6.410717026757726e-05, "loss": 1.3803, "step": 1075 }, { "epoch": 0.32, "learning_rate": 6.407681821073463e-05, "loss": 1.3363, "step": 1076 }, { "epoch": 0.32, "learning_rate": 6.404644439905274e-05, "loss": 1.342, "step": 1077 }, { "epoch": 0.32, "learning_rate": 6.401604885997614e-05, "loss": 1.4685, "step": 1078 }, { "epoch": 0.32, "learning_rate": 6.398563162096903e-05, "loss": 1.3779, "step": 1079 }, { "epoch": 0.32, "learning_rate": 6.395519270951517e-05, "loss": 1.3877, "step": 1080 }, { "epoch": 0.32, "learning_rate": 6.392473215311792e-05, "loss": 1.3241, "step": 1081 }, { "epoch": 0.32, "learning_rate": 6.389424997930023e-05, "loss": 1.4014, "step": 1082 }, { "epoch": 0.32, "learning_rate": 6.386374621560455e-05, "loss": 1.3541, "step": 1083 }, { "epoch": 0.32, "learning_rate": 6.383322088959287e-05, "loss": 1.3849, "step": 1084 }, { "epoch": 0.32, "learning_rate": 6.38026740288466e-05, "loss": 1.3652, "step": 1085 }, { "epoch": 0.32, "learning_rate": 6.377210566096668e-05, "loss": 1.3465, "step": 1086 }, { "epoch": 0.32, "learning_rate": 6.374151581357343e-05, "loss": 1.4152, "step": 1087 }, { "epoch": 0.32, "learning_rate": 6.37109045143066e-05, "loss": 1.341, "step": 1088 }, { "epoch": 0.32, "learning_rate": 6.368027179082536e-05, "loss": 1.3858, "step": 1089 }, { "epoch": 0.32, "learning_rate": 6.364961767080816e-05, "loss": 1.3795, "step": 1090 }, { "epoch": 0.32, "learning_rate": 6.361894218195282e-05, "loss": 1.3392, "step": 1091 }, { "epoch": 0.32, "learning_rate": 6.35882453519765e-05, "loss": 1.3573, "step": 1092 }, { "epoch": 0.32, "learning_rate": 6.355752720861559e-05, "loss": 1.3841, "step": 1093 }, { "epoch": 0.32, "learning_rate": 6.352678777962578e-05, "loss": 1.3559, "step": 1094 }, { "epoch": 0.32, "learning_rate": 6.349602709278195e-05, "loss": 1.3413, "step": 1095 }, { "epoch": 0.32, "learning_rate": 6.346524517587825e-05, "loss": 1.3898, "step": 1096 }, { "epoch": 0.32, "learning_rate": 6.343444205672794e-05, "loss": 1.4117, "step": 1097 }, { "epoch": 0.32, "learning_rate": 6.340361776316349e-05, "loss": 1.3277, "step": 1098 }, { "epoch": 0.32, "learning_rate": 6.337277232303647e-05, "loss": 1.2878, "step": 1099 }, { "epoch": 0.32, "learning_rate": 6.33419057642176e-05, "loss": 1.3592, "step": 1100 }, { "epoch": 0.32, "eval_loss": 0.12910032272338867, "eval_runtime": 25.0654, "eval_samples_per_second": 79.791, "eval_steps_per_second": 0.638, "step": 1100 }, { "epoch": 0.32, "learning_rate": 6.331101811459663e-05, "loss": 1.3193, "step": 1101 }, { "epoch": 0.32, "learning_rate": 6.328010940208241e-05, "loss": 1.3701, "step": 1102 }, { "epoch": 0.32, "learning_rate": 6.324917965460279e-05, "loss": 1.3797, "step": 1103 }, { "epoch": 0.32, "learning_rate": 6.321822890010464e-05, "loss": 1.3794, "step": 1104 }, { "epoch": 0.32, "learning_rate": 6.318725716655382e-05, "loss": 1.3539, "step": 1105 }, { "epoch": 0.32, "learning_rate": 6.315626448193511e-05, "loss": 1.3542, "step": 1106 }, { "epoch": 0.32, "learning_rate": 6.312525087425227e-05, "loss": 1.3845, "step": 1107 }, { "epoch": 0.33, "learning_rate": 6.309421637152794e-05, "loss": 1.3844, "step": 1108 }, { "epoch": 0.33, "learning_rate": 6.306316100180363e-05, "loss": 1.3809, "step": 1109 }, { "epoch": 0.33, "learning_rate": 6.303208479313974e-05, "loss": 1.4075, "step": 1110 }, { "epoch": 0.33, "learning_rate": 6.300098777361545e-05, "loss": 1.3933, "step": 1111 }, { "epoch": 0.33, "learning_rate": 6.296986997132877e-05, "loss": 1.3927, "step": 1112 }, { "epoch": 0.33, "learning_rate": 6.29387314143965e-05, "loss": 1.3684, "step": 1113 }, { "epoch": 0.33, "learning_rate": 6.290757213095417e-05, "loss": 1.366, "step": 1114 }, { "epoch": 0.33, "learning_rate": 6.287639214915606e-05, "loss": 1.3546, "step": 1115 }, { "epoch": 0.33, "learning_rate": 6.284519149717514e-05, "loss": 1.3749, "step": 1116 }, { "epoch": 0.33, "learning_rate": 6.281397020320307e-05, "loss": 1.3547, "step": 1117 }, { "epoch": 0.33, "learning_rate": 6.278272829545011e-05, "loss": 1.3282, "step": 1118 }, { "epoch": 0.33, "learning_rate": 6.27514658021452e-05, "loss": 1.3842, "step": 1119 }, { "epoch": 0.33, "learning_rate": 6.272018275153588e-05, "loss": 1.3804, "step": 1120 }, { "epoch": 0.33, "learning_rate": 6.268887917188824e-05, "loss": 1.3177, "step": 1121 }, { "epoch": 0.33, "learning_rate": 6.265755509148692e-05, "loss": 1.3476, "step": 1122 }, { "epoch": 0.33, "learning_rate": 6.26262105386351e-05, "loss": 1.3697, "step": 1123 }, { "epoch": 0.33, "learning_rate": 6.259484554165446e-05, "loss": 1.3288, "step": 1124 }, { "epoch": 0.33, "learning_rate": 6.256346012888514e-05, "loss": 1.3559, "step": 1125 }, { "epoch": 0.33, "learning_rate": 6.253205432868573e-05, "loss": 1.3796, "step": 1126 }, { "epoch": 0.33, "learning_rate": 6.250062816943322e-05, "loss": 1.3589, "step": 1127 }, { "epoch": 0.33, "learning_rate": 6.246918167952304e-05, "loss": 1.3468, "step": 1128 }, { "epoch": 0.33, "learning_rate": 6.243771488736896e-05, "loss": 1.3535, "step": 1129 }, { "epoch": 0.33, "learning_rate": 6.240622782140308e-05, "loss": 1.3399, "step": 1130 }, { "epoch": 0.33, "learning_rate": 6.237472051007589e-05, "loss": 1.3423, "step": 1131 }, { "epoch": 0.33, "learning_rate": 6.234319298185606e-05, "loss": 1.3419, "step": 1132 }, { "epoch": 0.33, "learning_rate": 6.231164526523063e-05, "loss": 1.3864, "step": 1133 }, { "epoch": 0.33, "learning_rate": 6.228007738870484e-05, "loss": 1.3656, "step": 1134 }, { "epoch": 0.33, "learning_rate": 6.224848938080212e-05, "loss": 1.3756, "step": 1135 }, { "epoch": 0.33, "learning_rate": 6.221688127006415e-05, "loss": 1.4115, "step": 1136 }, { "epoch": 0.33, "learning_rate": 6.218525308505072e-05, "loss": 1.3585, "step": 1137 }, { "epoch": 0.33, "learning_rate": 6.21536048543398e-05, "loss": 1.3752, "step": 1138 }, { "epoch": 0.33, "learning_rate": 6.212193660652743e-05, "loss": 1.3487, "step": 1139 }, { "epoch": 0.33, "learning_rate": 6.209024837022774e-05, "loss": 1.3239, "step": 1140 }, { "epoch": 0.33, "learning_rate": 6.205854017407297e-05, "loss": 1.3535, "step": 1141 }, { "epoch": 0.34, "learning_rate": 6.202681204671338e-05, "loss": 1.3799, "step": 1142 }, { "epoch": 0.34, "learning_rate": 6.199506401681718e-05, "loss": 1.3443, "step": 1143 }, { "epoch": 0.34, "learning_rate": 6.196329611307062e-05, "loss": 1.3641, "step": 1144 }, { "epoch": 0.34, "learning_rate": 6.19315083641779e-05, "loss": 1.3903, "step": 1145 }, { "epoch": 0.34, "learning_rate": 6.189970079886115e-05, "loss": 1.3888, "step": 1146 }, { "epoch": 0.34, "learning_rate": 6.18678734458604e-05, "loss": 1.3682, "step": 1147 }, { "epoch": 0.34, "learning_rate": 6.183602633393352e-05, "loss": 1.3903, "step": 1148 }, { "epoch": 0.34, "learning_rate": 6.180415949185633e-05, "loss": 1.4066, "step": 1149 }, { "epoch": 0.34, "learning_rate": 6.17722729484224e-05, "loss": 1.3253, "step": 1150 }, { "epoch": 0.34, "learning_rate": 6.17403667324431e-05, "loss": 1.3374, "step": 1151 }, { "epoch": 0.34, "learning_rate": 6.170844087274761e-05, "loss": 1.3239, "step": 1152 }, { "epoch": 0.34, "learning_rate": 6.167649539818284e-05, "loss": 1.3604, "step": 1153 }, { "epoch": 0.34, "learning_rate": 6.164453033761345e-05, "loss": 1.3063, "step": 1154 }, { "epoch": 0.34, "learning_rate": 6.161254571992175e-05, "loss": 1.344, "step": 1155 }, { "epoch": 0.34, "learning_rate": 6.158054157400776e-05, "loss": 1.342, "step": 1156 }, { "epoch": 0.34, "learning_rate": 6.154851792878915e-05, "loss": 1.4195, "step": 1157 }, { "epoch": 0.34, "learning_rate": 6.151647481320114e-05, "loss": 1.3671, "step": 1158 }, { "epoch": 0.34, "learning_rate": 6.148441225619664e-05, "loss": 1.3076, "step": 1159 }, { "epoch": 0.34, "learning_rate": 6.145233028674605e-05, "loss": 1.3985, "step": 1160 }, { "epoch": 0.34, "learning_rate": 6.142022893383739e-05, "loss": 1.292, "step": 1161 }, { "epoch": 0.34, "learning_rate": 6.138810822647608e-05, "loss": 1.349, "step": 1162 }, { "epoch": 0.34, "learning_rate": 6.135596819368512e-05, "loss": 1.3283, "step": 1163 }, { "epoch": 0.34, "learning_rate": 6.132380886450497e-05, "loss": 1.3997, "step": 1164 }, { "epoch": 0.34, "learning_rate": 6.129163026799346e-05, "loss": 1.3227, "step": 1165 }, { "epoch": 0.34, "learning_rate": 6.125943243322588e-05, "loss": 1.3925, "step": 1166 }, { "epoch": 0.34, "learning_rate": 6.12272153892949e-05, "loss": 1.3542, "step": 1167 }, { "epoch": 0.34, "learning_rate": 6.119497916531053e-05, "loss": 1.3854, "step": 1168 }, { "epoch": 0.34, "learning_rate": 6.116272379040011e-05, "loss": 1.3879, "step": 1169 }, { "epoch": 0.34, "learning_rate": 6.113044929370832e-05, "loss": 1.361, "step": 1170 }, { "epoch": 0.34, "learning_rate": 6.109815570439707e-05, "loss": 1.3438, "step": 1171 }, { "epoch": 0.34, "learning_rate": 6.106584305164553e-05, "loss": 1.4087, "step": 1172 }, { "epoch": 0.34, "learning_rate": 6.103351136465014e-05, "loss": 1.3363, "step": 1173 }, { "epoch": 0.34, "learning_rate": 6.1001160672624464e-05, "loss": 1.3155, "step": 1174 }, { "epoch": 0.34, "learning_rate": 6.0968791004799316e-05, "loss": 1.3705, "step": 1175 }, { "epoch": 0.35, "learning_rate": 6.093640239042261e-05, "loss": 1.2851, "step": 1176 }, { "epoch": 0.35, "learning_rate": 6.090399485875937e-05, "loss": 1.3735, "step": 1177 }, { "epoch": 0.35, "learning_rate": 6.0871568439091743e-05, "loss": 1.3692, "step": 1178 }, { "epoch": 0.35, "learning_rate": 6.083912316071893e-05, "loss": 1.333, "step": 1179 }, { "epoch": 0.35, "learning_rate": 6.080665905295718e-05, "loss": 1.369, "step": 1180 }, { "epoch": 0.35, "learning_rate": 6.077417614513973e-05, "loss": 1.3416, "step": 1181 }, { "epoch": 0.35, "learning_rate": 6.0741674466616854e-05, "loss": 1.3586, "step": 1182 }, { "epoch": 0.35, "learning_rate": 6.070915404675571e-05, "loss": 1.4149, "step": 1183 }, { "epoch": 0.35, "learning_rate": 6.067661491494046e-05, "loss": 1.3259, "step": 1184 }, { "epoch": 0.35, "learning_rate": 6.064405710057215e-05, "loss": 1.3377, "step": 1185 }, { "epoch": 0.35, "learning_rate": 6.061148063306869e-05, "loss": 1.3291, "step": 1186 }, { "epoch": 0.35, "learning_rate": 6.0578885541864875e-05, "loss": 1.4016, "step": 1187 }, { "epoch": 0.35, "learning_rate": 6.0546271856412306e-05, "loss": 1.3682, "step": 1188 }, { "epoch": 0.35, "learning_rate": 6.051363960617938e-05, "loss": 1.3339, "step": 1189 }, { "epoch": 0.35, "learning_rate": 6.0480988820651306e-05, "loss": 1.3934, "step": 1190 }, { "epoch": 0.35, "learning_rate": 6.044831952932997e-05, "loss": 1.3039, "step": 1191 }, { "epoch": 0.35, "learning_rate": 6.041563176173406e-05, "loss": 1.2647, "step": 1192 }, { "epoch": 0.35, "learning_rate": 6.03829255473989e-05, "loss": 1.3321, "step": 1193 }, { "epoch": 0.35, "learning_rate": 6.035020091587651e-05, "loss": 1.3712, "step": 1194 }, { "epoch": 0.35, "learning_rate": 6.031745789673554e-05, "loss": 1.3317, "step": 1195 }, { "epoch": 0.35, "learning_rate": 6.028469651956128e-05, "loss": 1.3625, "step": 1196 }, { "epoch": 0.35, "learning_rate": 6.0251916813955554e-05, "loss": 1.3284, "step": 1197 }, { "epoch": 0.35, "learning_rate": 6.0219118809536794e-05, "loss": 1.3712, "step": 1198 }, { "epoch": 0.35, "learning_rate": 6.018630253593994e-05, "loss": 1.3322, "step": 1199 }, { "epoch": 0.35, "learning_rate": 6.015346802281647e-05, "loss": 1.368, "step": 1200 }, { "epoch": 0.35, "eval_loss": 0.12655386328697205, "eval_runtime": 24.7692, "eval_samples_per_second": 80.746, "eval_steps_per_second": 0.646, "step": 1200 }, { "epoch": 0.35, "learning_rate": 6.012061529983431e-05, "loss": 1.3421, "step": 1201 }, { "epoch": 0.35, "learning_rate": 6.008774439667786e-05, "loss": 1.3628, "step": 1202 }, { "epoch": 0.35, "learning_rate": 6.0054855343047914e-05, "loss": 1.3855, "step": 1203 }, { "epoch": 0.35, "learning_rate": 6.002194816866171e-05, "loss": 1.3642, "step": 1204 }, { "epoch": 0.35, "learning_rate": 5.998902290325283e-05, "loss": 1.3857, "step": 1205 }, { "epoch": 0.35, "learning_rate": 5.995607957657124e-05, "loss": 1.3355, "step": 1206 }, { "epoch": 0.35, "learning_rate": 5.992311821838316e-05, "loss": 1.3002, "step": 1207 }, { "epoch": 0.35, "learning_rate": 5.989013885847117e-05, "loss": 1.3445, "step": 1208 }, { "epoch": 0.35, "learning_rate": 5.985714152663406e-05, "loss": 1.3945, "step": 1209 }, { "epoch": 0.35, "learning_rate": 5.982412625268691e-05, "loss": 1.4002, "step": 1210 }, { "epoch": 0.36, "learning_rate": 5.979109306646097e-05, "loss": 1.3741, "step": 1211 }, { "epoch": 0.36, "learning_rate": 5.9758041997803685e-05, "loss": 1.341, "step": 1212 }, { "epoch": 0.36, "learning_rate": 5.972497307657869e-05, "loss": 1.4316, "step": 1213 }, { "epoch": 0.36, "learning_rate": 5.969188633266571e-05, "loss": 1.344, "step": 1214 }, { "epoch": 0.36, "learning_rate": 5.9658781795960574e-05, "loss": 1.307, "step": 1215 }, { "epoch": 0.36, "learning_rate": 5.9625659496375234e-05, "loss": 1.3425, "step": 1216 }, { "epoch": 0.36, "learning_rate": 5.959251946383763e-05, "loss": 1.3767, "step": 1217 }, { "epoch": 0.36, "learning_rate": 5.955936172829179e-05, "loss": 1.3464, "step": 1218 }, { "epoch": 0.36, "learning_rate": 5.952618631969766e-05, "loss": 1.4277, "step": 1219 }, { "epoch": 0.36, "learning_rate": 5.9492993268031225e-05, "loss": 1.3417, "step": 1220 }, { "epoch": 0.36, "learning_rate": 5.9459782603284384e-05, "loss": 1.3843, "step": 1221 }, { "epoch": 0.36, "learning_rate": 5.9426554355464944e-05, "loss": 1.352, "step": 1222 }, { "epoch": 0.36, "learning_rate": 5.939330855459661e-05, "loss": 1.3184, "step": 1223 }, { "epoch": 0.36, "learning_rate": 5.936004523071894e-05, "loss": 1.3992, "step": 1224 }, { "epoch": 0.36, "learning_rate": 5.932676441388731e-05, "loss": 1.3183, "step": 1225 }, { "epoch": 0.36, "learning_rate": 5.929346613417296e-05, "loss": 1.3492, "step": 1226 }, { "epoch": 0.36, "learning_rate": 5.926015042166283e-05, "loss": 1.38, "step": 1227 }, { "epoch": 0.36, "learning_rate": 5.922681730645968e-05, "loss": 1.3634, "step": 1228 }, { "epoch": 0.36, "learning_rate": 5.919346681868195e-05, "loss": 1.3223, "step": 1229 }, { "epoch": 0.36, "learning_rate": 5.91600989884638e-05, "loss": 1.3518, "step": 1230 }, { "epoch": 0.36, "learning_rate": 5.912671384595504e-05, "loss": 1.3404, "step": 1231 }, { "epoch": 0.36, "learning_rate": 5.909331142132114e-05, "loss": 1.3751, "step": 1232 }, { "epoch": 0.36, "learning_rate": 5.905989174474319e-05, "loss": 1.3541, "step": 1233 }, { "epoch": 0.36, "learning_rate": 5.902645484641785e-05, "loss": 1.3202, "step": 1234 }, { "epoch": 0.36, "learning_rate": 5.899300075655734e-05, "loss": 1.3419, "step": 1235 }, { "epoch": 0.36, "learning_rate": 5.895952950538944e-05, "loss": 1.3577, "step": 1236 }, { "epoch": 0.36, "learning_rate": 5.892604112315741e-05, "loss": 1.348, "step": 1237 }, { "epoch": 0.36, "learning_rate": 5.889253564011999e-05, "loss": 1.3128, "step": 1238 }, { "epoch": 0.36, "learning_rate": 5.8859013086551394e-05, "loss": 1.3995, "step": 1239 }, { "epoch": 0.36, "learning_rate": 5.882547349274125e-05, "loss": 1.333, "step": 1240 }, { "epoch": 0.36, "learning_rate": 5.879191688899456e-05, "loss": 1.3532, "step": 1241 }, { "epoch": 0.36, "learning_rate": 5.8758343305631734e-05, "loss": 1.3281, "step": 1242 }, { "epoch": 0.36, "learning_rate": 5.872475277298847e-05, "loss": 1.3446, "step": 1243 }, { "epoch": 0.36, "learning_rate": 5.869114532141586e-05, "loss": 1.3548, "step": 1244 }, { "epoch": 0.37, "learning_rate": 5.8657520981280195e-05, "loss": 1.3182, "step": 1245 }, { "epoch": 0.37, "learning_rate": 5.86238797829631e-05, "loss": 1.3321, "step": 1246 }, { "epoch": 0.37, "learning_rate": 5.8590221756861395e-05, "loss": 1.3825, "step": 1247 }, { "epoch": 0.37, "learning_rate": 5.855654693338711e-05, "loss": 1.3687, "step": 1248 }, { "epoch": 0.37, "learning_rate": 5.8522855342967434e-05, "loss": 1.3752, "step": 1249 }, { "epoch": 0.37, "learning_rate": 5.848914701604476e-05, "loss": 1.343, "step": 1250 }, { "epoch": 0.37, "learning_rate": 5.845542198307654e-05, "loss": 1.3619, "step": 1251 }, { "epoch": 0.37, "learning_rate": 5.8421680274535374e-05, "loss": 1.3333, "step": 1252 }, { "epoch": 0.37, "learning_rate": 5.838792192090889e-05, "loss": 1.3347, "step": 1253 }, { "epoch": 0.37, "learning_rate": 5.835414695269976e-05, "loss": 1.3822, "step": 1254 }, { "epoch": 0.37, "learning_rate": 5.8320355400425714e-05, "loss": 1.3525, "step": 1255 }, { "epoch": 0.37, "learning_rate": 5.82865472946194e-05, "loss": 1.3225, "step": 1256 }, { "epoch": 0.37, "learning_rate": 5.825272266582847e-05, "loss": 1.3568, "step": 1257 }, { "epoch": 0.37, "learning_rate": 5.821888154461549e-05, "loss": 1.3643, "step": 1258 }, { "epoch": 0.37, "learning_rate": 5.8185023961557916e-05, "loss": 1.3287, "step": 1259 }, { "epoch": 0.37, "learning_rate": 5.8151149947248106e-05, "loss": 1.35, "step": 1260 }, { "epoch": 0.37, "learning_rate": 5.8117259532293246e-05, "loss": 1.347, "step": 1261 }, { "epoch": 0.37, "learning_rate": 5.8083352747315345e-05, "loss": 1.367, "step": 1262 }, { "epoch": 0.37, "learning_rate": 5.8049429622951194e-05, "loss": 1.3318, "step": 1263 }, { "epoch": 0.37, "learning_rate": 5.8015490189852365e-05, "loss": 1.3634, "step": 1264 }, { "epoch": 0.37, "learning_rate": 5.798153447868516e-05, "loss": 1.3485, "step": 1265 }, { "epoch": 0.37, "learning_rate": 5.794756252013057e-05, "loss": 1.3954, "step": 1266 }, { "epoch": 0.37, "learning_rate": 5.7913574344884314e-05, "loss": 1.3475, "step": 1267 }, { "epoch": 0.37, "learning_rate": 5.7879569983656694e-05, "loss": 1.3377, "step": 1268 }, { "epoch": 0.37, "learning_rate": 5.78455494671727e-05, "loss": 1.3657, "step": 1269 }, { "epoch": 0.37, "learning_rate": 5.7811512826171894e-05, "loss": 1.3459, "step": 1270 }, { "epoch": 0.37, "learning_rate": 5.77774600914084e-05, "loss": 1.3223, "step": 1271 }, { "epoch": 0.37, "learning_rate": 5.7743391293650895e-05, "loss": 1.3277, "step": 1272 }, { "epoch": 0.37, "learning_rate": 5.770930646368257e-05, "loss": 1.3707, "step": 1273 }, { "epoch": 0.37, "learning_rate": 5.7675205632301096e-05, "loss": 1.3402, "step": 1274 }, { "epoch": 0.37, "learning_rate": 5.764108883031861e-05, "loss": 1.336, "step": 1275 }, { "epoch": 0.37, "learning_rate": 5.7606956088561664e-05, "loss": 1.3107, "step": 1276 }, { "epoch": 0.37, "learning_rate": 5.757280743787125e-05, "loss": 1.3793, "step": 1277 }, { "epoch": 0.37, "learning_rate": 5.7538642909102654e-05, "loss": 1.3571, "step": 1278 }, { "epoch": 0.38, "learning_rate": 5.750446253312562e-05, "loss": 1.3524, "step": 1279 }, { "epoch": 0.38, "learning_rate": 5.747026634082412e-05, "loss": 1.3535, "step": 1280 }, { "epoch": 0.38, "learning_rate": 5.743605436309645e-05, "loss": 1.3684, "step": 1281 }, { "epoch": 0.38, "learning_rate": 5.740182663085519e-05, "loss": 1.375, "step": 1282 }, { "epoch": 0.38, "learning_rate": 5.736758317502714e-05, "loss": 1.3781, "step": 1283 }, { "epoch": 0.38, "learning_rate": 5.7333324026553264e-05, "loss": 1.3732, "step": 1284 }, { "epoch": 0.38, "learning_rate": 5.729904921638877e-05, "loss": 1.3345, "step": 1285 }, { "epoch": 0.38, "learning_rate": 5.7264758775502986e-05, "loss": 1.3591, "step": 1286 }, { "epoch": 0.38, "learning_rate": 5.723045273487937e-05, "loss": 1.3373, "step": 1287 }, { "epoch": 0.38, "learning_rate": 5.719613112551546e-05, "loss": 1.3948, "step": 1288 }, { "epoch": 0.38, "learning_rate": 5.7161793978422895e-05, "loss": 1.3502, "step": 1289 }, { "epoch": 0.38, "learning_rate": 5.712744132462732e-05, "loss": 1.364, "step": 1290 }, { "epoch": 0.38, "learning_rate": 5.709307319516841e-05, "loss": 1.3257, "step": 1291 }, { "epoch": 0.38, "learning_rate": 5.7058689621099803e-05, "loss": 1.3829, "step": 1292 }, { "epoch": 0.38, "learning_rate": 5.702429063348912e-05, "loss": 1.3489, "step": 1293 }, { "epoch": 0.38, "learning_rate": 5.6989876263417884e-05, "loss": 1.3713, "step": 1294 }, { "epoch": 0.38, "learning_rate": 5.6955446541981546e-05, "loss": 1.3407, "step": 1295 }, { "epoch": 0.38, "learning_rate": 5.69210015002894e-05, "loss": 1.3283, "step": 1296 }, { "epoch": 0.38, "learning_rate": 5.688654116946459e-05, "loss": 1.3342, "step": 1297 }, { "epoch": 0.38, "learning_rate": 5.685206558064407e-05, "loss": 1.3356, "step": 1298 }, { "epoch": 0.38, "learning_rate": 5.6817574764978594e-05, "loss": 1.3943, "step": 1299 }, { "epoch": 0.38, "learning_rate": 5.678306875363268e-05, "loss": 1.3543, "step": 1300 }, { "epoch": 0.38, "eval_loss": 0.12639901041984558, "eval_runtime": 24.668, "eval_samples_per_second": 81.077, "eval_steps_per_second": 0.649, "step": 1300 }, { "epoch": 0.38, "learning_rate": 5.6748547577784554e-05, "loss": 1.3064, "step": 1301 }, { "epoch": 0.38, "learning_rate": 5.671401126862615e-05, "loss": 1.3409, "step": 1302 }, { "epoch": 0.38, "learning_rate": 5.667945985736308e-05, "loss": 1.4306, "step": 1303 }, { "epoch": 0.38, "learning_rate": 5.664489337521463e-05, "loss": 1.3559, "step": 1304 }, { "epoch": 0.38, "learning_rate": 5.661031185341363e-05, "loss": 1.3144, "step": 1305 }, { "epoch": 0.38, "learning_rate": 5.657571532320658e-05, "loss": 1.375, "step": 1306 }, { "epoch": 0.38, "learning_rate": 5.6541103815853496e-05, "loss": 1.3723, "step": 1307 }, { "epoch": 0.38, "learning_rate": 5.6506477362627926e-05, "loss": 1.3533, "step": 1308 }, { "epoch": 0.38, "learning_rate": 5.647183599481694e-05, "loss": 1.2937, "step": 1309 }, { "epoch": 0.38, "learning_rate": 5.6437179743721075e-05, "loss": 1.3527, "step": 1310 }, { "epoch": 0.38, "learning_rate": 5.640250864065434e-05, "loss": 1.3708, "step": 1311 }, { "epoch": 0.38, "learning_rate": 5.6367822716944095e-05, "loss": 1.3469, "step": 1312 }, { "epoch": 0.39, "learning_rate": 5.6333122003931186e-05, "loss": 1.3701, "step": 1313 }, { "epoch": 0.39, "learning_rate": 5.629840653296974e-05, "loss": 1.3732, "step": 1314 }, { "epoch": 0.39, "learning_rate": 5.626367633542728e-05, "loss": 1.3294, "step": 1315 }, { "epoch": 0.39, "learning_rate": 5.622893144268458e-05, "loss": 1.3438, "step": 1316 }, { "epoch": 0.39, "learning_rate": 5.6194171886135754e-05, "loss": 1.3662, "step": 1317 }, { "epoch": 0.39, "learning_rate": 5.615939769718809e-05, "loss": 1.3315, "step": 1318 }, { "epoch": 0.39, "learning_rate": 5.612460890726216e-05, "loss": 1.352, "step": 1319 }, { "epoch": 0.39, "learning_rate": 5.608980554779171e-05, "loss": 1.3524, "step": 1320 }, { "epoch": 0.39, "learning_rate": 5.605498765022366e-05, "loss": 1.3169, "step": 1321 }, { "epoch": 0.39, "learning_rate": 5.602015524601802e-05, "loss": 1.3533, "step": 1322 }, { "epoch": 0.39, "learning_rate": 5.5985308366647985e-05, "loss": 1.3195, "step": 1323 }, { "epoch": 0.39, "learning_rate": 5.595044704359974e-05, "loss": 1.3552, "step": 1324 }, { "epoch": 0.39, "learning_rate": 5.59155713083726e-05, "loss": 1.3847, "step": 1325 }, { "epoch": 0.39, "learning_rate": 5.588068119247886e-05, "loss": 1.3543, "step": 1326 }, { "epoch": 0.39, "learning_rate": 5.584577672744382e-05, "loss": 1.33, "step": 1327 }, { "epoch": 0.39, "learning_rate": 5.5810857944805744e-05, "loss": 1.3055, "step": 1328 }, { "epoch": 0.39, "learning_rate": 5.577592487611581e-05, "loss": 1.3662, "step": 1329 }, { "epoch": 0.39, "learning_rate": 5.574097755293816e-05, "loss": 1.3242, "step": 1330 }, { "epoch": 0.39, "learning_rate": 5.570601600684974e-05, "loss": 1.3101, "step": 1331 }, { "epoch": 0.39, "learning_rate": 5.567104026944044e-05, "loss": 1.4008, "step": 1332 }, { "epoch": 0.39, "learning_rate": 5.5636050372312896e-05, "loss": 1.3579, "step": 1333 }, { "epoch": 0.39, "learning_rate": 5.560104634708254e-05, "loss": 1.3676, "step": 1334 }, { "epoch": 0.39, "learning_rate": 5.5566028225377645e-05, "loss": 1.325, "step": 1335 }, { "epoch": 0.39, "learning_rate": 5.553099603883913e-05, "loss": 1.3566, "step": 1336 }, { "epoch": 0.39, "learning_rate": 5.5495949819120676e-05, "loss": 1.3672, "step": 1337 }, { "epoch": 0.39, "learning_rate": 5.546088959788862e-05, "loss": 1.3204, "step": 1338 }, { "epoch": 0.39, "learning_rate": 5.5425815406821984e-05, "loss": 1.3678, "step": 1339 }, { "epoch": 0.39, "learning_rate": 5.539072727761238e-05, "loss": 1.3447, "step": 1340 }, { "epoch": 0.39, "learning_rate": 5.535562524196399e-05, "loss": 1.3748, "step": 1341 }, { "epoch": 0.39, "learning_rate": 5.5320509331593644e-05, "loss": 1.3437, "step": 1342 }, { "epoch": 0.39, "learning_rate": 5.528537957823061e-05, "loss": 1.3259, "step": 1343 }, { "epoch": 0.39, "learning_rate": 5.5250236013616766e-05, "loss": 1.3488, "step": 1344 }, { "epoch": 0.39, "learning_rate": 5.521507866950636e-05, "loss": 1.3266, "step": 1345 }, { "epoch": 0.39, "learning_rate": 5.517990757766619e-05, "loss": 1.3167, "step": 1346 }, { "epoch": 0.4, "learning_rate": 5.51447227698754e-05, "loss": 1.3166, "step": 1347 }, { "epoch": 0.4, "learning_rate": 5.510952427792559e-05, "loss": 1.4062, "step": 1348 }, { "epoch": 0.4, "learning_rate": 5.507431213362067e-05, "loss": 1.3962, "step": 1349 }, { "epoch": 0.4, "learning_rate": 5.5039086368776917e-05, "loss": 1.3542, "step": 1350 }, { "epoch": 0.4, "learning_rate": 5.500384701522291e-05, "loss": 1.3419, "step": 1351 }, { "epoch": 0.4, "learning_rate": 5.4968594104799515e-05, "loss": 1.3706, "step": 1352 }, { "epoch": 0.4, "learning_rate": 5.493332766935981e-05, "loss": 1.3761, "step": 1353 }, { "epoch": 0.4, "learning_rate": 5.489804774076914e-05, "loss": 1.3292, "step": 1354 }, { "epoch": 0.4, "learning_rate": 5.4862754350905e-05, "loss": 1.3335, "step": 1355 }, { "epoch": 0.4, "learning_rate": 5.48274475316571e-05, "loss": 1.3948, "step": 1356 }, { "epoch": 0.4, "learning_rate": 5.479212731492723e-05, "loss": 1.3258, "step": 1357 }, { "epoch": 0.4, "learning_rate": 5.4756793732629335e-05, "loss": 1.3009, "step": 1358 }, { "epoch": 0.4, "learning_rate": 5.472144681668938e-05, "loss": 1.3647, "step": 1359 }, { "epoch": 0.4, "learning_rate": 5.4686086599045424e-05, "loss": 1.3574, "step": 1360 }, { "epoch": 0.4, "learning_rate": 5.465071311164753e-05, "loss": 1.3108, "step": 1361 }, { "epoch": 0.4, "learning_rate": 5.4615326386457755e-05, "loss": 1.36, "step": 1362 }, { "epoch": 0.4, "learning_rate": 5.45799264554501e-05, "loss": 1.3399, "step": 1363 }, { "epoch": 0.4, "learning_rate": 5.4544513350610527e-05, "loss": 1.355, "step": 1364 }, { "epoch": 0.4, "learning_rate": 5.4509087103936866e-05, "loss": 1.3424, "step": 1365 }, { "epoch": 0.4, "learning_rate": 5.447364774743887e-05, "loss": 1.3227, "step": 1366 }, { "epoch": 0.4, "learning_rate": 5.443819531313807e-05, "loss": 1.3209, "step": 1367 }, { "epoch": 0.4, "learning_rate": 5.440272983306789e-05, "loss": 1.3286, "step": 1368 }, { "epoch": 0.4, "learning_rate": 5.436725133927347e-05, "loss": 1.3217, "step": 1369 }, { "epoch": 0.4, "learning_rate": 5.4331759863811764e-05, "loss": 1.2709, "step": 1370 }, { "epoch": 0.4, "learning_rate": 5.429625543875143e-05, "loss": 1.3699, "step": 1371 }, { "epoch": 0.4, "learning_rate": 5.426073809617284e-05, "loss": 1.2988, "step": 1372 }, { "epoch": 0.4, "learning_rate": 5.4225207868167994e-05, "loss": 1.3489, "step": 1373 }, { "epoch": 0.4, "learning_rate": 5.418966478684061e-05, "loss": 1.3632, "step": 1374 }, { "epoch": 0.4, "learning_rate": 5.415410888430595e-05, "loss": 1.3064, "step": 1375 }, { "epoch": 0.4, "learning_rate": 5.4118540192690905e-05, "loss": 1.3105, "step": 1376 }, { "epoch": 0.4, "learning_rate": 5.408295874413389e-05, "loss": 1.3146, "step": 1377 }, { "epoch": 0.4, "learning_rate": 5.404736457078489e-05, "loss": 1.3923, "step": 1378 }, { "epoch": 0.4, "learning_rate": 5.401175770480534e-05, "loss": 1.3058, "step": 1379 }, { "epoch": 0.4, "learning_rate": 5.397613817836818e-05, "loss": 1.3104, "step": 1380 }, { "epoch": 0.41, "learning_rate": 5.3940506023657745e-05, "loss": 1.3358, "step": 1381 }, { "epoch": 0.41, "learning_rate": 5.390486127286984e-05, "loss": 1.377, "step": 1382 }, { "epoch": 0.41, "learning_rate": 5.38692039582116e-05, "loss": 1.2862, "step": 1383 }, { "epoch": 0.41, "learning_rate": 5.3833534111901555e-05, "loss": 1.3436, "step": 1384 }, { "epoch": 0.41, "learning_rate": 5.379785176616951e-05, "loss": 1.3288, "step": 1385 }, { "epoch": 0.41, "learning_rate": 5.3762156953256596e-05, "loss": 1.376, "step": 1386 }, { "epoch": 0.41, "learning_rate": 5.3726449705415214e-05, "loss": 1.2981, "step": 1387 }, { "epoch": 0.41, "learning_rate": 5.3690730054908985e-05, "loss": 1.3244, "step": 1388 }, { "epoch": 0.41, "learning_rate": 5.365499803401273e-05, "loss": 1.3828, "step": 1389 }, { "epoch": 0.41, "learning_rate": 5.361925367501248e-05, "loss": 1.3495, "step": 1390 }, { "epoch": 0.41, "learning_rate": 5.3583497010205356e-05, "loss": 1.2911, "step": 1391 }, { "epoch": 0.41, "learning_rate": 5.354772807189967e-05, "loss": 1.3158, "step": 1392 }, { "epoch": 0.41, "learning_rate": 5.3511946892414775e-05, "loss": 1.3606, "step": 1393 }, { "epoch": 0.41, "learning_rate": 5.34761535040811e-05, "loss": 1.3096, "step": 1394 }, { "epoch": 0.41, "learning_rate": 5.3440347939240086e-05, "loss": 1.3594, "step": 1395 }, { "epoch": 0.41, "learning_rate": 5.340453023024422e-05, "loss": 1.3525, "step": 1396 }, { "epoch": 0.41, "learning_rate": 5.336870040945691e-05, "loss": 1.3859, "step": 1397 }, { "epoch": 0.41, "learning_rate": 5.333285850925256e-05, "loss": 1.3238, "step": 1398 }, { "epoch": 0.41, "learning_rate": 5.3297004562016436e-05, "loss": 1.3199, "step": 1399 }, { "epoch": 0.41, "learning_rate": 5.3261138600144745e-05, "loss": 1.3119, "step": 1400 }, { "epoch": 0.41, "eval_loss": 0.12737244367599487, "eval_runtime": 24.4081, "eval_samples_per_second": 81.94, "eval_steps_per_second": 0.656, "step": 1400 }, { "epoch": 0.41, "learning_rate": 5.3225260656044484e-05, "loss": 1.3797, "step": 1401 }, { "epoch": 0.41, "learning_rate": 5.318937076213354e-05, "loss": 1.2837, "step": 1402 }, { "epoch": 0.41, "learning_rate": 5.315346895084056e-05, "loss": 1.3252, "step": 1403 }, { "epoch": 0.41, "learning_rate": 5.3117555254604956e-05, "loss": 1.4006, "step": 1404 }, { "epoch": 0.41, "learning_rate": 5.3081629705876915e-05, "loss": 1.3279, "step": 1405 }, { "epoch": 0.41, "learning_rate": 5.3045692337117294e-05, "loss": 1.3905, "step": 1406 }, { "epoch": 0.41, "learning_rate": 5.300974318079766e-05, "loss": 1.3005, "step": 1407 }, { "epoch": 0.41, "learning_rate": 5.297378226940019e-05, "loss": 1.3374, "step": 1408 }, { "epoch": 0.41, "learning_rate": 5.293780963541775e-05, "loss": 1.302, "step": 1409 }, { "epoch": 0.41, "learning_rate": 5.290182531135372e-05, "loss": 1.3665, "step": 1410 }, { "epoch": 0.41, "learning_rate": 5.2865829329722093e-05, "loss": 1.3417, "step": 1411 }, { "epoch": 0.41, "learning_rate": 5.2829821723047386e-05, "loss": 1.3647, "step": 1412 }, { "epoch": 0.41, "learning_rate": 5.279380252386461e-05, "loss": 1.329, "step": 1413 }, { "epoch": 0.41, "learning_rate": 5.275777176471927e-05, "loss": 1.324, "step": 1414 }, { "epoch": 0.42, "learning_rate": 5.272172947816729e-05, "loss": 1.3447, "step": 1415 }, { "epoch": 0.42, "learning_rate": 5.268567569677502e-05, "loss": 1.3662, "step": 1416 }, { "epoch": 0.42, "learning_rate": 5.264961045311921e-05, "loss": 1.3524, "step": 1417 }, { "epoch": 0.42, "learning_rate": 5.2613533779786945e-05, "loss": 1.2874, "step": 1418 }, { "epoch": 0.42, "learning_rate": 5.2577445709375666e-05, "loss": 1.4179, "step": 1419 }, { "epoch": 0.42, "learning_rate": 5.2541346274493065e-05, "loss": 1.3462, "step": 1420 }, { "epoch": 0.42, "learning_rate": 5.250523550775716e-05, "loss": 1.3295, "step": 1421 }, { "epoch": 0.42, "learning_rate": 5.246911344179616e-05, "loss": 1.3016, "step": 1422 }, { "epoch": 0.42, "learning_rate": 5.243298010924852e-05, "loss": 1.3403, "step": 1423 }, { "epoch": 0.42, "learning_rate": 5.239683554276283e-05, "loss": 1.3318, "step": 1424 }, { "epoch": 0.42, "learning_rate": 5.23606797749979e-05, "loss": 1.2756, "step": 1425 }, { "epoch": 0.42, "learning_rate": 5.232451283862259e-05, "loss": 1.2976, "step": 1426 }, { "epoch": 0.42, "learning_rate": 5.2288334766315896e-05, "loss": 1.3963, "step": 1427 }, { "epoch": 0.42, "learning_rate": 5.225214559076683e-05, "loss": 1.3155, "step": 1428 }, { "epoch": 0.42, "learning_rate": 5.221594534467452e-05, "loss": 1.3423, "step": 1429 }, { "epoch": 0.42, "learning_rate": 5.217973406074798e-05, "loss": 1.3264, "step": 1430 }, { "epoch": 0.42, "learning_rate": 5.2143511771706324e-05, "loss": 1.344, "step": 1431 }, { "epoch": 0.42, "learning_rate": 5.210727851027851e-05, "loss": 1.3426, "step": 1432 }, { "epoch": 0.42, "learning_rate": 5.207103430920345e-05, "loss": 1.3934, "step": 1433 }, { "epoch": 0.42, "learning_rate": 5.203477920122996e-05, "loss": 1.355, "step": 1434 }, { "epoch": 0.42, "learning_rate": 5.199851321911667e-05, "loss": 1.3506, "step": 1435 }, { "epoch": 0.42, "learning_rate": 5.1962236395632055e-05, "loss": 1.3267, "step": 1436 }, { "epoch": 0.42, "learning_rate": 5.1925948763554396e-05, "loss": 1.2905, "step": 1437 }, { "epoch": 0.42, "learning_rate": 5.1889650355671725e-05, "loss": 1.3592, "step": 1438 }, { "epoch": 0.42, "learning_rate": 5.1853341204781815e-05, "loss": 1.3158, "step": 1439 }, { "epoch": 0.42, "learning_rate": 5.1817021343692156e-05, "loss": 1.29, "step": 1440 }, { "epoch": 0.42, "learning_rate": 5.178069080521989e-05, "loss": 1.3404, "step": 1441 }, { "epoch": 0.42, "learning_rate": 5.174434962219182e-05, "loss": 1.388, "step": 1442 }, { "epoch": 0.42, "learning_rate": 5.1707997827444394e-05, "loss": 1.3279, "step": 1443 }, { "epoch": 0.42, "learning_rate": 5.167163545382359e-05, "loss": 1.2866, "step": 1444 }, { "epoch": 0.42, "learning_rate": 5.163526253418501e-05, "loss": 1.3182, "step": 1445 }, { "epoch": 0.42, "learning_rate": 5.1598879101393734e-05, "loss": 1.3818, "step": 1446 }, { "epoch": 0.42, "learning_rate": 5.156248518832438e-05, "loss": 1.3141, "step": 1447 }, { "epoch": 0.42, "learning_rate": 5.152608082786098e-05, "loss": 1.3134, "step": 1448 }, { "epoch": 0.43, "learning_rate": 5.1489666052897075e-05, "loss": 1.4078, "step": 1449 }, { "epoch": 0.43, "learning_rate": 5.1453240896335564e-05, "loss": 1.333, "step": 1450 }, { "epoch": 0.43, "learning_rate": 5.1416805391088765e-05, "loss": 1.3424, "step": 1451 }, { "epoch": 0.43, "learning_rate": 5.13803595700783e-05, "loss": 1.2766, "step": 1452 }, { "epoch": 0.43, "learning_rate": 5.1343903466235174e-05, "loss": 1.3573, "step": 1453 }, { "epoch": 0.43, "learning_rate": 5.130743711249961e-05, "loss": 1.3733, "step": 1454 }, { "epoch": 0.43, "learning_rate": 5.127096054182116e-05, "loss": 1.372, "step": 1455 }, { "epoch": 0.43, "learning_rate": 5.123447378715856e-05, "loss": 1.3083, "step": 1456 }, { "epoch": 0.43, "learning_rate": 5.1197976881479786e-05, "loss": 1.3941, "step": 1457 }, { "epoch": 0.43, "learning_rate": 5.116146985776194e-05, "loss": 1.3385, "step": 1458 }, { "epoch": 0.43, "learning_rate": 5.112495274899131e-05, "loss": 1.3056, "step": 1459 }, { "epoch": 0.43, "learning_rate": 5.108842558816327e-05, "loss": 1.3337, "step": 1460 }, { "epoch": 0.43, "learning_rate": 5.10518884082823e-05, "loss": 1.3294, "step": 1461 }, { "epoch": 0.43, "learning_rate": 5.101534124236189e-05, "loss": 1.3105, "step": 1462 }, { "epoch": 0.43, "learning_rate": 5.0978784123424617e-05, "loss": 1.2798, "step": 1463 }, { "epoch": 0.43, "learning_rate": 5.094221708450199e-05, "loss": 1.345, "step": 1464 }, { "epoch": 0.43, "learning_rate": 5.090564015863452e-05, "loss": 1.339, "step": 1465 }, { "epoch": 0.43, "learning_rate": 5.086905337887162e-05, "loss": 1.2971, "step": 1466 }, { "epoch": 0.43, "learning_rate": 5.083245677827165e-05, "loss": 1.3337, "step": 1467 }, { "epoch": 0.43, "learning_rate": 5.0795850389901784e-05, "loss": 1.4053, "step": 1468 }, { "epoch": 0.43, "learning_rate": 5.075923424683811e-05, "loss": 1.2797, "step": 1469 }, { "epoch": 0.43, "learning_rate": 5.0722608382165473e-05, "loss": 1.3172, "step": 1470 }, { "epoch": 0.43, "learning_rate": 5.068597282897754e-05, "loss": 1.3616, "step": 1471 }, { "epoch": 0.43, "learning_rate": 5.06493276203767e-05, "loss": 1.3528, "step": 1472 }, { "epoch": 0.43, "learning_rate": 5.061267278947408e-05, "loss": 1.3289, "step": 1473 }, { "epoch": 0.43, "learning_rate": 5.05760083693895e-05, "loss": 1.2941, "step": 1474 }, { "epoch": 0.43, "learning_rate": 5.053933439325147e-05, "loss": 1.3064, "step": 1475 }, { "epoch": 0.43, "learning_rate": 5.050265089419709e-05, "loss": 1.3308, "step": 1476 }, { "epoch": 0.43, "learning_rate": 5.04659579053721e-05, "loss": 1.3564, "step": 1477 }, { "epoch": 0.43, "learning_rate": 5.042925545993079e-05, "loss": 1.2551, "step": 1478 }, { "epoch": 0.43, "learning_rate": 5.039254359103602e-05, "loss": 1.2842, "step": 1479 }, { "epoch": 0.43, "learning_rate": 5.035582233185914e-05, "loss": 1.3351, "step": 1480 }, { "epoch": 0.43, "learning_rate": 5.031909171557998e-05, "loss": 1.389, "step": 1481 }, { "epoch": 0.43, "learning_rate": 5.0282351775386866e-05, "loss": 1.3236, "step": 1482 }, { "epoch": 0.44, "learning_rate": 5.02456025444765e-05, "loss": 1.3634, "step": 1483 }, { "epoch": 0.44, "learning_rate": 5.0208844056054025e-05, "loss": 1.3284, "step": 1484 }, { "epoch": 0.44, "learning_rate": 5.0172076343332914e-05, "loss": 1.329, "step": 1485 }, { "epoch": 0.44, "learning_rate": 5.013529943953497e-05, "loss": 1.3316, "step": 1486 }, { "epoch": 0.44, "learning_rate": 5.0098513377890336e-05, "loss": 1.3781, "step": 1487 }, { "epoch": 0.44, "learning_rate": 5.0061718191637394e-05, "loss": 1.3008, "step": 1488 }, { "epoch": 0.44, "learning_rate": 5.0024913914022796e-05, "loss": 1.292, "step": 1489 }, { "epoch": 0.44, "learning_rate": 4.99881005783014e-05, "loss": 1.3426, "step": 1490 }, { "epoch": 0.44, "learning_rate": 4.995127821773625e-05, "loss": 1.3207, "step": 1491 }, { "epoch": 0.44, "learning_rate": 4.991444686559853e-05, "loss": 1.3345, "step": 1492 }, { "epoch": 0.44, "learning_rate": 4.987760655516757e-05, "loss": 1.3345, "step": 1493 }, { "epoch": 0.44, "learning_rate": 4.984075731973079e-05, "loss": 1.3544, "step": 1494 }, { "epoch": 0.44, "learning_rate": 4.980389919258364e-05, "loss": 1.3371, "step": 1495 }, { "epoch": 0.44, "learning_rate": 4.976703220702967e-05, "loss": 1.3507, "step": 1496 }, { "epoch": 0.44, "learning_rate": 4.973015639638037e-05, "loss": 1.2927, "step": 1497 }, { "epoch": 0.44, "learning_rate": 4.9693271793955255e-05, "loss": 1.3525, "step": 1498 }, { "epoch": 0.44, "learning_rate": 4.965637843308173e-05, "loss": 1.3101, "step": 1499 }, { "epoch": 0.44, "learning_rate": 4.961947634709518e-05, "loss": 1.3267, "step": 1500 }, { "epoch": 0.44, "eval_loss": 0.1289711445569992, "eval_runtime": 24.6907, "eval_samples_per_second": 81.002, "eval_steps_per_second": 0.648, "step": 1500 }, { "epoch": 0.44, "learning_rate": 4.9582565569338806e-05, "loss": 1.3406, "step": 1501 }, { "epoch": 0.44, "learning_rate": 4.954564613316371e-05, "loss": 1.318, "step": 1502 }, { "epoch": 0.44, "learning_rate": 4.95087180719288e-05, "loss": 1.3414, "step": 1503 }, { "epoch": 0.44, "learning_rate": 4.947178141900079e-05, "loss": 1.3382, "step": 1504 }, { "epoch": 0.44, "learning_rate": 4.9434836207754104e-05, "loss": 1.3201, "step": 1505 }, { "epoch": 0.44, "learning_rate": 4.939788247157099e-05, "loss": 1.3079, "step": 1506 }, { "epoch": 0.44, "learning_rate": 4.936092024384132e-05, "loss": 1.2724, "step": 1507 }, { "epoch": 0.44, "learning_rate": 4.9323949557962684e-05, "loss": 1.2962, "step": 1508 }, { "epoch": 0.44, "learning_rate": 4.928697044734029e-05, "loss": 1.2819, "step": 1509 }, { "epoch": 0.44, "learning_rate": 4.924998294538697e-05, "loss": 1.3792, "step": 1510 }, { "epoch": 0.44, "learning_rate": 4.9212987085523123e-05, "loss": 1.2954, "step": 1511 }, { "epoch": 0.44, "learning_rate": 4.917598290117674e-05, "loss": 1.3168, "step": 1512 }, { "epoch": 0.44, "learning_rate": 4.913897042578327e-05, "loss": 1.3593, "step": 1513 }, { "epoch": 0.44, "learning_rate": 4.910194969278573e-05, "loss": 1.3198, "step": 1514 }, { "epoch": 0.44, "learning_rate": 4.906492073563452e-05, "loss": 1.365, "step": 1515 }, { "epoch": 0.44, "learning_rate": 4.902788358778753e-05, "loss": 1.3292, "step": 1516 }, { "epoch": 0.45, "learning_rate": 4.8990838282710015e-05, "loss": 1.4016, "step": 1517 }, { "epoch": 0.45, "learning_rate": 4.8953784853874624e-05, "loss": 1.3013, "step": 1518 }, { "epoch": 0.45, "learning_rate": 4.8916723334761334e-05, "loss": 1.3434, "step": 1519 }, { "epoch": 0.45, "learning_rate": 4.887965375885744e-05, "loss": 1.3012, "step": 1520 }, { "epoch": 0.45, "learning_rate": 4.88425761596575e-05, "loss": 1.3854, "step": 1521 }, { "epoch": 0.45, "learning_rate": 4.880549057066335e-05, "loss": 1.3082, "step": 1522 }, { "epoch": 0.45, "learning_rate": 4.8768397025383996e-05, "loss": 1.3121, "step": 1523 }, { "epoch": 0.45, "learning_rate": 4.873129555733571e-05, "loss": 1.334, "step": 1524 }, { "epoch": 0.45, "learning_rate": 4.869418620004184e-05, "loss": 1.3792, "step": 1525 }, { "epoch": 0.45, "learning_rate": 4.865706898703293e-05, "loss": 1.3608, "step": 1526 }, { "epoch": 0.45, "learning_rate": 4.861994395184657e-05, "loss": 1.3345, "step": 1527 }, { "epoch": 0.45, "learning_rate": 4.858281112802745e-05, "loss": 1.3449, "step": 1528 }, { "epoch": 0.45, "learning_rate": 4.8545670549127295e-05, "loss": 1.3325, "step": 1529 }, { "epoch": 0.45, "learning_rate": 4.8508522248704816e-05, "loss": 1.3026, "step": 1530 }, { "epoch": 0.45, "learning_rate": 4.8471366260325715e-05, "loss": 1.3181, "step": 1531 }, { "epoch": 0.45, "learning_rate": 4.843420261756266e-05, "loss": 1.3538, "step": 1532 }, { "epoch": 0.45, "learning_rate": 4.83970313539952e-05, "loss": 1.3219, "step": 1533 }, { "epoch": 0.45, "learning_rate": 4.8359852503209785e-05, "loss": 1.2695, "step": 1534 }, { "epoch": 0.45, "learning_rate": 4.832266609879972e-05, "loss": 1.3187, "step": 1535 }, { "epoch": 0.45, "learning_rate": 4.828547217436515e-05, "loss": 1.3121, "step": 1536 }, { "epoch": 0.45, "learning_rate": 4.824827076351298e-05, "loss": 1.2867, "step": 1537 }, { "epoch": 0.45, "learning_rate": 4.821106189985693e-05, "loss": 1.3149, "step": 1538 }, { "epoch": 0.45, "learning_rate": 4.817384561701738e-05, "loss": 1.3152, "step": 1539 }, { "epoch": 0.45, "learning_rate": 4.8136621948621525e-05, "loss": 1.3443, "step": 1540 }, { "epoch": 0.45, "learning_rate": 4.809939092830309e-05, "loss": 1.348, "step": 1541 }, { "epoch": 0.45, "learning_rate": 4.8062152589702573e-05, "loss": 1.3102, "step": 1542 }, { "epoch": 0.45, "learning_rate": 4.8024906966467e-05, "loss": 1.301, "step": 1543 }, { "epoch": 0.45, "learning_rate": 4.798765409225004e-05, "loss": 1.3714, "step": 1544 }, { "epoch": 0.45, "learning_rate": 4.7950394000711844e-05, "loss": 1.2528, "step": 1545 }, { "epoch": 0.45, "learning_rate": 4.7913126725519164e-05, "loss": 1.3302, "step": 1546 }, { "epoch": 0.45, "learning_rate": 4.787585230034518e-05, "loss": 1.3359, "step": 1547 }, { "epoch": 0.45, "learning_rate": 4.783857075886956e-05, "loss": 1.3627, "step": 1548 }, { "epoch": 0.45, "learning_rate": 4.7801282134778395e-05, "loss": 1.3164, "step": 1549 }, { "epoch": 0.45, "learning_rate": 4.7763986461764186e-05, "loss": 1.3441, "step": 1550 }, { "epoch": 0.46, "learning_rate": 4.772668377352579e-05, "loss": 1.3688, "step": 1551 }, { "epoch": 0.46, "learning_rate": 4.76893741037684e-05, "loss": 1.3072, "step": 1552 }, { "epoch": 0.46, "learning_rate": 4.7652057486203525e-05, "loss": 1.3128, "step": 1553 }, { "epoch": 0.46, "learning_rate": 4.7614733954548974e-05, "loss": 1.3613, "step": 1554 }, { "epoch": 0.46, "learning_rate": 4.757740354252876e-05, "loss": 1.3199, "step": 1555 }, { "epoch": 0.46, "learning_rate": 4.7540066283873134e-05, "loss": 1.3488, "step": 1556 }, { "epoch": 0.46, "learning_rate": 4.750272221231854e-05, "loss": 1.3038, "step": 1557 }, { "epoch": 0.46, "learning_rate": 4.746537136160757e-05, "loss": 1.348, "step": 1558 }, { "epoch": 0.46, "learning_rate": 4.7428013765488946e-05, "loss": 1.3155, "step": 1559 }, { "epoch": 0.46, "learning_rate": 4.7390649457717476e-05, "loss": 1.3273, "step": 1560 }, { "epoch": 0.46, "learning_rate": 4.735327847205402e-05, "loss": 1.3423, "step": 1561 }, { "epoch": 0.46, "learning_rate": 4.731590084226552e-05, "loss": 1.3632, "step": 1562 }, { "epoch": 0.46, "learning_rate": 4.727851660212487e-05, "loss": 1.2966, "step": 1563 }, { "epoch": 0.46, "learning_rate": 4.724112578541098e-05, "loss": 1.3457, "step": 1564 }, { "epoch": 0.46, "learning_rate": 4.7203728425908644e-05, "loss": 1.2946, "step": 1565 }, { "epoch": 0.46, "learning_rate": 4.716632455740865e-05, "loss": 1.3452, "step": 1566 }, { "epoch": 0.46, "learning_rate": 4.712891421370757e-05, "loss": 1.3059, "step": 1567 }, { "epoch": 0.46, "learning_rate": 4.709149742860792e-05, "loss": 1.3404, "step": 1568 }, { "epoch": 0.46, "learning_rate": 4.705407423591797e-05, "loss": 1.3027, "step": 1569 }, { "epoch": 0.46, "learning_rate": 4.7016644669451806e-05, "loss": 1.3268, "step": 1570 }, { "epoch": 0.46, "learning_rate": 4.697920876302928e-05, "loss": 1.3423, "step": 1571 }, { "epoch": 0.46, "learning_rate": 4.694176655047596e-05, "loss": 1.3156, "step": 1572 }, { "epoch": 0.46, "learning_rate": 4.690431806562311e-05, "loss": 1.3373, "step": 1573 }, { "epoch": 0.46, "learning_rate": 4.686686334230766e-05, "loss": 1.3333, "step": 1574 }, { "epoch": 0.46, "learning_rate": 4.682940241437221e-05, "loss": 1.3011, "step": 1575 }, { "epoch": 0.46, "learning_rate": 4.679193531566491e-05, "loss": 1.2923, "step": 1576 }, { "epoch": 0.46, "learning_rate": 4.675446208003954e-05, "loss": 1.3618, "step": 1577 }, { "epoch": 0.46, "learning_rate": 4.6716982741355386e-05, "loss": 1.3239, "step": 1578 }, { "epoch": 0.46, "learning_rate": 4.667949733347726e-05, "loss": 1.2958, "step": 1579 }, { "epoch": 0.46, "learning_rate": 4.664200589027546e-05, "loss": 1.3201, "step": 1580 }, { "epoch": 0.46, "learning_rate": 4.660450844562576e-05, "loss": 1.2923, "step": 1581 }, { "epoch": 0.46, "learning_rate": 4.6567005033409294e-05, "loss": 1.3134, "step": 1582 }, { "epoch": 0.46, "learning_rate": 4.652949568751267e-05, "loss": 1.3365, "step": 1583 }, { "epoch": 0.46, "learning_rate": 4.649198044182779e-05, "loss": 1.3283, "step": 1584 }, { "epoch": 0.47, "learning_rate": 4.645445933025192e-05, "loss": 1.4115, "step": 1585 }, { "epoch": 0.47, "learning_rate": 4.6416932386687624e-05, "loss": 1.3075, "step": 1586 }, { "epoch": 0.47, "learning_rate": 4.637939964504274e-05, "loss": 1.3186, "step": 1587 }, { "epoch": 0.47, "learning_rate": 4.63418611392303e-05, "loss": 1.3817, "step": 1588 }, { "epoch": 0.47, "learning_rate": 4.630431690316861e-05, "loss": 1.3419, "step": 1589 }, { "epoch": 0.47, "learning_rate": 4.6266766970781116e-05, "loss": 1.2899, "step": 1590 }, { "epoch": 0.47, "learning_rate": 4.622921137599643e-05, "loss": 1.2792, "step": 1591 }, { "epoch": 0.47, "learning_rate": 4.619165015274824e-05, "loss": 1.3751, "step": 1592 }, { "epoch": 0.47, "learning_rate": 4.615408333497538e-05, "loss": 1.2905, "step": 1593 }, { "epoch": 0.47, "learning_rate": 4.611651095662168e-05, "loss": 1.3497, "step": 1594 }, { "epoch": 0.47, "learning_rate": 4.607893305163604e-05, "loss": 1.3325, "step": 1595 }, { "epoch": 0.47, "learning_rate": 4.6041349653972336e-05, "loss": 1.3999, "step": 1596 }, { "epoch": 0.47, "learning_rate": 4.6003760797589404e-05, "loss": 1.2777, "step": 1597 }, { "epoch": 0.47, "learning_rate": 4.5966166516450985e-05, "loss": 1.3144, "step": 1598 }, { "epoch": 0.47, "learning_rate": 4.59285668445258e-05, "loss": 1.3161, "step": 1599 }, { "epoch": 0.47, "learning_rate": 4.589096181578734e-05, "loss": 1.3084, "step": 1600 }, { "epoch": 0.47, "eval_loss": 0.12573568522930145, "eval_runtime": 24.516, "eval_samples_per_second": 81.579, "eval_steps_per_second": 0.653, "step": 1600 }, { "epoch": 0.47, "learning_rate": 4.585335146421404e-05, "loss": 1.3642, "step": 1601 }, { "epoch": 0.47, "learning_rate": 4.581573582378906e-05, "loss": 1.3107, "step": 1602 }, { "epoch": 0.47, "learning_rate": 4.577811492850039e-05, "loss": 1.3872, "step": 1603 }, { "epoch": 0.47, "learning_rate": 4.574048881234073e-05, "loss": 1.3334, "step": 1604 }, { "epoch": 0.47, "learning_rate": 4.5702857509307536e-05, "loss": 1.2477, "step": 1605 }, { "epoch": 0.47, "learning_rate": 4.5665221053402915e-05, "loss": 1.3203, "step": 1606 }, { "epoch": 0.47, "learning_rate": 4.562757947863365e-05, "loss": 1.3816, "step": 1607 }, { "epoch": 0.47, "learning_rate": 4.558993281901116e-05, "loss": 1.2813, "step": 1608 }, { "epoch": 0.47, "learning_rate": 4.5552281108551435e-05, "loss": 1.2786, "step": 1609 }, { "epoch": 0.47, "learning_rate": 4.551462438127503e-05, "loss": 1.3509, "step": 1610 }, { "epoch": 0.47, "learning_rate": 4.547696267120705e-05, "loss": 1.3546, "step": 1611 }, { "epoch": 0.47, "learning_rate": 4.543929601237709e-05, "loss": 1.316, "step": 1612 }, { "epoch": 0.47, "learning_rate": 4.540162443881922e-05, "loss": 1.3354, "step": 1613 }, { "epoch": 0.47, "learning_rate": 4.536394798457195e-05, "loss": 1.2978, "step": 1614 }, { "epoch": 0.47, "learning_rate": 4.5326266683678226e-05, "loss": 1.319, "step": 1615 }, { "epoch": 0.47, "learning_rate": 4.5288580570185294e-05, "loss": 1.318, "step": 1616 }, { "epoch": 0.47, "learning_rate": 4.525088967814485e-05, "loss": 1.3458, "step": 1617 }, { "epoch": 0.47, "learning_rate": 4.5213194041612824e-05, "loss": 1.3451, "step": 1618 }, { "epoch": 0.47, "learning_rate": 4.517549369464949e-05, "loss": 1.2594, "step": 1619 }, { "epoch": 0.48, "learning_rate": 4.513778867131936e-05, "loss": 1.3143, "step": 1620 }, { "epoch": 0.48, "learning_rate": 4.510007900569116e-05, "loss": 1.32, "step": 1621 }, { "epoch": 0.48, "learning_rate": 4.506236473183782e-05, "loss": 1.3629, "step": 1622 }, { "epoch": 0.48, "learning_rate": 4.5024645883836426e-05, "loss": 1.3109, "step": 1623 }, { "epoch": 0.48, "learning_rate": 4.498692249576821e-05, "loss": 1.3158, "step": 1624 }, { "epoch": 0.48, "learning_rate": 4.494919460171851e-05, "loss": 1.276, "step": 1625 }, { "epoch": 0.48, "learning_rate": 4.4911462235776704e-05, "loss": 1.3392, "step": 1626 }, { "epoch": 0.48, "learning_rate": 4.4873725432036255e-05, "loss": 1.288, "step": 1627 }, { "epoch": 0.48, "learning_rate": 4.4835984224594586e-05, "loss": 1.2946, "step": 1628 }, { "epoch": 0.48, "learning_rate": 4.479823864755316e-05, "loss": 1.2925, "step": 1629 }, { "epoch": 0.48, "learning_rate": 4.476048873501733e-05, "loss": 1.3792, "step": 1630 }, { "epoch": 0.48, "learning_rate": 4.472273452109639e-05, "loss": 1.2779, "step": 1631 }, { "epoch": 0.48, "learning_rate": 4.468497603990352e-05, "loss": 1.3084, "step": 1632 }, { "epoch": 0.48, "learning_rate": 4.464721332555577e-05, "loss": 1.374, "step": 1633 }, { "epoch": 0.48, "learning_rate": 4.4609446412174e-05, "loss": 1.2989, "step": 1634 }, { "epoch": 0.48, "learning_rate": 4.457167533388286e-05, "loss": 1.3216, "step": 1635 }, { "epoch": 0.48, "learning_rate": 4.4533900124810756e-05, "loss": 1.2977, "step": 1636 }, { "epoch": 0.48, "learning_rate": 4.449612081908987e-05, "loss": 1.339, "step": 1637 }, { "epoch": 0.48, "learning_rate": 4.445833745085602e-05, "loss": 1.2815, "step": 1638 }, { "epoch": 0.48, "learning_rate": 4.4420550054248756e-05, "loss": 1.3592, "step": 1639 }, { "epoch": 0.48, "learning_rate": 4.438275866341123e-05, "loss": 1.3131, "step": 1640 }, { "epoch": 0.48, "learning_rate": 4.4344963312490216e-05, "loss": 1.3288, "step": 1641 }, { "epoch": 0.48, "learning_rate": 4.4307164035636075e-05, "loss": 1.3354, "step": 1642 }, { "epoch": 0.48, "learning_rate": 4.4269360867002675e-05, "loss": 1.3171, "step": 1643 }, { "epoch": 0.48, "learning_rate": 4.423155384074747e-05, "loss": 1.3129, "step": 1644 }, { "epoch": 0.48, "learning_rate": 4.4193742991031325e-05, "loss": 1.3287, "step": 1645 }, { "epoch": 0.48, "learning_rate": 4.415592835201862e-05, "loss": 1.3433, "step": 1646 }, { "epoch": 0.48, "learning_rate": 4.4118109957877114e-05, "loss": 1.2809, "step": 1647 }, { "epoch": 0.48, "learning_rate": 4.408028784277799e-05, "loss": 1.3841, "step": 1648 }, { "epoch": 0.48, "learning_rate": 4.404246204089578e-05, "loss": 1.3282, "step": 1649 }, { "epoch": 0.48, "learning_rate": 4.4004632586408366e-05, "loss": 1.2509, "step": 1650 }, { "epoch": 0.48, "learning_rate": 4.396679951349687e-05, "loss": 1.2912, "step": 1651 }, { "epoch": 0.48, "learning_rate": 4.392896285634577e-05, "loss": 1.3519, "step": 1652 }, { "epoch": 0.48, "learning_rate": 4.389112264914273e-05, "loss": 1.3311, "step": 1653 }, { "epoch": 0.49, "learning_rate": 4.385327892607861e-05, "loss": 1.3475, "step": 1654 }, { "epoch": 0.49, "learning_rate": 4.381543172134748e-05, "loss": 1.3375, "step": 1655 }, { "epoch": 0.49, "learning_rate": 4.377758106914655e-05, "loss": 1.351, "step": 1656 }, { "epoch": 0.49, "learning_rate": 4.373972700367613e-05, "loss": 1.2965, "step": 1657 }, { "epoch": 0.49, "learning_rate": 4.370186955913962e-05, "loss": 1.2933, "step": 1658 }, { "epoch": 0.49, "learning_rate": 4.3664008769743484e-05, "loss": 1.2752, "step": 1659 }, { "epoch": 0.49, "learning_rate": 4.362614466969721e-05, "loss": 1.3498, "step": 1660 }, { "epoch": 0.49, "learning_rate": 4.3588277293213236e-05, "loss": 1.3237, "step": 1661 }, { "epoch": 0.49, "learning_rate": 4.3550406674507026e-05, "loss": 1.3121, "step": 1662 }, { "epoch": 0.49, "learning_rate": 4.351253284779692e-05, "loss": 1.3118, "step": 1663 }, { "epoch": 0.49, "learning_rate": 4.3474655847304177e-05, "loss": 1.2915, "step": 1664 }, { "epoch": 0.49, "learning_rate": 4.343677570725293e-05, "loss": 1.3193, "step": 1665 }, { "epoch": 0.49, "learning_rate": 4.3398892461870146e-05, "loss": 1.3559, "step": 1666 }, { "epoch": 0.49, "learning_rate": 4.336100614538557e-05, "loss": 1.3361, "step": 1667 }, { "epoch": 0.49, "learning_rate": 4.332311679203177e-05, "loss": 1.3126, "step": 1668 }, { "epoch": 0.49, "learning_rate": 4.3285224436044026e-05, "loss": 1.2813, "step": 1669 }, { "epoch": 0.49, "learning_rate": 4.324732911166034e-05, "loss": 1.3133, "step": 1670 }, { "epoch": 0.49, "learning_rate": 4.3209430853121395e-05, "loss": 1.3254, "step": 1671 }, { "epoch": 0.49, "learning_rate": 4.317152969467053e-05, "loss": 1.3145, "step": 1672 }, { "epoch": 0.49, "learning_rate": 4.313362567055367e-05, "loss": 1.3175, "step": 1673 }, { "epoch": 0.49, "learning_rate": 4.3095718815019395e-05, "loss": 1.3076, "step": 1674 }, { "epoch": 0.49, "learning_rate": 4.305780916231878e-05, "loss": 1.3127, "step": 1675 }, { "epoch": 0.49, "learning_rate": 4.301989674670547e-05, "loss": 1.2822, "step": 1676 }, { "epoch": 0.49, "learning_rate": 4.298198160243558e-05, "loss": 1.3067, "step": 1677 }, { "epoch": 0.49, "learning_rate": 4.294406376376771e-05, "loss": 1.3409, "step": 1678 }, { "epoch": 0.49, "learning_rate": 4.290614326496289e-05, "loss": 1.3005, "step": 1679 }, { "epoch": 0.49, "learning_rate": 4.286822014028453e-05, "loss": 1.317, "step": 1680 }, { "epoch": 0.49, "learning_rate": 4.283029442399844e-05, "loss": 1.2928, "step": 1681 }, { "epoch": 0.49, "learning_rate": 4.2792366150372784e-05, "loss": 1.2939, "step": 1682 }, { "epoch": 0.49, "learning_rate": 4.2754435353677985e-05, "loss": 1.3105, "step": 1683 }, { "epoch": 0.49, "learning_rate": 4.27165020681868e-05, "loss": 1.3318, "step": 1684 }, { "epoch": 0.49, "learning_rate": 4.2678566328174196e-05, "loss": 1.3283, "step": 1685 }, { "epoch": 0.49, "learning_rate": 4.2640628167917387e-05, "loss": 1.3281, "step": 1686 }, { "epoch": 0.49, "learning_rate": 4.260268762169575e-05, "loss": 1.3179, "step": 1687 }, { "epoch": 0.5, "learning_rate": 4.2564744723790835e-05, "loss": 1.3296, "step": 1688 }, { "epoch": 0.5, "learning_rate": 4.25267995084863e-05, "loss": 1.2747, "step": 1689 }, { "epoch": 0.5, "learning_rate": 4.248885201006792e-05, "loss": 1.352, "step": 1690 }, { "epoch": 0.5, "learning_rate": 4.245090226282352e-05, "loss": 1.256, "step": 1691 }, { "epoch": 0.5, "learning_rate": 4.241295030104295e-05, "loss": 1.2635, "step": 1692 }, { "epoch": 0.5, "learning_rate": 4.237499615901805e-05, "loss": 1.3027, "step": 1693 }, { "epoch": 0.5, "learning_rate": 4.2337039871042684e-05, "loss": 1.3289, "step": 1694 }, { "epoch": 0.5, "learning_rate": 4.2299081471412586e-05, "loss": 1.306, "step": 1695 }, { "epoch": 0.5, "learning_rate": 4.2261120994425464e-05, "loss": 1.2563, "step": 1696 }, { "epoch": 0.5, "learning_rate": 4.2223158474380834e-05, "loss": 1.303, "step": 1697 }, { "epoch": 0.5, "learning_rate": 4.218519394558013e-05, "loss": 1.2909, "step": 1698 }, { "epoch": 0.5, "learning_rate": 4.214722744232654e-05, "loss": 1.2942, "step": 1699 }, { "epoch": 0.5, "learning_rate": 4.210925899892507e-05, "loss": 1.2985, "step": 1700 }, { "epoch": 0.5, "eval_loss": 0.1250637024641037, "eval_runtime": 24.3845, "eval_samples_per_second": 82.019, "eval_steps_per_second": 0.656, "step": 1700 }, { "epoch": 0.5, "learning_rate": 4.2071288649682455e-05, "loss": 1.3565, "step": 1701 }, { "epoch": 0.5, "learning_rate": 4.20333164289072e-05, "loss": 1.2961, "step": 1702 }, { "epoch": 0.5, "learning_rate": 4.199534237090943e-05, "loss": 1.3133, "step": 1703 }, { "epoch": 0.5, "learning_rate": 4.1957366510001e-05, "loss": 1.3518, "step": 1704 }, { "epoch": 0.5, "learning_rate": 4.191938888049534e-05, "loss": 1.3327, "step": 1705 }, { "epoch": 0.5, "learning_rate": 4.188140951670751e-05, "loss": 1.3062, "step": 1706 }, { "epoch": 0.5, "learning_rate": 4.184342845295414e-05, "loss": 1.3093, "step": 1707 }, { "epoch": 0.5, "learning_rate": 4.1805445723553346e-05, "loss": 1.3004, "step": 1708 }, { "epoch": 0.5, "learning_rate": 4.1767461362824824e-05, "loss": 1.3283, "step": 1709 }, { "epoch": 0.5, "learning_rate": 4.172947540508969e-05, "loss": 1.2971, "step": 1710 }, { "epoch": 0.5, "learning_rate": 4.169148788467051e-05, "loss": 1.2979, "step": 1711 }, { "epoch": 0.5, "learning_rate": 4.1653498835891304e-05, "loss": 1.3378, "step": 1712 }, { "epoch": 0.5, "learning_rate": 4.1615508293077394e-05, "loss": 1.2779, "step": 1713 }, { "epoch": 0.5, "learning_rate": 4.157751629055553e-05, "loss": 1.3048, "step": 1714 }, { "epoch": 0.5, "learning_rate": 4.153952286265375e-05, "loss": 1.3023, "step": 1715 }, { "epoch": 0.5, "learning_rate": 4.150152804370136e-05, "loss": 1.364, "step": 1716 }, { "epoch": 0.5, "learning_rate": 4.146353186802895e-05, "loss": 1.3194, "step": 1717 }, { "epoch": 0.5, "learning_rate": 4.142553436996834e-05, "loss": 1.2827, "step": 1718 }, { "epoch": 0.5, "learning_rate": 4.1387535583852525e-05, "loss": 1.346, "step": 1719 }, { "epoch": 0.5, "learning_rate": 4.1349535544015655e-05, "loss": 1.2819, "step": 1720 }, { "epoch": 0.5, "learning_rate": 4.131153428479306e-05, "loss": 1.2768, "step": 1721 }, { "epoch": 0.51, "learning_rate": 4.127353184052111e-05, "loss": 1.279, "step": 1722 }, { "epoch": 0.51, "learning_rate": 4.12355282455373e-05, "loss": 1.3018, "step": 1723 }, { "epoch": 0.51, "learning_rate": 4.119752353418012e-05, "loss": 1.3279, "step": 1724 }, { "epoch": 0.51, "learning_rate": 4.1159517740789114e-05, "loss": 1.2705, "step": 1725 }, { "epoch": 0.51, "learning_rate": 4.1121510899704766e-05, "loss": 1.3457, "step": 1726 }, { "epoch": 0.51, "learning_rate": 4.1083503045268526e-05, "loss": 1.3252, "step": 1727 }, { "epoch": 0.51, "learning_rate": 4.1045494211822756e-05, "loss": 1.3029, "step": 1728 }, { "epoch": 0.51, "learning_rate": 4.1007484433710684e-05, "loss": 1.241, "step": 1729 }, { "epoch": 0.51, "learning_rate": 4.0969473745276434e-05, "loss": 1.3415, "step": 1730 }, { "epoch": 0.51, "learning_rate": 4.093146218086491e-05, "loss": 1.3393, "step": 1731 }, { "epoch": 0.51, "learning_rate": 4.0893449774821814e-05, "loss": 1.3265, "step": 1732 }, { "epoch": 0.51, "learning_rate": 4.085543656149365e-05, "loss": 1.3064, "step": 1733 }, { "epoch": 0.51, "learning_rate": 4.081742257522759e-05, "loss": 1.3113, "step": 1734 }, { "epoch": 0.51, "learning_rate": 4.077940785037156e-05, "loss": 1.3173, "step": 1735 }, { "epoch": 0.51, "learning_rate": 4.074139242127411e-05, "loss": 1.3347, "step": 1736 }, { "epoch": 0.51, "learning_rate": 4.070337632228445e-05, "loss": 1.2996, "step": 1737 }, { "epoch": 0.51, "learning_rate": 4.0665359587752394e-05, "loss": 1.2962, "step": 1738 }, { "epoch": 0.51, "learning_rate": 4.0627342252028316e-05, "loss": 1.2962, "step": 1739 }, { "epoch": 0.51, "learning_rate": 4.058932434946314e-05, "loss": 1.3119, "step": 1740 }, { "epoch": 0.51, "learning_rate": 4.055130591440834e-05, "loss": 1.3299, "step": 1741 }, { "epoch": 0.51, "learning_rate": 4.051328698121579e-05, "loss": 1.3256, "step": 1742 }, { "epoch": 0.51, "learning_rate": 4.04752675842379e-05, "loss": 1.2831, "step": 1743 }, { "epoch": 0.51, "learning_rate": 4.0437247757827417e-05, "loss": 1.2794, "step": 1744 }, { "epoch": 0.51, "learning_rate": 4.039922753633757e-05, "loss": 1.2921, "step": 1745 }, { "epoch": 0.51, "learning_rate": 4.036120695412184e-05, "loss": 1.3653, "step": 1746 }, { "epoch": 0.51, "learning_rate": 4.032318604553414e-05, "loss": 1.3592, "step": 1747 }, { "epoch": 0.51, "learning_rate": 4.028516484492857e-05, "loss": 1.2766, "step": 1748 }, { "epoch": 0.51, "learning_rate": 4.02471433866596e-05, "loss": 1.348, "step": 1749 }, { "epoch": 0.51, "learning_rate": 4.0209121705081845e-05, "loss": 1.2858, "step": 1750 }, { "epoch": 0.51, "learning_rate": 4.0171099834550185e-05, "loss": 1.2677, "step": 1751 }, { "epoch": 0.51, "learning_rate": 4.013307780941963e-05, "loss": 1.2854, "step": 1752 }, { "epoch": 0.51, "learning_rate": 4.009505566404535e-05, "loss": 1.2893, "step": 1753 }, { "epoch": 0.51, "learning_rate": 4.005703343278262e-05, "loss": 1.3607, "step": 1754 }, { "epoch": 0.51, "learning_rate": 4.0019011149986774e-05, "loss": 1.2593, "step": 1755 }, { "epoch": 0.52, "learning_rate": 3.998098885001323e-05, "loss": 1.3124, "step": 1756 }, { "epoch": 0.52, "learning_rate": 3.9942966567217396e-05, "loss": 1.3126, "step": 1757 }, { "epoch": 0.52, "learning_rate": 3.990494433595466e-05, "loss": 1.2887, "step": 1758 }, { "epoch": 0.52, "learning_rate": 3.9866922190580376e-05, "loss": 1.3259, "step": 1759 }, { "epoch": 0.52, "learning_rate": 3.982890016544982e-05, "loss": 1.315, "step": 1760 }, { "epoch": 0.52, "learning_rate": 3.979087829491816e-05, "loss": 1.3628, "step": 1761 }, { "epoch": 0.52, "learning_rate": 3.975285661334041e-05, "loss": 1.2928, "step": 1762 }, { "epoch": 0.52, "learning_rate": 3.9714835155071435e-05, "loss": 1.3065, "step": 1763 }, { "epoch": 0.52, "learning_rate": 3.967681395446588e-05, "loss": 1.3409, "step": 1764 }, { "epoch": 0.52, "learning_rate": 3.963879304587816e-05, "loss": 1.3625, "step": 1765 }, { "epoch": 0.52, "learning_rate": 3.960077246366244e-05, "loss": 1.2532, "step": 1766 }, { "epoch": 0.52, "learning_rate": 3.956275224217258e-05, "loss": 1.2926, "step": 1767 }, { "epoch": 0.52, "learning_rate": 3.952473241576211e-05, "loss": 1.2772, "step": 1768 }, { "epoch": 0.52, "learning_rate": 3.9486713018784214e-05, "loss": 1.3211, "step": 1769 }, { "epoch": 0.52, "learning_rate": 3.9448694085591674e-05, "loss": 1.3049, "step": 1770 }, { "epoch": 0.52, "learning_rate": 3.941067565053685e-05, "loss": 1.2892, "step": 1771 }, { "epoch": 0.52, "learning_rate": 3.937265774797169e-05, "loss": 1.3263, "step": 1772 }, { "epoch": 0.52, "learning_rate": 3.933464041224761e-05, "loss": 1.2876, "step": 1773 }, { "epoch": 0.52, "learning_rate": 3.929662367771556e-05, "loss": 1.2873, "step": 1774 }, { "epoch": 0.52, "learning_rate": 3.925860757872591e-05, "loss": 1.2619, "step": 1775 }, { "epoch": 0.52, "learning_rate": 3.922059214962846e-05, "loss": 1.3191, "step": 1776 }, { "epoch": 0.52, "learning_rate": 3.918257742477243e-05, "loss": 1.3316, "step": 1777 }, { "epoch": 0.52, "learning_rate": 3.914456343850637e-05, "loss": 1.2509, "step": 1778 }, { "epoch": 0.52, "learning_rate": 3.9106550225178206e-05, "loss": 1.2963, "step": 1779 }, { "epoch": 0.52, "learning_rate": 3.906853781913511e-05, "loss": 1.3364, "step": 1780 }, { "epoch": 0.52, "learning_rate": 3.903052625472359e-05, "loss": 1.3181, "step": 1781 }, { "epoch": 0.52, "learning_rate": 3.899251556628933e-05, "loss": 1.2798, "step": 1782 }, { "epoch": 0.52, "learning_rate": 3.895450578817727e-05, "loss": 1.2428, "step": 1783 }, { "epoch": 0.52, "learning_rate": 3.891649695473149e-05, "loss": 1.3046, "step": 1784 }, { "epoch": 0.52, "learning_rate": 3.887848910029525e-05, "loss": 1.3501, "step": 1785 }, { "epoch": 0.52, "learning_rate": 3.88404822592109e-05, "loss": 1.2689, "step": 1786 }, { "epoch": 0.52, "learning_rate": 3.880247646581989e-05, "loss": 1.3381, "step": 1787 }, { "epoch": 0.52, "learning_rate": 3.8764471754462714e-05, "loss": 1.2986, "step": 1788 }, { "epoch": 0.52, "learning_rate": 3.8726468159478904e-05, "loss": 1.2904, "step": 1789 }, { "epoch": 0.53, "learning_rate": 3.868846571520696e-05, "loss": 1.3375, "step": 1790 }, { "epoch": 0.53, "learning_rate": 3.865046445598435e-05, "loss": 1.3237, "step": 1791 }, { "epoch": 0.53, "learning_rate": 3.861246441614749e-05, "loss": 1.299, "step": 1792 }, { "epoch": 0.53, "learning_rate": 3.857446563003167e-05, "loss": 1.3175, "step": 1793 }, { "epoch": 0.53, "learning_rate": 3.853646813197106e-05, "loss": 1.3175, "step": 1794 }, { "epoch": 0.53, "learning_rate": 3.849847195629865e-05, "loss": 1.2389, "step": 1795 }, { "epoch": 0.53, "learning_rate": 3.846047713734627e-05, "loss": 1.2977, "step": 1796 }, { "epoch": 0.53, "learning_rate": 3.842248370944447e-05, "loss": 1.3138, "step": 1797 }, { "epoch": 0.53, "learning_rate": 3.838449170692262e-05, "loss": 1.2987, "step": 1798 }, { "epoch": 0.53, "learning_rate": 3.8346501164108716e-05, "loss": 1.362, "step": 1799 }, { "epoch": 0.53, "learning_rate": 3.83085121153295e-05, "loss": 1.2754, "step": 1800 }, { "epoch": 0.53, "eval_loss": 0.11963999271392822, "eval_runtime": 24.3979, "eval_samples_per_second": 81.974, "eval_steps_per_second": 0.656, "step": 1800 }, { "epoch": 0.53, "learning_rate": 3.8270524594910315e-05, "loss": 1.3175, "step": 1801 }, { "epoch": 0.53, "learning_rate": 3.823253863717519e-05, "loss": 1.3207, "step": 1802 }, { "epoch": 0.53, "learning_rate": 3.819455427644666e-05, "loss": 1.2735, "step": 1803 }, { "epoch": 0.53, "learning_rate": 3.8156571547045875e-05, "loss": 1.2781, "step": 1804 }, { "epoch": 0.53, "learning_rate": 3.811859048329249e-05, "loss": 1.2988, "step": 1805 }, { "epoch": 0.53, "learning_rate": 3.8080611119504666e-05, "loss": 1.3627, "step": 1806 }, { "epoch": 0.53, "learning_rate": 3.8042633489999e-05, "loss": 1.2819, "step": 1807 }, { "epoch": 0.53, "learning_rate": 3.800465762909057e-05, "loss": 1.3025, "step": 1808 }, { "epoch": 0.53, "learning_rate": 3.7966683571092805e-05, "loss": 1.2675, "step": 1809 }, { "epoch": 0.53, "learning_rate": 3.7928711350317545e-05, "loss": 1.3414, "step": 1810 }, { "epoch": 0.53, "learning_rate": 3.789074100107494e-05, "loss": 1.2875, "step": 1811 }, { "epoch": 0.53, "learning_rate": 3.7852772557673477e-05, "loss": 1.2895, "step": 1812 }, { "epoch": 0.53, "learning_rate": 3.781480605441989e-05, "loss": 1.2693, "step": 1813 }, { "epoch": 0.53, "learning_rate": 3.777684152561917e-05, "loss": 1.3053, "step": 1814 }, { "epoch": 0.53, "learning_rate": 3.7738879005574556e-05, "loss": 1.3055, "step": 1815 }, { "epoch": 0.53, "learning_rate": 3.770091852858743e-05, "loss": 1.2972, "step": 1816 }, { "epoch": 0.53, "learning_rate": 3.7662960128957336e-05, "loss": 1.3423, "step": 1817 }, { "epoch": 0.53, "learning_rate": 3.7625003840981976e-05, "loss": 1.2805, "step": 1818 }, { "epoch": 0.53, "learning_rate": 3.758704969895708e-05, "loss": 1.2707, "step": 1819 }, { "epoch": 0.53, "learning_rate": 3.75490977371765e-05, "loss": 1.2824, "step": 1820 }, { "epoch": 0.53, "learning_rate": 3.751114798993209e-05, "loss": 1.3229, "step": 1821 }, { "epoch": 0.53, "learning_rate": 3.7473200491513716e-05, "loss": 1.2698, "step": 1822 }, { "epoch": 0.53, "learning_rate": 3.743525527620918e-05, "loss": 1.2623, "step": 1823 }, { "epoch": 0.54, "learning_rate": 3.739731237830427e-05, "loss": 1.2989, "step": 1824 }, { "epoch": 0.54, "learning_rate": 3.735937183208263e-05, "loss": 1.3364, "step": 1825 }, { "epoch": 0.54, "learning_rate": 3.732143367182581e-05, "loss": 1.2686, "step": 1826 }, { "epoch": 0.54, "learning_rate": 3.728349793181321e-05, "loss": 1.2572, "step": 1827 }, { "epoch": 0.54, "learning_rate": 3.724556464632203e-05, "loss": 1.2923, "step": 1828 }, { "epoch": 0.54, "learning_rate": 3.720763384962723e-05, "loss": 1.3484, "step": 1829 }, { "epoch": 0.54, "learning_rate": 3.7169705576001565e-05, "loss": 1.3086, "step": 1830 }, { "epoch": 0.54, "learning_rate": 3.7131779859715475e-05, "loss": 1.2899, "step": 1831 }, { "epoch": 0.54, "learning_rate": 3.7093856735037126e-05, "loss": 1.3417, "step": 1832 }, { "epoch": 0.54, "learning_rate": 3.7055936236232296e-05, "loss": 1.3113, "step": 1833 }, { "epoch": 0.54, "learning_rate": 3.701801839756443e-05, "loss": 1.2167, "step": 1834 }, { "epoch": 0.54, "learning_rate": 3.698010325329454e-05, "loss": 1.2844, "step": 1835 }, { "epoch": 0.54, "learning_rate": 3.6942190837681235e-05, "loss": 1.267, "step": 1836 }, { "epoch": 0.54, "learning_rate": 3.690428118498062e-05, "loss": 1.3066, "step": 1837 }, { "epoch": 0.54, "learning_rate": 3.686637432944634e-05, "loss": 1.3051, "step": 1838 }, { "epoch": 0.54, "learning_rate": 3.6828470305329485e-05, "loss": 1.2396, "step": 1839 }, { "epoch": 0.54, "learning_rate": 3.679056914687861e-05, "loss": 1.3507, "step": 1840 }, { "epoch": 0.54, "learning_rate": 3.675267088833966e-05, "loss": 1.2689, "step": 1841 }, { "epoch": 0.54, "learning_rate": 3.6714775563955974e-05, "loss": 1.295, "step": 1842 }, { "epoch": 0.54, "learning_rate": 3.6676883207968226e-05, "loss": 1.2977, "step": 1843 }, { "epoch": 0.54, "learning_rate": 3.6638993854614434e-05, "loss": 1.2781, "step": 1844 }, { "epoch": 0.54, "learning_rate": 3.660110753812986e-05, "loss": 1.2672, "step": 1845 }, { "epoch": 0.54, "learning_rate": 3.656322429274708e-05, "loss": 1.3403, "step": 1846 }, { "epoch": 0.54, "learning_rate": 3.652534415269583e-05, "loss": 1.2922, "step": 1847 }, { "epoch": 0.54, "learning_rate": 3.648746715220309e-05, "loss": 1.3216, "step": 1848 }, { "epoch": 0.54, "learning_rate": 3.644959332549299e-05, "loss": 1.2824, "step": 1849 }, { "epoch": 0.54, "learning_rate": 3.6411722706786784e-05, "loss": 1.2775, "step": 1850 }, { "epoch": 0.54, "learning_rate": 3.637385533030282e-05, "loss": 1.3366, "step": 1851 }, { "epoch": 0.54, "learning_rate": 3.6335991230256536e-05, "loss": 1.2807, "step": 1852 }, { "epoch": 0.54, "learning_rate": 3.6298130440860394e-05, "loss": 1.2819, "step": 1853 }, { "epoch": 0.54, "learning_rate": 3.626027299632389e-05, "loss": 1.3051, "step": 1854 }, { "epoch": 0.54, "learning_rate": 3.6222418930853474e-05, "loss": 1.3106, "step": 1855 }, { "epoch": 0.54, "learning_rate": 3.618456827865254e-05, "loss": 1.2978, "step": 1856 }, { "epoch": 0.54, "learning_rate": 3.614672107392141e-05, "loss": 1.3144, "step": 1857 }, { "epoch": 0.55, "learning_rate": 3.6108877350857296e-05, "loss": 1.2938, "step": 1858 }, { "epoch": 0.55, "learning_rate": 3.6071037143654235e-05, "loss": 1.3015, "step": 1859 }, { "epoch": 0.55, "learning_rate": 3.603320048650314e-05, "loss": 1.257, "step": 1860 }, { "epoch": 0.55, "learning_rate": 3.599536741359165e-05, "loss": 1.2707, "step": 1861 }, { "epoch": 0.55, "learning_rate": 3.5957537959104226e-05, "loss": 1.3495, "step": 1862 }, { "epoch": 0.55, "learning_rate": 3.5919712157222014e-05, "loss": 1.2192, "step": 1863 }, { "epoch": 0.55, "learning_rate": 3.58818900421229e-05, "loss": 1.2999, "step": 1864 }, { "epoch": 0.55, "learning_rate": 3.5844071647981396e-05, "loss": 1.3092, "step": 1865 }, { "epoch": 0.55, "learning_rate": 3.580625700896869e-05, "loss": 1.3539, "step": 1866 }, { "epoch": 0.55, "learning_rate": 3.576844615925255e-05, "loss": 1.2931, "step": 1867 }, { "epoch": 0.55, "learning_rate": 3.573063913299733e-05, "loss": 1.3017, "step": 1868 }, { "epoch": 0.55, "learning_rate": 3.5692835964363945e-05, "loss": 1.285, "step": 1869 }, { "epoch": 0.55, "learning_rate": 3.56550366875098e-05, "loss": 1.2721, "step": 1870 }, { "epoch": 0.55, "learning_rate": 3.561724133658878e-05, "loss": 1.3359, "step": 1871 }, { "epoch": 0.55, "learning_rate": 3.557944994575125e-05, "loss": 1.2648, "step": 1872 }, { "epoch": 0.55, "learning_rate": 3.554166254914399e-05, "loss": 1.3089, "step": 1873 }, { "epoch": 0.55, "learning_rate": 3.5503879180910144e-05, "loss": 1.2983, "step": 1874 }, { "epoch": 0.55, "learning_rate": 3.546609987518925e-05, "loss": 1.2959, "step": 1875 }, { "epoch": 0.55, "learning_rate": 3.542832466611715e-05, "loss": 1.3089, "step": 1876 }, { "epoch": 0.55, "learning_rate": 3.539055358782601e-05, "loss": 1.3284, "step": 1877 }, { "epoch": 0.55, "learning_rate": 3.535278667444423e-05, "loss": 1.2928, "step": 1878 }, { "epoch": 0.55, "learning_rate": 3.531502396009648e-05, "loss": 1.2307, "step": 1879 }, { "epoch": 0.55, "learning_rate": 3.527726547890362e-05, "loss": 1.3063, "step": 1880 }, { "epoch": 0.55, "learning_rate": 3.523951126498268e-05, "loss": 1.3395, "step": 1881 }, { "epoch": 0.55, "learning_rate": 3.5201761352446844e-05, "loss": 1.268, "step": 1882 }, { "epoch": 0.55, "learning_rate": 3.5164015775405414e-05, "loss": 1.2714, "step": 1883 }, { "epoch": 0.55, "learning_rate": 3.512627456796375e-05, "loss": 1.2705, "step": 1884 }, { "epoch": 0.55, "learning_rate": 3.5088537764223295e-05, "loss": 1.3012, "step": 1885 }, { "epoch": 0.55, "learning_rate": 3.505080539828149e-05, "loss": 1.2715, "step": 1886 }, { "epoch": 0.55, "learning_rate": 3.50130775042318e-05, "loss": 1.3258, "step": 1887 }, { "epoch": 0.55, "learning_rate": 3.4975354116163594e-05, "loss": 1.2739, "step": 1888 }, { "epoch": 0.55, "learning_rate": 3.49376352681622e-05, "loss": 1.2802, "step": 1889 }, { "epoch": 0.55, "learning_rate": 3.489992099430886e-05, "loss": 1.3022, "step": 1890 }, { "epoch": 0.55, "learning_rate": 3.4862211328680655e-05, "loss": 1.306, "step": 1891 }, { "epoch": 0.56, "learning_rate": 3.482450630535052e-05, "loss": 1.3272, "step": 1892 }, { "epoch": 0.56, "learning_rate": 3.478680595838719e-05, "loss": 1.3474, "step": 1893 }, { "epoch": 0.56, "learning_rate": 3.4749110321855176e-05, "loss": 1.2565, "step": 1894 }, { "epoch": 0.56, "learning_rate": 3.471141942981472e-05, "loss": 1.2476, "step": 1895 }, { "epoch": 0.56, "learning_rate": 3.4673733316321794e-05, "loss": 1.27, "step": 1896 }, { "epoch": 0.56, "learning_rate": 3.4636052015428055e-05, "loss": 1.3043, "step": 1897 }, { "epoch": 0.56, "learning_rate": 3.4598375561180783e-05, "loss": 1.2715, "step": 1898 }, { "epoch": 0.56, "learning_rate": 3.4560703987622924e-05, "loss": 1.2638, "step": 1899 }, { "epoch": 0.56, "learning_rate": 3.452303732879296e-05, "loss": 1.2906, "step": 1900 }, { "epoch": 0.56, "eval_loss": 0.12069112062454224, "eval_runtime": 25.2088, "eval_samples_per_second": 79.337, "eval_steps_per_second": 0.635, "step": 1900 }, { "epoch": 0.56, "learning_rate": 3.4485375618724986e-05, "loss": 1.2865, "step": 1901 }, { "epoch": 0.56, "learning_rate": 3.444771889144858e-05, "loss": 1.2703, "step": 1902 }, { "epoch": 0.56, "learning_rate": 3.441006718098885e-05, "loss": 1.2536, "step": 1903 }, { "epoch": 0.56, "learning_rate": 3.4372420521366354e-05, "loss": 1.3122, "step": 1904 }, { "epoch": 0.56, "learning_rate": 3.43347789465971e-05, "loss": 1.273, "step": 1905 }, { "epoch": 0.56, "learning_rate": 3.429714249069248e-05, "loss": 1.2504, "step": 1906 }, { "epoch": 0.56, "learning_rate": 3.4259511187659285e-05, "loss": 1.2751, "step": 1907 }, { "epoch": 0.56, "learning_rate": 3.422188507149962e-05, "loss": 1.267, "step": 1908 }, { "epoch": 0.56, "learning_rate": 3.4184264176210946e-05, "loss": 1.2478, "step": 1909 }, { "epoch": 0.56, "learning_rate": 3.414664853578597e-05, "loss": 1.3067, "step": 1910 }, { "epoch": 0.56, "learning_rate": 3.410903818421266e-05, "loss": 1.2967, "step": 1911 }, { "epoch": 0.56, "learning_rate": 3.4071433155474216e-05, "loss": 1.2767, "step": 1912 }, { "epoch": 0.56, "learning_rate": 3.403383348354902e-05, "loss": 1.2698, "step": 1913 }, { "epoch": 0.56, "learning_rate": 3.3996239202410617e-05, "loss": 1.2983, "step": 1914 }, { "epoch": 0.56, "learning_rate": 3.395865034602768e-05, "loss": 1.3113, "step": 1915 }, { "epoch": 0.56, "learning_rate": 3.3921066948363956e-05, "loss": 1.3003, "step": 1916 }, { "epoch": 0.56, "learning_rate": 3.3883489043378324e-05, "loss": 1.2903, "step": 1917 }, { "epoch": 0.56, "learning_rate": 3.3845916665024626e-05, "loss": 1.274, "step": 1918 }, { "epoch": 0.56, "learning_rate": 3.380834984725176e-05, "loss": 1.3062, "step": 1919 }, { "epoch": 0.56, "learning_rate": 3.3770788624003574e-05, "loss": 1.2646, "step": 1920 }, { "epoch": 0.56, "learning_rate": 3.373323302921888e-05, "loss": 1.2929, "step": 1921 }, { "epoch": 0.56, "learning_rate": 3.3695683096831394e-05, "loss": 1.2956, "step": 1922 }, { "epoch": 0.56, "learning_rate": 3.36581388607697e-05, "loss": 1.2732, "step": 1923 }, { "epoch": 0.56, "learning_rate": 3.3620600354957276e-05, "loss": 1.2567, "step": 1924 }, { "epoch": 0.56, "learning_rate": 3.3583067613312396e-05, "loss": 1.2301, "step": 1925 }, { "epoch": 0.57, "learning_rate": 3.354554066974809e-05, "loss": 1.3254, "step": 1926 }, { "epoch": 0.57, "learning_rate": 3.350801955817223e-05, "loss": 1.2792, "step": 1927 }, { "epoch": 0.57, "learning_rate": 3.347050431248735e-05, "loss": 1.2685, "step": 1928 }, { "epoch": 0.57, "learning_rate": 3.3432994966590726e-05, "loss": 1.2609, "step": 1929 }, { "epoch": 0.57, "learning_rate": 3.3395491554374266e-05, "loss": 1.3009, "step": 1930 }, { "epoch": 0.57, "learning_rate": 3.335799410972456e-05, "loss": 1.257, "step": 1931 }, { "epoch": 0.57, "learning_rate": 3.332050266652276e-05, "loss": 1.2878, "step": 1932 }, { "epoch": 0.57, "learning_rate": 3.3283017258644634e-05, "loss": 1.264, "step": 1933 }, { "epoch": 0.57, "learning_rate": 3.324553791996048e-05, "loss": 1.3178, "step": 1934 }, { "epoch": 0.57, "learning_rate": 3.32080646843351e-05, "loss": 1.3082, "step": 1935 }, { "epoch": 0.57, "learning_rate": 3.31705975856278e-05, "loss": 1.2647, "step": 1936 }, { "epoch": 0.57, "learning_rate": 3.313313665769235e-05, "loss": 1.2802, "step": 1937 }, { "epoch": 0.57, "learning_rate": 3.30956819343769e-05, "loss": 1.2614, "step": 1938 }, { "epoch": 0.57, "learning_rate": 3.3058233449524056e-05, "loss": 1.3316, "step": 1939 }, { "epoch": 0.57, "learning_rate": 3.302079123697073e-05, "loss": 1.3094, "step": 1940 }, { "epoch": 0.57, "learning_rate": 3.298335533054821e-05, "loss": 1.2493, "step": 1941 }, { "epoch": 0.57, "learning_rate": 3.294592576408204e-05, "loss": 1.25, "step": 1942 }, { "epoch": 0.57, "learning_rate": 3.290850257139209e-05, "loss": 1.2832, "step": 1943 }, { "epoch": 0.57, "learning_rate": 3.2871085786292435e-05, "loss": 1.2659, "step": 1944 }, { "epoch": 0.57, "learning_rate": 3.2833675442591366e-05, "loss": 1.3033, "step": 1945 }, { "epoch": 0.57, "learning_rate": 3.279627157409136e-05, "loss": 1.2674, "step": 1946 }, { "epoch": 0.57, "learning_rate": 3.275887421458903e-05, "loss": 1.2916, "step": 1947 }, { "epoch": 0.57, "learning_rate": 3.2721483397875135e-05, "loss": 1.2843, "step": 1948 }, { "epoch": 0.57, "learning_rate": 3.268409915773449e-05, "loss": 1.3228, "step": 1949 }, { "epoch": 0.57, "learning_rate": 3.2646721527945984e-05, "loss": 1.2859, "step": 1950 }, { "epoch": 0.57, "learning_rate": 3.260935054228254e-05, "loss": 1.2659, "step": 1951 }, { "epoch": 0.57, "learning_rate": 3.257198623451107e-05, "loss": 1.2488, "step": 1952 }, { "epoch": 0.57, "learning_rate": 3.253462863839243e-05, "loss": 1.3218, "step": 1953 }, { "epoch": 0.57, "learning_rate": 3.2497277787681466e-05, "loss": 1.287, "step": 1954 }, { "epoch": 0.57, "learning_rate": 3.2459933716126866e-05, "loss": 1.2386, "step": 1955 }, { "epoch": 0.57, "learning_rate": 3.2422596457471245e-05, "loss": 1.3374, "step": 1956 }, { "epoch": 0.57, "learning_rate": 3.238526604545103e-05, "loss": 1.2671, "step": 1957 }, { "epoch": 0.57, "learning_rate": 3.2347942513796475e-05, "loss": 1.3083, "step": 1958 }, { "epoch": 0.57, "learning_rate": 3.2310625896231606e-05, "loss": 1.2443, "step": 1959 }, { "epoch": 0.58, "learning_rate": 3.227331622647422e-05, "loss": 1.3063, "step": 1960 }, { "epoch": 0.58, "learning_rate": 3.223601353823582e-05, "loss": 1.2786, "step": 1961 }, { "epoch": 0.58, "learning_rate": 3.219871786522161e-05, "loss": 1.255, "step": 1962 }, { "epoch": 0.58, "learning_rate": 3.216142924113046e-05, "loss": 1.2737, "step": 1963 }, { "epoch": 0.58, "learning_rate": 3.2124147699654834e-05, "loss": 1.3497, "step": 1964 }, { "epoch": 0.58, "learning_rate": 3.208687327448085e-05, "loss": 1.2816, "step": 1965 }, { "epoch": 0.58, "learning_rate": 3.204960599928816e-05, "loss": 1.2894, "step": 1966 }, { "epoch": 0.58, "learning_rate": 3.2012345907749985e-05, "loss": 1.3062, "step": 1967 }, { "epoch": 0.58, "learning_rate": 3.1975093033533003e-05, "loss": 1.3122, "step": 1968 }, { "epoch": 0.58, "learning_rate": 3.193784741029745e-05, "loss": 1.3216, "step": 1969 }, { "epoch": 0.58, "learning_rate": 3.190060907169692e-05, "loss": 1.3057, "step": 1970 }, { "epoch": 0.58, "learning_rate": 3.18633780513785e-05, "loss": 1.2744, "step": 1971 }, { "epoch": 0.58, "learning_rate": 3.1826154382982625e-05, "loss": 1.3115, "step": 1972 }, { "epoch": 0.58, "learning_rate": 3.1788938100143086e-05, "loss": 1.3188, "step": 1973 }, { "epoch": 0.58, "learning_rate": 3.175172923648703e-05, "loss": 1.3241, "step": 1974 }, { "epoch": 0.58, "learning_rate": 3.171452782563486e-05, "loss": 1.2751, "step": 1975 }, { "epoch": 0.58, "learning_rate": 3.167733390120029e-05, "loss": 1.256, "step": 1976 }, { "epoch": 0.58, "learning_rate": 3.164014749679023e-05, "loss": 1.2658, "step": 1977 }, { "epoch": 0.58, "learning_rate": 3.160296864600482e-05, "loss": 1.2596, "step": 1978 }, { "epoch": 0.58, "learning_rate": 3.156579738243735e-05, "loss": 1.2895, "step": 1979 }, { "epoch": 0.58, "learning_rate": 3.152863373967429e-05, "loss": 1.24, "step": 1980 }, { "epoch": 0.58, "learning_rate": 3.149147775129519e-05, "loss": 1.2583, "step": 1981 }, { "epoch": 0.58, "learning_rate": 3.1454329450872725e-05, "loss": 1.2948, "step": 1982 }, { "epoch": 0.58, "learning_rate": 3.1417188871972554e-05, "loss": 1.3102, "step": 1983 }, { "epoch": 0.58, "learning_rate": 3.1380056048153445e-05, "loss": 1.2686, "step": 1984 }, { "epoch": 0.58, "learning_rate": 3.1342931012967084e-05, "loss": 1.32, "step": 1985 }, { "epoch": 0.58, "learning_rate": 3.1305813799958166e-05, "loss": 1.2893, "step": 1986 }, { "epoch": 0.58, "learning_rate": 3.12687044426643e-05, "loss": 1.245, "step": 1987 }, { "epoch": 0.58, "learning_rate": 3.123160297461601e-05, "loss": 1.2896, "step": 1988 }, { "epoch": 0.58, "learning_rate": 3.1194509429336666e-05, "loss": 1.2341, "step": 1989 }, { "epoch": 0.58, "learning_rate": 3.1157423840342516e-05, "loss": 1.2995, "step": 1990 }, { "epoch": 0.58, "learning_rate": 3.112034624114257e-05, "loss": 1.2572, "step": 1991 }, { "epoch": 0.58, "learning_rate": 3.108327666523867e-05, "loss": 1.2802, "step": 1992 }, { "epoch": 0.58, "learning_rate": 3.104621514612538e-05, "loss": 1.273, "step": 1993 }, { "epoch": 0.59, "learning_rate": 3.100916171728999e-05, "loss": 1.2993, "step": 1994 }, { "epoch": 0.59, "learning_rate": 3.097211641221248e-05, "loss": 1.278, "step": 1995 }, { "epoch": 0.59, "learning_rate": 3.0935079264365485e-05, "loss": 1.2976, "step": 1996 }, { "epoch": 0.59, "learning_rate": 3.089805030721428e-05, "loss": 1.2468, "step": 1997 }, { "epoch": 0.59, "learning_rate": 3.086102957421672e-05, "loss": 1.2535, "step": 1998 }, { "epoch": 0.59, "learning_rate": 3.082401709882327e-05, "loss": 1.2311, "step": 1999 }, { "epoch": 0.59, "learning_rate": 3.078701291447689e-05, "loss": 1.2886, "step": 2000 }, { "epoch": 0.59, "eval_loss": 0.11948389559984207, "eval_runtime": 24.5965, "eval_samples_per_second": 81.312, "eval_steps_per_second": 0.65, "step": 2000 }, { "epoch": 0.59, "learning_rate": 3.075001705461305e-05, "loss": 1.335, "step": 2001 }, { "epoch": 0.59, "learning_rate": 3.071302955265973e-05, "loss": 1.2889, "step": 2002 }, { "epoch": 0.59, "learning_rate": 3.0676050442037336e-05, "loss": 1.327, "step": 2003 }, { "epoch": 0.59, "learning_rate": 3.06390797561587e-05, "loss": 1.2386, "step": 2004 }, { "epoch": 0.59, "learning_rate": 3.060211752842903e-05, "loss": 1.3083, "step": 2005 }, { "epoch": 0.59, "learning_rate": 3.0565163792245917e-05, "loss": 1.257, "step": 2006 }, { "epoch": 0.59, "learning_rate": 3.052821858099924e-05, "loss": 1.2566, "step": 2007 }, { "epoch": 0.59, "learning_rate": 3.0491281928071217e-05, "loss": 1.2586, "step": 2008 }, { "epoch": 0.59, "learning_rate": 3.0454353866836302e-05, "loss": 1.312, "step": 2009 }, { "epoch": 0.59, "learning_rate": 3.0417434430661208e-05, "loss": 1.2886, "step": 2010 }, { "epoch": 0.59, "learning_rate": 3.0380523652904835e-05, "loss": 1.2788, "step": 2011 }, { "epoch": 0.59, "learning_rate": 3.034362156691828e-05, "loss": 1.2422, "step": 2012 }, { "epoch": 0.59, "learning_rate": 3.030672820604476e-05, "loss": 1.3027, "step": 2013 }, { "epoch": 0.59, "learning_rate": 3.0269843603619643e-05, "loss": 1.2645, "step": 2014 }, { "epoch": 0.59, "learning_rate": 3.0232967792970344e-05, "loss": 1.2768, "step": 2015 }, { "epoch": 0.59, "learning_rate": 3.019610080741637e-05, "loss": 1.2942, "step": 2016 }, { "epoch": 0.59, "learning_rate": 3.015924268026923e-05, "loss": 1.3036, "step": 2017 }, { "epoch": 0.59, "learning_rate": 3.012239344483244e-05, "loss": 1.2268, "step": 2018 }, { "epoch": 0.59, "learning_rate": 3.0085553134401476e-05, "loss": 1.2829, "step": 2019 }, { "epoch": 0.59, "learning_rate": 3.004872178226376e-05, "loss": 1.2761, "step": 2020 }, { "epoch": 0.59, "learning_rate": 3.0011899421698613e-05, "loss": 1.2684, "step": 2021 }, { "epoch": 0.59, "learning_rate": 2.9975086085977207e-05, "loss": 1.244, "step": 2022 }, { "epoch": 0.59, "learning_rate": 2.993828180836262e-05, "loss": 1.2528, "step": 2023 }, { "epoch": 0.59, "learning_rate": 2.9901486622109677e-05, "loss": 1.2759, "step": 2024 }, { "epoch": 0.59, "learning_rate": 2.9864700560465044e-05, "loss": 1.2332, "step": 2025 }, { "epoch": 0.59, "learning_rate": 2.98279236566671e-05, "loss": 1.2944, "step": 2026 }, { "epoch": 0.59, "learning_rate": 2.9791155943945985e-05, "loss": 1.2872, "step": 2027 }, { "epoch": 0.59, "learning_rate": 2.9754397455523497e-05, "loss": 1.3043, "step": 2028 }, { "epoch": 0.6, "learning_rate": 2.9717648224613147e-05, "loss": 1.2353, "step": 2029 }, { "epoch": 0.6, "learning_rate": 2.9680908284420024e-05, "loss": 1.2279, "step": 2030 }, { "epoch": 0.6, "learning_rate": 2.9644177668140873e-05, "loss": 1.3347, "step": 2031 }, { "epoch": 0.6, "learning_rate": 2.9607456408963987e-05, "loss": 1.262, "step": 2032 }, { "epoch": 0.6, "learning_rate": 2.957074454006921e-05, "loss": 1.2914, "step": 2033 }, { "epoch": 0.6, "learning_rate": 2.95340420946279e-05, "loss": 1.3127, "step": 2034 }, { "epoch": 0.6, "learning_rate": 2.949734910580291e-05, "loss": 1.2757, "step": 2035 }, { "epoch": 0.6, "learning_rate": 2.946066560674853e-05, "loss": 1.2544, "step": 2036 }, { "epoch": 0.6, "learning_rate": 2.9423991630610503e-05, "loss": 1.2869, "step": 2037 }, { "epoch": 0.6, "learning_rate": 2.9387327210525942e-05, "loss": 1.285, "step": 2038 }, { "epoch": 0.6, "learning_rate": 2.9350672379623318e-05, "loss": 1.2906, "step": 2039 }, { "epoch": 0.6, "learning_rate": 2.9314027171022482e-05, "loss": 1.315, "step": 2040 }, { "epoch": 0.6, "learning_rate": 2.9277391617834533e-05, "loss": 1.2116, "step": 2041 }, { "epoch": 0.6, "learning_rate": 2.9240765753161904e-05, "loss": 1.3029, "step": 2042 }, { "epoch": 0.6, "learning_rate": 2.9204149610098223e-05, "loss": 1.2744, "step": 2043 }, { "epoch": 0.6, "learning_rate": 2.9167543221728373e-05, "loss": 1.2328, "step": 2044 }, { "epoch": 0.6, "learning_rate": 2.9130946621128395e-05, "loss": 1.2368, "step": 2045 }, { "epoch": 0.6, "learning_rate": 2.9094359841365496e-05, "loss": 1.2629, "step": 2046 }, { "epoch": 0.6, "learning_rate": 2.9057782915498026e-05, "loss": 1.308, "step": 2047 }, { "epoch": 0.6, "learning_rate": 2.902121587657539e-05, "loss": 1.23, "step": 2048 }, { "epoch": 0.6, "learning_rate": 2.898465875763812e-05, "loss": 1.2787, "step": 2049 }, { "epoch": 0.6, "learning_rate": 2.8948111591717713e-05, "loss": 1.3179, "step": 2050 }, { "epoch": 0.6, "learning_rate": 2.891157441183674e-05, "loss": 1.2561, "step": 2051 }, { "epoch": 0.6, "learning_rate": 2.8875047251008702e-05, "loss": 1.273, "step": 2052 }, { "epoch": 0.6, "learning_rate": 2.8838530142238076e-05, "loss": 1.2504, "step": 2053 }, { "epoch": 0.6, "learning_rate": 2.8802023118520227e-05, "loss": 1.2608, "step": 2054 }, { "epoch": 0.6, "learning_rate": 2.8765526212841445e-05, "loss": 1.2993, "step": 2055 }, { "epoch": 0.6, "learning_rate": 2.8729039458178846e-05, "loss": 1.2809, "step": 2056 }, { "epoch": 0.6, "learning_rate": 2.86925628875004e-05, "loss": 1.2688, "step": 2057 }, { "epoch": 0.6, "learning_rate": 2.865609653376484e-05, "loss": 1.2867, "step": 2058 }, { "epoch": 0.6, "learning_rate": 2.8619640429921705e-05, "loss": 1.2277, "step": 2059 }, { "epoch": 0.6, "learning_rate": 2.8583194608911245e-05, "loss": 1.2669, "step": 2060 }, { "epoch": 0.6, "learning_rate": 2.8546759103664446e-05, "loss": 1.3178, "step": 2061 }, { "epoch": 0.6, "learning_rate": 2.851033394710293e-05, "loss": 1.2871, "step": 2062 }, { "epoch": 0.61, "learning_rate": 2.8473919172139034e-05, "loss": 1.265, "step": 2063 }, { "epoch": 0.61, "learning_rate": 2.8437514811675636e-05, "loss": 1.2737, "step": 2064 }, { "epoch": 0.61, "learning_rate": 2.8401120898606272e-05, "loss": 1.3215, "step": 2065 }, { "epoch": 0.61, "learning_rate": 2.8364737465814993e-05, "loss": 1.2623, "step": 2066 }, { "epoch": 0.61, "learning_rate": 2.832836454617641e-05, "loss": 1.2492, "step": 2067 }, { "epoch": 0.61, "learning_rate": 2.8292002172555616e-05, "loss": 1.2173, "step": 2068 }, { "epoch": 0.61, "learning_rate": 2.8255650377808185e-05, "loss": 1.2751, "step": 2069 }, { "epoch": 0.61, "learning_rate": 2.8219309194780113e-05, "loss": 1.272, "step": 2070 }, { "epoch": 0.61, "learning_rate": 2.8182978656307854e-05, "loss": 1.2682, "step": 2071 }, { "epoch": 0.61, "learning_rate": 2.814665879521818e-05, "loss": 1.2393, "step": 2072 }, { "epoch": 0.61, "learning_rate": 2.8110349644328275e-05, "loss": 1.3219, "step": 2073 }, { "epoch": 0.61, "learning_rate": 2.8074051236445607e-05, "loss": 1.2346, "step": 2074 }, { "epoch": 0.61, "learning_rate": 2.8037763604367962e-05, "loss": 1.2936, "step": 2075 }, { "epoch": 0.61, "learning_rate": 2.800148678088335e-05, "loss": 1.2525, "step": 2076 }, { "epoch": 0.61, "learning_rate": 2.796522079877006e-05, "loss": 1.252, "step": 2077 }, { "epoch": 0.61, "learning_rate": 2.7928965690796562e-05, "loss": 1.2554, "step": 2078 }, { "epoch": 0.61, "learning_rate": 2.7892721489721516e-05, "loss": 1.2498, "step": 2079 }, { "epoch": 0.61, "learning_rate": 2.7856488228293693e-05, "loss": 1.2822, "step": 2080 }, { "epoch": 0.61, "learning_rate": 2.7820265939252036e-05, "loss": 1.2625, "step": 2081 }, { "epoch": 0.61, "learning_rate": 2.7784054655325506e-05, "loss": 1.2813, "step": 2082 }, { "epoch": 0.61, "learning_rate": 2.7747854409233187e-05, "loss": 1.2518, "step": 2083 }, { "epoch": 0.61, "learning_rate": 2.7711665233684127e-05, "loss": 1.3049, "step": 2084 }, { "epoch": 0.61, "learning_rate": 2.767548716137743e-05, "loss": 1.2807, "step": 2085 }, { "epoch": 0.61, "learning_rate": 2.7639320225002108e-05, "loss": 1.2758, "step": 2086 }, { "epoch": 0.61, "learning_rate": 2.7603164457237172e-05, "loss": 1.2759, "step": 2087 }, { "epoch": 0.61, "learning_rate": 2.7567019890751493e-05, "loss": 1.3017, "step": 2088 }, { "epoch": 0.61, "learning_rate": 2.753088655820385e-05, "loss": 1.2906, "step": 2089 }, { "epoch": 0.61, "learning_rate": 2.749476449224285e-05, "loss": 1.2719, "step": 2090 }, { "epoch": 0.61, "learning_rate": 2.745865372550695e-05, "loss": 1.2734, "step": 2091 }, { "epoch": 0.61, "learning_rate": 2.7422554290624348e-05, "loss": 1.2289, "step": 2092 }, { "epoch": 0.61, "learning_rate": 2.7386466220213065e-05, "loss": 1.3234, "step": 2093 }, { "epoch": 0.61, "learning_rate": 2.7350389546880803e-05, "loss": 1.2704, "step": 2094 }, { "epoch": 0.61, "learning_rate": 2.7314324303224994e-05, "loss": 1.2987, "step": 2095 }, { "epoch": 0.61, "learning_rate": 2.727827052183272e-05, "loss": 1.2206, "step": 2096 }, { "epoch": 0.62, "learning_rate": 2.7242228235280746e-05, "loss": 1.2969, "step": 2097 }, { "epoch": 0.62, "learning_rate": 2.7206197476135403e-05, "loss": 1.2652, "step": 2098 }, { "epoch": 0.62, "learning_rate": 2.7170178276952627e-05, "loss": 1.2619, "step": 2099 }, { "epoch": 0.62, "learning_rate": 2.7134170670277923e-05, "loss": 1.2497, "step": 2100 }, { "epoch": 0.62, "eval_loss": 0.11877856403589249, "eval_runtime": 24.594, "eval_samples_per_second": 81.321, "eval_steps_per_second": 0.651, "step": 2100 }, { "epoch": 0.62, "learning_rate": 2.7098174688646293e-05, "loss": 1.2859, "step": 2101 }, { "epoch": 0.62, "learning_rate": 2.7062190364582268e-05, "loss": 1.2402, "step": 2102 }, { "epoch": 0.62, "learning_rate": 2.7026217730599814e-05, "loss": 1.2968, "step": 2103 }, { "epoch": 0.62, "learning_rate": 2.6990256819202353e-05, "loss": 1.2992, "step": 2104 }, { "epoch": 0.62, "learning_rate": 2.695430766288271e-05, "loss": 1.2159, "step": 2105 }, { "epoch": 0.62, "learning_rate": 2.691837029412309e-05, "loss": 1.2908, "step": 2106 }, { "epoch": 0.62, "learning_rate": 2.6882444745395044e-05, "loss": 1.297, "step": 2107 }, { "epoch": 0.62, "learning_rate": 2.6846531049159454e-05, "loss": 1.2995, "step": 2108 }, { "epoch": 0.62, "learning_rate": 2.6810629237866462e-05, "loss": 1.2352, "step": 2109 }, { "epoch": 0.62, "learning_rate": 2.677473934395552e-05, "loss": 1.2636, "step": 2110 }, { "epoch": 0.62, "learning_rate": 2.673886139985526e-05, "loss": 1.2827, "step": 2111 }, { "epoch": 0.62, "learning_rate": 2.6702995437983557e-05, "loss": 1.2675, "step": 2112 }, { "epoch": 0.62, "learning_rate": 2.6667141490747454e-05, "loss": 1.2757, "step": 2113 }, { "epoch": 0.62, "learning_rate": 2.6631299590543095e-05, "loss": 1.3055, "step": 2114 }, { "epoch": 0.62, "learning_rate": 2.6595469769755805e-05, "loss": 1.2427, "step": 2115 }, { "epoch": 0.62, "learning_rate": 2.6559652060759928e-05, "loss": 1.258, "step": 2116 }, { "epoch": 0.62, "learning_rate": 2.6523846495918926e-05, "loss": 1.2685, "step": 2117 }, { "epoch": 0.62, "learning_rate": 2.648805310758524e-05, "loss": 1.2814, "step": 2118 }, { "epoch": 0.62, "learning_rate": 2.645227192810034e-05, "loss": 1.2521, "step": 2119 }, { "epoch": 0.62, "learning_rate": 2.6416502989794647e-05, "loss": 1.3092, "step": 2120 }, { "epoch": 0.62, "learning_rate": 2.638074632498754e-05, "loss": 1.2792, "step": 2121 }, { "epoch": 0.62, "learning_rate": 2.634500196598729e-05, "loss": 1.2842, "step": 2122 }, { "epoch": 0.62, "learning_rate": 2.6309269945091025e-05, "loss": 1.266, "step": 2123 }, { "epoch": 0.62, "learning_rate": 2.62735502945848e-05, "loss": 1.277, "step": 2124 }, { "epoch": 0.62, "learning_rate": 2.6237843046743407e-05, "loss": 1.2642, "step": 2125 }, { "epoch": 0.62, "learning_rate": 2.6202148233830505e-05, "loss": 1.2382, "step": 2126 }, { "epoch": 0.62, "learning_rate": 2.6166465888098458e-05, "loss": 1.2786, "step": 2127 }, { "epoch": 0.62, "learning_rate": 2.6130796041788404e-05, "loss": 1.2744, "step": 2128 }, { "epoch": 0.62, "learning_rate": 2.6095138727130166e-05, "loss": 1.2573, "step": 2129 }, { "epoch": 0.62, "learning_rate": 2.6059493976342262e-05, "loss": 1.2936, "step": 2130 }, { "epoch": 0.63, "learning_rate": 2.6023861821631835e-05, "loss": 1.2255, "step": 2131 }, { "epoch": 0.63, "learning_rate": 2.5988242295194666e-05, "loss": 1.2958, "step": 2132 }, { "epoch": 0.63, "learning_rate": 2.5952635429215117e-05, "loss": 1.2912, "step": 2133 }, { "epoch": 0.63, "learning_rate": 2.5917041255866117e-05, "loss": 1.2889, "step": 2134 }, { "epoch": 0.63, "learning_rate": 2.588145980730911e-05, "loss": 1.257, "step": 2135 }, { "epoch": 0.63, "learning_rate": 2.5845891115694067e-05, "loss": 1.3095, "step": 2136 }, { "epoch": 0.63, "learning_rate": 2.5810335213159404e-05, "loss": 1.2672, "step": 2137 }, { "epoch": 0.63, "learning_rate": 2.5774792131832012e-05, "loss": 1.2579, "step": 2138 }, { "epoch": 0.63, "learning_rate": 2.5739261903827177e-05, "loss": 1.2832, "step": 2139 }, { "epoch": 0.63, "learning_rate": 2.5703744561248577e-05, "loss": 1.304, "step": 2140 }, { "epoch": 0.63, "learning_rate": 2.566824013618824e-05, "loss": 1.2841, "step": 2141 }, { "epoch": 0.63, "learning_rate": 2.5632748660726537e-05, "loss": 1.2342, "step": 2142 }, { "epoch": 0.63, "learning_rate": 2.559727016693212e-05, "loss": 1.2839, "step": 2143 }, { "epoch": 0.63, "learning_rate": 2.5561804686861935e-05, "loss": 1.2687, "step": 2144 }, { "epoch": 0.63, "learning_rate": 2.552635225256114e-05, "loss": 1.2393, "step": 2145 }, { "epoch": 0.63, "learning_rate": 2.549091289606313e-05, "loss": 1.2649, "step": 2146 }, { "epoch": 0.63, "learning_rate": 2.5455486649389477e-05, "loss": 1.242, "step": 2147 }, { "epoch": 0.63, "learning_rate": 2.54200735445499e-05, "loss": 1.3392, "step": 2148 }, { "epoch": 0.63, "learning_rate": 2.5384673613542258e-05, "loss": 1.2608, "step": 2149 }, { "epoch": 0.63, "learning_rate": 2.5349286888352488e-05, "loss": 1.3045, "step": 2150 }, { "epoch": 0.63, "learning_rate": 2.5313913400954593e-05, "loss": 1.31, "step": 2151 }, { "epoch": 0.63, "learning_rate": 2.5278553183310646e-05, "loss": 1.2655, "step": 2152 }, { "epoch": 0.63, "learning_rate": 2.5243206267370685e-05, "loss": 1.231, "step": 2153 }, { "epoch": 0.63, "learning_rate": 2.5207872685072783e-05, "loss": 1.2513, "step": 2154 }, { "epoch": 0.63, "learning_rate": 2.5172552468342913e-05, "loss": 1.3289, "step": 2155 }, { "epoch": 0.63, "learning_rate": 2.5137245649095015e-05, "loss": 1.3057, "step": 2156 }, { "epoch": 0.63, "learning_rate": 2.510195225923088e-05, "loss": 1.2272, "step": 2157 }, { "epoch": 0.63, "learning_rate": 2.506667233064021e-05, "loss": 1.2706, "step": 2158 }, { "epoch": 0.63, "learning_rate": 2.5031405895200505e-05, "loss": 1.2998, "step": 2159 }, { "epoch": 0.63, "learning_rate": 2.4996152984777098e-05, "loss": 1.2309, "step": 2160 }, { "epoch": 0.63, "learning_rate": 2.496091363122309e-05, "loss": 1.2631, "step": 2161 }, { "epoch": 0.63, "learning_rate": 2.4925687866379344e-05, "loss": 1.2622, "step": 2162 }, { "epoch": 0.63, "learning_rate": 2.489047572207442e-05, "loss": 1.2395, "step": 2163 }, { "epoch": 0.63, "learning_rate": 2.485527723012461e-05, "loss": 1.2657, "step": 2164 }, { "epoch": 0.64, "learning_rate": 2.4820092422333827e-05, "loss": 1.2902, "step": 2165 }, { "epoch": 0.64, "learning_rate": 2.4784921330493646e-05, "loss": 1.2901, "step": 2166 }, { "epoch": 0.64, "learning_rate": 2.474976398638325e-05, "loss": 1.2893, "step": 2167 }, { "epoch": 0.64, "learning_rate": 2.4714620421769394e-05, "loss": 1.263, "step": 2168 }, { "epoch": 0.64, "learning_rate": 2.4679490668406373e-05, "loss": 1.2496, "step": 2169 }, { "epoch": 0.64, "learning_rate": 2.4644374758036015e-05, "loss": 1.3173, "step": 2170 }, { "epoch": 0.64, "learning_rate": 2.4609272722387636e-05, "loss": 1.2312, "step": 2171 }, { "epoch": 0.64, "learning_rate": 2.4574184593178022e-05, "loss": 1.2481, "step": 2172 }, { "epoch": 0.64, "learning_rate": 2.4539110402111383e-05, "loss": 1.2617, "step": 2173 }, { "epoch": 0.64, "learning_rate": 2.450405018087933e-05, "loss": 1.3095, "step": 2174 }, { "epoch": 0.64, "learning_rate": 2.4469003961160883e-05, "loss": 1.2213, "step": 2175 }, { "epoch": 0.64, "learning_rate": 2.4433971774622368e-05, "loss": 1.2374, "step": 2176 }, { "epoch": 0.64, "learning_rate": 2.4398953652917464e-05, "loss": 1.2518, "step": 2177 }, { "epoch": 0.64, "learning_rate": 2.436394962768712e-05, "loss": 1.3257, "step": 2178 }, { "epoch": 0.64, "learning_rate": 2.4328959730559573e-05, "loss": 1.2227, "step": 2179 }, { "epoch": 0.64, "learning_rate": 2.4293983993150256e-05, "loss": 1.2663, "step": 2180 }, { "epoch": 0.64, "learning_rate": 2.4259022447061858e-05, "loss": 1.2969, "step": 2181 }, { "epoch": 0.64, "learning_rate": 2.4224075123884195e-05, "loss": 1.2596, "step": 2182 }, { "epoch": 0.64, "learning_rate": 2.4189142055194276e-05, "loss": 1.199, "step": 2183 }, { "epoch": 0.64, "learning_rate": 2.4154223272556184e-05, "loss": 1.287, "step": 2184 }, { "epoch": 0.64, "learning_rate": 2.411931880752114e-05, "loss": 1.2886, "step": 2185 }, { "epoch": 0.64, "learning_rate": 2.40844286916274e-05, "loss": 1.2447, "step": 2186 }, { "epoch": 0.64, "learning_rate": 2.404955295640026e-05, "loss": 1.257, "step": 2187 }, { "epoch": 0.64, "learning_rate": 2.4014691633352038e-05, "loss": 1.2159, "step": 2188 }, { "epoch": 0.64, "learning_rate": 2.3979844753981984e-05, "loss": 1.2866, "step": 2189 }, { "epoch": 0.64, "learning_rate": 2.3945012349776358e-05, "loss": 1.2717, "step": 2190 }, { "epoch": 0.64, "learning_rate": 2.3910194452208292e-05, "loss": 1.2025, "step": 2191 }, { "epoch": 0.64, "learning_rate": 2.3875391092737853e-05, "loss": 1.2589, "step": 2192 }, { "epoch": 0.64, "learning_rate": 2.3840602302811923e-05, "loss": 1.2504, "step": 2193 }, { "epoch": 0.64, "learning_rate": 2.3805828113864272e-05, "loss": 1.2905, "step": 2194 }, { "epoch": 0.64, "learning_rate": 2.3771068557315424e-05, "loss": 1.2466, "step": 2195 }, { "epoch": 0.64, "learning_rate": 2.3736323664572734e-05, "loss": 1.232, "step": 2196 }, { "epoch": 0.64, "learning_rate": 2.3701593467030265e-05, "loss": 1.3036, "step": 2197 }, { "epoch": 0.64, "learning_rate": 2.3666877996068824e-05, "loss": 1.2539, "step": 2198 }, { "epoch": 0.65, "learning_rate": 2.3632177283055908e-05, "loss": 1.256, "step": 2199 }, { "epoch": 0.65, "learning_rate": 2.3597491359345675e-05, "loss": 1.2985, "step": 2200 }, { "epoch": 0.65, "eval_loss": 0.11855418980121613, "eval_runtime": 24.6365, "eval_samples_per_second": 81.18, "eval_steps_per_second": 0.649, "step": 2200 }, { "epoch": 0.65, "learning_rate": 2.356282025627893e-05, "loss": 1.256, "step": 2201 }, { "epoch": 0.65, "learning_rate": 2.3528164005183067e-05, "loss": 1.2158, "step": 2202 }, { "epoch": 0.65, "learning_rate": 2.3493522637372087e-05, "loss": 1.2627, "step": 2203 }, { "epoch": 0.65, "learning_rate": 2.3458896184146517e-05, "loss": 1.272, "step": 2204 }, { "epoch": 0.65, "learning_rate": 2.3424284676793428e-05, "loss": 1.3215, "step": 2205 }, { "epoch": 0.65, "learning_rate": 2.338968814658637e-05, "loss": 1.2625, "step": 2206 }, { "epoch": 0.65, "learning_rate": 2.3355106624785392e-05, "loss": 1.2564, "step": 2207 }, { "epoch": 0.65, "learning_rate": 2.332054014263692e-05, "loss": 1.3071, "step": 2208 }, { "epoch": 0.65, "learning_rate": 2.3285988731373852e-05, "loss": 1.2588, "step": 2209 }, { "epoch": 0.65, "learning_rate": 2.3251452422215466e-05, "loss": 1.2609, "step": 2210 }, { "epoch": 0.65, "learning_rate": 2.3216931246367335e-05, "loss": 1.2539, "step": 2211 }, { "epoch": 0.65, "learning_rate": 2.318242523502141e-05, "loss": 1.2492, "step": 2212 }, { "epoch": 0.65, "learning_rate": 2.3147934419355935e-05, "loss": 1.2336, "step": 2213 }, { "epoch": 0.65, "learning_rate": 2.3113458830535432e-05, "loss": 1.28, "step": 2214 }, { "epoch": 0.65, "learning_rate": 2.3078998499710613e-05, "loss": 1.2764, "step": 2215 }, { "epoch": 0.65, "learning_rate": 2.3044553458018454e-05, "loss": 1.2551, "step": 2216 }, { "epoch": 0.65, "learning_rate": 2.301012373658211e-05, "loss": 1.2251, "step": 2217 }, { "epoch": 0.65, "learning_rate": 2.2975709366510887e-05, "loss": 1.2493, "step": 2218 }, { "epoch": 0.65, "learning_rate": 2.2941310378900203e-05, "loss": 1.2662, "step": 2219 }, { "epoch": 0.65, "learning_rate": 2.2906926804831598e-05, "loss": 1.2551, "step": 2220 }, { "epoch": 0.65, "learning_rate": 2.2872558675372674e-05, "loss": 1.2538, "step": 2221 }, { "epoch": 0.65, "learning_rate": 2.2838206021577108e-05, "loss": 1.2619, "step": 2222 }, { "epoch": 0.65, "learning_rate": 2.2803868874484536e-05, "loss": 1.2853, "step": 2223 }, { "epoch": 0.65, "learning_rate": 2.276954726512063e-05, "loss": 1.2659, "step": 2224 }, { "epoch": 0.65, "learning_rate": 2.2735241224497027e-05, "loss": 1.2268, "step": 2225 }, { "epoch": 0.65, "learning_rate": 2.2700950783611252e-05, "loss": 1.2466, "step": 2226 }, { "epoch": 0.65, "learning_rate": 2.2666675973446763e-05, "loss": 1.3015, "step": 2227 }, { "epoch": 0.65, "learning_rate": 2.2632416824972886e-05, "loss": 1.2587, "step": 2228 }, { "epoch": 0.65, "learning_rate": 2.2598173369144815e-05, "loss": 1.2257, "step": 2229 }, { "epoch": 0.65, "learning_rate": 2.256394563690357e-05, "loss": 1.2616, "step": 2230 }, { "epoch": 0.65, "learning_rate": 2.2529733659175906e-05, "loss": 1.2229, "step": 2231 }, { "epoch": 0.65, "learning_rate": 2.2495537466874402e-05, "loss": 1.2487, "step": 2232 }, { "epoch": 0.66, "learning_rate": 2.2461357090897352e-05, "loss": 1.2669, "step": 2233 }, { "epoch": 0.66, "learning_rate": 2.2427192562128786e-05, "loss": 1.2863, "step": 2234 }, { "epoch": 0.66, "learning_rate": 2.239304391143835e-05, "loss": 1.2698, "step": 2235 }, { "epoch": 0.66, "learning_rate": 2.2358911169681405e-05, "loss": 1.259, "step": 2236 }, { "epoch": 0.66, "learning_rate": 2.2324794367698914e-05, "loss": 1.2678, "step": 2237 }, { "epoch": 0.66, "learning_rate": 2.2290693536317444e-05, "loss": 1.277, "step": 2238 }, { "epoch": 0.66, "learning_rate": 2.2256608706349118e-05, "loss": 1.2437, "step": 2239 }, { "epoch": 0.66, "learning_rate": 2.2222539908591608e-05, "loss": 1.244, "step": 2240 }, { "epoch": 0.66, "learning_rate": 2.2188487173828116e-05, "loss": 1.2023, "step": 2241 }, { "epoch": 0.66, "learning_rate": 2.2154450532827315e-05, "loss": 1.3176, "step": 2242 }, { "epoch": 0.66, "learning_rate": 2.212043001634332e-05, "loss": 1.2036, "step": 2243 }, { "epoch": 0.66, "learning_rate": 2.2086425655115703e-05, "loss": 1.2621, "step": 2244 }, { "epoch": 0.66, "learning_rate": 2.205243747986943e-05, "loss": 1.2734, "step": 2245 }, { "epoch": 0.66, "learning_rate": 2.2018465521314857e-05, "loss": 1.2831, "step": 2246 }, { "epoch": 0.66, "learning_rate": 2.1984509810147645e-05, "loss": 1.2566, "step": 2247 }, { "epoch": 0.66, "learning_rate": 2.1950570377048813e-05, "loss": 1.289, "step": 2248 }, { "epoch": 0.66, "learning_rate": 2.1916647252684678e-05, "loss": 1.2823, "step": 2249 }, { "epoch": 0.66, "learning_rate": 2.188274046770677e-05, "loss": 1.2266, "step": 2250 }, { "epoch": 0.66, "learning_rate": 2.18488500527519e-05, "loss": 1.2698, "step": 2251 }, { "epoch": 0.66, "learning_rate": 2.1814976038442087e-05, "loss": 1.2187, "step": 2252 }, { "epoch": 0.66, "learning_rate": 2.178111845538453e-05, "loss": 1.3168, "step": 2253 }, { "epoch": 0.66, "learning_rate": 2.1747277334171545e-05, "loss": 1.2259, "step": 2254 }, { "epoch": 0.66, "learning_rate": 2.1713452705380612e-05, "loss": 1.2816, "step": 2255 }, { "epoch": 0.66, "learning_rate": 2.1679644599574293e-05, "loss": 1.2566, "step": 2256 }, { "epoch": 0.66, "learning_rate": 2.1645853047300248e-05, "loss": 1.3025, "step": 2257 }, { "epoch": 0.66, "learning_rate": 2.1612078079091125e-05, "loss": 1.2821, "step": 2258 }, { "epoch": 0.66, "learning_rate": 2.157831972546463e-05, "loss": 1.2499, "step": 2259 }, { "epoch": 0.66, "learning_rate": 2.1544578016923454e-05, "loss": 1.23, "step": 2260 }, { "epoch": 0.66, "learning_rate": 2.151085298395525e-05, "loss": 1.2619, "step": 2261 }, { "epoch": 0.66, "learning_rate": 2.1477144657032566e-05, "loss": 1.2541, "step": 2262 }, { "epoch": 0.66, "learning_rate": 2.144345306661291e-05, "loss": 1.2347, "step": 2263 }, { "epoch": 0.66, "learning_rate": 2.1409778243138618e-05, "loss": 1.2679, "step": 2264 }, { "epoch": 0.66, "learning_rate": 2.1376120217036904e-05, "loss": 1.2237, "step": 2265 }, { "epoch": 0.66, "learning_rate": 2.1342479018719818e-05, "loss": 1.2391, "step": 2266 }, { "epoch": 0.67, "learning_rate": 2.1308854678584162e-05, "loss": 1.2798, "step": 2267 }, { "epoch": 0.67, "learning_rate": 2.1275247227011536e-05, "loss": 1.2453, "step": 2268 }, { "epoch": 0.67, "learning_rate": 2.124165669436828e-05, "loss": 1.2812, "step": 2269 }, { "epoch": 0.67, "learning_rate": 2.1208083111005458e-05, "loss": 1.2221, "step": 2270 }, { "epoch": 0.67, "learning_rate": 2.1174526507258762e-05, "loss": 1.2548, "step": 2271 }, { "epoch": 0.67, "learning_rate": 2.1140986913448606e-05, "loss": 1.2705, "step": 2272 }, { "epoch": 0.67, "learning_rate": 2.1107464359880022e-05, "loss": 1.2134, "step": 2273 }, { "epoch": 0.67, "learning_rate": 2.1073958876842605e-05, "loss": 1.179, "step": 2274 }, { "epoch": 0.67, "learning_rate": 2.104047049461057e-05, "loss": 1.2881, "step": 2275 }, { "epoch": 0.67, "learning_rate": 2.1006999243442664e-05, "loss": 1.2468, "step": 2276 }, { "epoch": 0.67, "learning_rate": 2.097354515358217e-05, "loss": 1.2831, "step": 2277 }, { "epoch": 0.67, "learning_rate": 2.0940108255256823e-05, "loss": 1.2548, "step": 2278 }, { "epoch": 0.67, "learning_rate": 2.090668857867886e-05, "loss": 1.2709, "step": 2279 }, { "epoch": 0.67, "learning_rate": 2.0873286154044967e-05, "loss": 1.254, "step": 2280 }, { "epoch": 0.67, "learning_rate": 2.0839901011536216e-05, "loss": 1.2239, "step": 2281 }, { "epoch": 0.67, "learning_rate": 2.0806533181318063e-05, "loss": 1.2516, "step": 2282 }, { "epoch": 0.67, "learning_rate": 2.0773182693540325e-05, "loss": 1.2853, "step": 2283 }, { "epoch": 0.67, "learning_rate": 2.0739849578337167e-05, "loss": 1.2284, "step": 2284 }, { "epoch": 0.67, "learning_rate": 2.070653386582706e-05, "loss": 1.228, "step": 2285 }, { "epoch": 0.67, "learning_rate": 2.0673235586112698e-05, "loss": 1.2463, "step": 2286 }, { "epoch": 0.67, "learning_rate": 2.0639954769281082e-05, "loss": 1.2998, "step": 2287 }, { "epoch": 0.67, "learning_rate": 2.0606691445403398e-05, "loss": 1.277, "step": 2288 }, { "epoch": 0.67, "learning_rate": 2.057344564453507e-05, "loss": 1.2121, "step": 2289 }, { "epoch": 0.67, "learning_rate": 2.054021739671562e-05, "loss": 1.2894, "step": 2290 }, { "epoch": 0.67, "learning_rate": 2.0507006731968775e-05, "loss": 1.2377, "step": 2291 }, { "epoch": 0.67, "learning_rate": 2.0473813680302337e-05, "loss": 1.2422, "step": 2292 }, { "epoch": 0.67, "learning_rate": 2.0440638271708226e-05, "loss": 1.2078, "step": 2293 }, { "epoch": 0.67, "learning_rate": 2.0407480536162368e-05, "loss": 1.3223, "step": 2294 }, { "epoch": 0.67, "learning_rate": 2.037434050362477e-05, "loss": 1.2316, "step": 2295 }, { "epoch": 0.67, "learning_rate": 2.0341218204039416e-05, "loss": 1.246, "step": 2296 }, { "epoch": 0.67, "learning_rate": 2.0308113667334303e-05, "loss": 1.2729, "step": 2297 }, { "epoch": 0.67, "learning_rate": 2.0275026923421315e-05, "loss": 1.2566, "step": 2298 }, { "epoch": 0.67, "learning_rate": 2.0241958002196308e-05, "loss": 1.2499, "step": 2299 }, { "epoch": 0.67, "learning_rate": 2.020890693353905e-05, "loss": 1.2566, "step": 2300 }, { "epoch": 0.67, "eval_loss": 0.11811155080795288, "eval_runtime": 24.448, "eval_samples_per_second": 81.806, "eval_steps_per_second": 0.654, "step": 2300 }, { "epoch": 0.68, "learning_rate": 2.0175873747313118e-05, "loss": 1.2615, "step": 2301 }, { "epoch": 0.68, "learning_rate": 2.0142858473365957e-05, "loss": 1.2863, "step": 2302 }, { "epoch": 0.68, "learning_rate": 2.0109861141528853e-05, "loss": 1.2716, "step": 2303 }, { "epoch": 0.68, "learning_rate": 2.0076881781616856e-05, "loss": 1.2606, "step": 2304 }, { "epoch": 0.68, "learning_rate": 2.004392042342879e-05, "loss": 1.2657, "step": 2305 }, { "epoch": 0.68, "learning_rate": 2.0010977096747184e-05, "loss": 1.2354, "step": 2306 }, { "epoch": 0.68, "learning_rate": 1.997805183133831e-05, "loss": 1.2393, "step": 2307 }, { "epoch": 0.68, "learning_rate": 1.9945144656952103e-05, "loss": 1.2591, "step": 2308 }, { "epoch": 0.68, "learning_rate": 1.9912255603322174e-05, "loss": 1.2773, "step": 2309 }, { "epoch": 0.68, "learning_rate": 1.9879384700165706e-05, "loss": 1.211, "step": 2310 }, { "epoch": 0.68, "learning_rate": 1.9846531977183536e-05, "loss": 1.2424, "step": 2311 }, { "epoch": 0.68, "learning_rate": 1.9813697464060056e-05, "loss": 1.2258, "step": 2312 }, { "epoch": 0.68, "learning_rate": 1.9780881190463222e-05, "loss": 1.2652, "step": 2313 }, { "epoch": 0.68, "learning_rate": 1.9748083186044456e-05, "loss": 1.2576, "step": 2314 }, { "epoch": 0.68, "learning_rate": 1.971530348043873e-05, "loss": 1.2408, "step": 2315 }, { "epoch": 0.68, "learning_rate": 1.968254210326445e-05, "loss": 1.2493, "step": 2316 }, { "epoch": 0.68, "learning_rate": 1.9649799084123504e-05, "loss": 1.3321, "step": 2317 }, { "epoch": 0.68, "learning_rate": 1.9617074452601113e-05, "loss": 1.2754, "step": 2318 }, { "epoch": 0.68, "learning_rate": 1.958436823826595e-05, "loss": 1.1769, "step": 2319 }, { "epoch": 0.68, "learning_rate": 1.9551680470670035e-05, "loss": 1.2696, "step": 2320 }, { "epoch": 0.68, "learning_rate": 1.9519011179348717e-05, "loss": 1.2095, "step": 2321 }, { "epoch": 0.68, "learning_rate": 1.9486360393820628e-05, "loss": 1.2401, "step": 2322 }, { "epoch": 0.68, "learning_rate": 1.9453728143587697e-05, "loss": 1.2474, "step": 2323 }, { "epoch": 0.68, "learning_rate": 1.9421114458135138e-05, "loss": 1.2745, "step": 2324 }, { "epoch": 0.68, "learning_rate": 1.9388519366931316e-05, "loss": 1.2282, "step": 2325 }, { "epoch": 0.68, "learning_rate": 1.935594289942786e-05, "loss": 1.2392, "step": 2326 }, { "epoch": 0.68, "learning_rate": 1.9323385085059544e-05, "loss": 1.2246, "step": 2327 }, { "epoch": 0.68, "learning_rate": 1.9290845953244307e-05, "loss": 1.278, "step": 2328 }, { "epoch": 0.68, "learning_rate": 1.9258325533383166e-05, "loss": 1.2167, "step": 2329 }, { "epoch": 0.68, "learning_rate": 1.922582385486027e-05, "loss": 1.242, "step": 2330 }, { "epoch": 0.68, "learning_rate": 1.919334094704282e-05, "loss": 1.2719, "step": 2331 }, { "epoch": 0.68, "learning_rate": 1.9160876839281077e-05, "loss": 1.2855, "step": 2332 }, { "epoch": 0.68, "learning_rate": 1.9128431560908263e-05, "loss": 1.2537, "step": 2333 }, { "epoch": 0.68, "learning_rate": 1.909600514124063e-05, "loss": 1.2326, "step": 2334 }, { "epoch": 0.69, "learning_rate": 1.9063597609577388e-05, "loss": 1.291, "step": 2335 }, { "epoch": 0.69, "learning_rate": 1.9031208995200687e-05, "loss": 1.2172, "step": 2336 }, { "epoch": 0.69, "learning_rate": 1.8998839327375532e-05, "loss": 1.2588, "step": 2337 }, { "epoch": 0.69, "learning_rate": 1.896648863534988e-05, "loss": 1.2368, "step": 2338 }, { "epoch": 0.69, "learning_rate": 1.8934156948354475e-05, "loss": 1.2894, "step": 2339 }, { "epoch": 0.69, "learning_rate": 1.890184429560294e-05, "loss": 1.2531, "step": 2340 }, { "epoch": 0.69, "learning_rate": 1.88695507062917e-05, "loss": 1.2341, "step": 2341 }, { "epoch": 0.69, "learning_rate": 1.8837276209599896e-05, "loss": 1.2534, "step": 2342 }, { "epoch": 0.69, "learning_rate": 1.8805020834689487e-05, "loss": 1.272, "step": 2343 }, { "epoch": 0.69, "learning_rate": 1.8772784610705107e-05, "loss": 1.2104, "step": 2344 }, { "epoch": 0.69, "learning_rate": 1.8740567566774137e-05, "loss": 1.2886, "step": 2345 }, { "epoch": 0.69, "learning_rate": 1.8708369732006555e-05, "loss": 1.2731, "step": 2346 }, { "epoch": 0.69, "learning_rate": 1.867619113549504e-05, "loss": 1.2391, "step": 2347 }, { "epoch": 0.69, "learning_rate": 1.864403180631489e-05, "loss": 1.2516, "step": 2348 }, { "epoch": 0.69, "learning_rate": 1.8611891773523934e-05, "loss": 1.2218, "step": 2349 }, { "epoch": 0.69, "learning_rate": 1.8579771066162626e-05, "loss": 1.2865, "step": 2350 }, { "epoch": 0.69, "learning_rate": 1.8547669713253943e-05, "loss": 1.2438, "step": 2351 }, { "epoch": 0.69, "learning_rate": 1.8515587743803376e-05, "loss": 1.262, "step": 2352 }, { "epoch": 0.69, "learning_rate": 1.8483525186798875e-05, "loss": 1.2495, "step": 2353 }, { "epoch": 0.69, "learning_rate": 1.8451482071210875e-05, "loss": 1.2554, "step": 2354 }, { "epoch": 0.69, "learning_rate": 1.8419458425992243e-05, "loss": 1.2481, "step": 2355 }, { "epoch": 0.69, "learning_rate": 1.8387454280078268e-05, "loss": 1.2407, "step": 2356 }, { "epoch": 0.69, "learning_rate": 1.835546966238657e-05, "loss": 1.2507, "step": 2357 }, { "epoch": 0.69, "learning_rate": 1.832350460181717e-05, "loss": 1.2699, "step": 2358 }, { "epoch": 0.69, "learning_rate": 1.82915591272524e-05, "loss": 1.1869, "step": 2359 }, { "epoch": 0.69, "learning_rate": 1.8259633267556923e-05, "loss": 1.1662, "step": 2360 }, { "epoch": 0.69, "learning_rate": 1.822772705157762e-05, "loss": 1.294, "step": 2361 }, { "epoch": 0.69, "learning_rate": 1.8195840508143673e-05, "loss": 1.2864, "step": 2362 }, { "epoch": 0.69, "learning_rate": 1.8163973666066473e-05, "loss": 1.2462, "step": 2363 }, { "epoch": 0.69, "learning_rate": 1.813212655413962e-05, "loss": 1.2157, "step": 2364 }, { "epoch": 0.69, "learning_rate": 1.8100299201138857e-05, "loss": 1.2405, "step": 2365 }, { "epoch": 0.69, "learning_rate": 1.8068491635822097e-05, "loss": 1.24, "step": 2366 }, { "epoch": 0.69, "learning_rate": 1.803670388692938e-05, "loss": 1.2725, "step": 2367 }, { "epoch": 0.69, "learning_rate": 1.8004935983182835e-05, "loss": 1.2409, "step": 2368 }, { "epoch": 0.7, "learning_rate": 1.7973187953286634e-05, "loss": 1.288, "step": 2369 }, { "epoch": 0.7, "learning_rate": 1.7941459825927023e-05, "loss": 1.2169, "step": 2370 }, { "epoch": 0.7, "learning_rate": 1.7909751629772258e-05, "loss": 1.1776, "step": 2371 }, { "epoch": 0.7, "learning_rate": 1.787806339347259e-05, "loss": 1.2889, "step": 2372 }, { "epoch": 0.7, "learning_rate": 1.784639514566021e-05, "loss": 1.2779, "step": 2373 }, { "epoch": 0.7, "learning_rate": 1.781474691494927e-05, "loss": 1.2556, "step": 2374 }, { "epoch": 0.7, "learning_rate": 1.7783118729935838e-05, "loss": 1.2782, "step": 2375 }, { "epoch": 0.7, "learning_rate": 1.775151061919789e-05, "loss": 1.2685, "step": 2376 }, { "epoch": 0.7, "learning_rate": 1.7719922611295177e-05, "loss": 1.2556, "step": 2377 }, { "epoch": 0.7, "learning_rate": 1.7688354734769378e-05, "loss": 1.2534, "step": 2378 }, { "epoch": 0.7, "learning_rate": 1.765680701814395e-05, "loss": 1.2165, "step": 2379 }, { "epoch": 0.7, "learning_rate": 1.762527948992414e-05, "loss": 1.2971, "step": 2380 }, { "epoch": 0.7, "learning_rate": 1.759377217859693e-05, "loss": 1.1959, "step": 2381 }, { "epoch": 0.7, "learning_rate": 1.756228511263106e-05, "loss": 1.2311, "step": 2382 }, { "epoch": 0.7, "learning_rate": 1.753081832047697e-05, "loss": 1.2669, "step": 2383 }, { "epoch": 0.7, "learning_rate": 1.7499371830566794e-05, "loss": 1.2795, "step": 2384 }, { "epoch": 0.7, "learning_rate": 1.7467945671314284e-05, "loss": 1.2406, "step": 2385 }, { "epoch": 0.7, "learning_rate": 1.743653987111486e-05, "loss": 1.1918, "step": 2386 }, { "epoch": 0.7, "learning_rate": 1.740515445834553e-05, "loss": 1.2611, "step": 2387 }, { "epoch": 0.7, "learning_rate": 1.7373789461364904e-05, "loss": 1.2804, "step": 2388 }, { "epoch": 0.7, "learning_rate": 1.734244490851309e-05, "loss": 1.2379, "step": 2389 }, { "epoch": 0.7, "learning_rate": 1.7311120828111772e-05, "loss": 1.2276, "step": 2390 }, { "epoch": 0.7, "learning_rate": 1.7279817248464132e-05, "loss": 1.226, "step": 2391 }, { "epoch": 0.7, "learning_rate": 1.724853419785482e-05, "loss": 1.2891, "step": 2392 }, { "epoch": 0.7, "learning_rate": 1.7217271704549913e-05, "loss": 1.2269, "step": 2393 }, { "epoch": 0.7, "learning_rate": 1.718602979679695e-05, "loss": 1.2288, "step": 2394 }, { "epoch": 0.7, "learning_rate": 1.715480850282486e-05, "loss": 1.2531, "step": 2395 }, { "epoch": 0.7, "learning_rate": 1.7123607850843947e-05, "loss": 1.2267, "step": 2396 }, { "epoch": 0.7, "learning_rate": 1.7092427869045836e-05, "loss": 1.2184, "step": 2397 }, { "epoch": 0.7, "learning_rate": 1.7061268585603507e-05, "loss": 1.2751, "step": 2398 }, { "epoch": 0.7, "learning_rate": 1.7030130028671235e-05, "loss": 1.3224, "step": 2399 }, { "epoch": 0.7, "learning_rate": 1.6999012226384572e-05, "loss": 1.2363, "step": 2400 }, { "epoch": 0.7, "eval_loss": 0.12007694691419601, "eval_runtime": 24.699, "eval_samples_per_second": 80.975, "eval_steps_per_second": 0.648, "step": 2400 }, { "epoch": 0.7, "learning_rate": 1.6967915206860274e-05, "loss": 1.2254, "step": 2401 }, { "epoch": 0.7, "learning_rate": 1.693683899819637e-05, "loss": 1.2336, "step": 2402 }, { "epoch": 0.71, "learning_rate": 1.6905783628472074e-05, "loss": 1.2907, "step": 2403 }, { "epoch": 0.71, "learning_rate": 1.6874749125747737e-05, "loss": 1.2646, "step": 2404 }, { "epoch": 0.71, "learning_rate": 1.6843735518064895e-05, "loss": 1.221, "step": 2405 }, { "epoch": 0.71, "learning_rate": 1.6812742833446187e-05, "loss": 1.253, "step": 2406 }, { "epoch": 0.71, "learning_rate": 1.678177109989537e-05, "loss": 1.2584, "step": 2407 }, { "epoch": 0.71, "learning_rate": 1.6750820345397217e-05, "loss": 1.2554, "step": 2408 }, { "epoch": 0.71, "learning_rate": 1.6719890597917588e-05, "loss": 1.2153, "step": 2409 }, { "epoch": 0.71, "learning_rate": 1.668898188540336e-05, "loss": 1.2268, "step": 2410 }, { "epoch": 0.71, "learning_rate": 1.6658094235782405e-05, "loss": 1.2531, "step": 2411 }, { "epoch": 0.71, "learning_rate": 1.662722767696353e-05, "loss": 1.2358, "step": 2412 }, { "epoch": 0.71, "learning_rate": 1.659638223683653e-05, "loss": 1.2201, "step": 2413 }, { "epoch": 0.71, "learning_rate": 1.6565557943272077e-05, "loss": 1.3041, "step": 2414 }, { "epoch": 0.71, "learning_rate": 1.6534754824121767e-05, "loss": 1.1871, "step": 2415 }, { "epoch": 0.71, "learning_rate": 1.6503972907218067e-05, "loss": 1.2208, "step": 2416 }, { "epoch": 0.71, "learning_rate": 1.6473212220374245e-05, "loss": 1.2669, "step": 2417 }, { "epoch": 0.71, "learning_rate": 1.644247279138442e-05, "loss": 1.2488, "step": 2418 }, { "epoch": 0.71, "learning_rate": 1.641175464802351e-05, "loss": 1.2402, "step": 2419 }, { "epoch": 0.71, "learning_rate": 1.6381057818047196e-05, "loss": 1.1869, "step": 2420 }, { "epoch": 0.71, "learning_rate": 1.635038232919186e-05, "loss": 1.2312, "step": 2421 }, { "epoch": 0.71, "learning_rate": 1.631972820917465e-05, "loss": 1.2659, "step": 2422 }, { "epoch": 0.71, "learning_rate": 1.628909548569339e-05, "loss": 1.1862, "step": 2423 }, { "epoch": 0.71, "learning_rate": 1.6258484186426585e-05, "loss": 1.2562, "step": 2424 }, { "epoch": 0.71, "learning_rate": 1.6227894339033334e-05, "loss": 1.2712, "step": 2425 }, { "epoch": 0.71, "learning_rate": 1.6197325971153408e-05, "loss": 1.2487, "step": 2426 }, { "epoch": 0.71, "learning_rate": 1.6166779110407155e-05, "loss": 1.2378, "step": 2427 }, { "epoch": 0.71, "learning_rate": 1.6136253784395455e-05, "loss": 1.2381, "step": 2428 }, { "epoch": 0.71, "learning_rate": 1.610575002069978e-05, "loss": 1.2484, "step": 2429 }, { "epoch": 0.71, "learning_rate": 1.607526784688209e-05, "loss": 1.2071, "step": 2430 }, { "epoch": 0.71, "learning_rate": 1.604480729048486e-05, "loss": 1.2194, "step": 2431 }, { "epoch": 0.71, "learning_rate": 1.6014368379030987e-05, "loss": 1.2515, "step": 2432 }, { "epoch": 0.71, "learning_rate": 1.5983951140023864e-05, "loss": 1.2542, "step": 2433 }, { "epoch": 0.71, "learning_rate": 1.5953555600947263e-05, "loss": 1.2154, "step": 2434 }, { "epoch": 0.71, "learning_rate": 1.5923181789265393e-05, "loss": 1.2378, "step": 2435 }, { "epoch": 0.71, "learning_rate": 1.589282973242276e-05, "loss": 1.277, "step": 2436 }, { "epoch": 0.71, "learning_rate": 1.5862499457844283e-05, "loss": 1.3002, "step": 2437 }, { "epoch": 0.72, "learning_rate": 1.583219099293516e-05, "loss": 1.2509, "step": 2438 }, { "epoch": 0.72, "learning_rate": 1.580190436508091e-05, "loss": 1.2876, "step": 2439 }, { "epoch": 0.72, "learning_rate": 1.5771639601647286e-05, "loss": 1.2338, "step": 2440 }, { "epoch": 0.72, "learning_rate": 1.574139672998031e-05, "loss": 1.257, "step": 2441 }, { "epoch": 0.72, "learning_rate": 1.5711175777406217e-05, "loss": 1.2704, "step": 2442 }, { "epoch": 0.72, "learning_rate": 1.568097677123146e-05, "loss": 1.249, "step": 2443 }, { "epoch": 0.72, "learning_rate": 1.5650799738742604e-05, "loss": 1.2386, "step": 2444 }, { "epoch": 0.72, "learning_rate": 1.5620644707206406e-05, "loss": 1.2212, "step": 2445 }, { "epoch": 0.72, "learning_rate": 1.5590511703869745e-05, "loss": 1.2294, "step": 2446 }, { "epoch": 0.72, "learning_rate": 1.556040075595959e-05, "loss": 1.2305, "step": 2447 }, { "epoch": 0.72, "learning_rate": 1.5530311890682946e-05, "loss": 1.2587, "step": 2448 }, { "epoch": 0.72, "learning_rate": 1.5500245135226917e-05, "loss": 1.2768, "step": 2449 }, { "epoch": 0.72, "learning_rate": 1.5470200516758606e-05, "loss": 1.1653, "step": 2450 }, { "epoch": 0.72, "learning_rate": 1.544017806242513e-05, "loss": 1.2504, "step": 2451 }, { "epoch": 0.72, "learning_rate": 1.5410177799353524e-05, "loss": 1.2825, "step": 2452 }, { "epoch": 0.72, "learning_rate": 1.5380199754650838e-05, "loss": 1.2358, "step": 2453 }, { "epoch": 0.72, "learning_rate": 1.5350243955404015e-05, "loss": 1.2393, "step": 2454 }, { "epoch": 0.72, "learning_rate": 1.532031042867992e-05, "loss": 1.1987, "step": 2455 }, { "epoch": 0.72, "learning_rate": 1.529039920152525e-05, "loss": 1.2591, "step": 2456 }, { "epoch": 0.72, "learning_rate": 1.5260510300966593e-05, "loss": 1.2111, "step": 2457 }, { "epoch": 0.72, "learning_rate": 1.5230643754010355e-05, "loss": 1.2552, "step": 2458 }, { "epoch": 0.72, "learning_rate": 1.5200799587642747e-05, "loss": 1.2698, "step": 2459 }, { "epoch": 0.72, "learning_rate": 1.517097782882973e-05, "loss": 1.2331, "step": 2460 }, { "epoch": 0.72, "learning_rate": 1.5141178504517048e-05, "loss": 1.2203, "step": 2461 }, { "epoch": 0.72, "learning_rate": 1.5111401641630168e-05, "loss": 1.207, "step": 2462 }, { "epoch": 0.72, "learning_rate": 1.508164726707427e-05, "loss": 1.2449, "step": 2463 }, { "epoch": 0.72, "learning_rate": 1.5051915407734181e-05, "loss": 1.2555, "step": 2464 }, { "epoch": 0.72, "learning_rate": 1.5022206090474418e-05, "loss": 1.2263, "step": 2465 }, { "epoch": 0.72, "learning_rate": 1.4992519342139118e-05, "loss": 1.234, "step": 2466 }, { "epoch": 0.72, "learning_rate": 1.496285518955204e-05, "loss": 1.2564, "step": 2467 }, { "epoch": 0.72, "learning_rate": 1.4933213659516485e-05, "loss": 1.2294, "step": 2468 }, { "epoch": 0.72, "learning_rate": 1.4903594778815361e-05, "loss": 1.2796, "step": 2469 }, { "epoch": 0.72, "learning_rate": 1.4873998574211075e-05, "loss": 1.246, "step": 2470 }, { "epoch": 0.72, "learning_rate": 1.484442507244559e-05, "loss": 1.2505, "step": 2471 }, { "epoch": 0.73, "learning_rate": 1.4814874300240295e-05, "loss": 1.2326, "step": 2472 }, { "epoch": 0.73, "learning_rate": 1.4785346284296078e-05, "loss": 1.247, "step": 2473 }, { "epoch": 0.73, "learning_rate": 1.4755841051293272e-05, "loss": 1.2431, "step": 2474 }, { "epoch": 0.73, "learning_rate": 1.472635862789162e-05, "loss": 1.2322, "step": 2475 }, { "epoch": 0.73, "learning_rate": 1.4696899040730217e-05, "loss": 1.2192, "step": 2476 }, { "epoch": 0.73, "learning_rate": 1.4667462316427576e-05, "loss": 1.2225, "step": 2477 }, { "epoch": 0.73, "learning_rate": 1.4638048481581537e-05, "loss": 1.2898, "step": 2478 }, { "epoch": 0.73, "learning_rate": 1.4608657562769231e-05, "loss": 1.2722, "step": 2479 }, { "epoch": 0.73, "learning_rate": 1.4579289586547112e-05, "loss": 1.2377, "step": 2480 }, { "epoch": 0.73, "learning_rate": 1.4549944579450897e-05, "loss": 1.2683, "step": 2481 }, { "epoch": 0.73, "learning_rate": 1.452062256799556e-05, "loss": 1.2552, "step": 2482 }, { "epoch": 0.73, "learning_rate": 1.4491323578675265e-05, "loss": 1.1738, "step": 2483 }, { "epoch": 0.73, "learning_rate": 1.44620476379634e-05, "loss": 1.244, "step": 2484 }, { "epoch": 0.73, "learning_rate": 1.4432794772312519e-05, "loss": 1.224, "step": 2485 }, { "epoch": 0.73, "learning_rate": 1.440356500815435e-05, "loss": 1.2493, "step": 2486 }, { "epoch": 0.73, "learning_rate": 1.4374358371899698e-05, "loss": 1.2124, "step": 2487 }, { "epoch": 0.73, "learning_rate": 1.4345174889938514e-05, "loss": 1.231, "step": 2488 }, { "epoch": 0.73, "learning_rate": 1.43160145886398e-05, "loss": 1.2927, "step": 2489 }, { "epoch": 0.73, "learning_rate": 1.4286877494351626e-05, "loss": 1.2223, "step": 2490 }, { "epoch": 0.73, "learning_rate": 1.4257763633401114e-05, "loss": 1.2237, "step": 2491 }, { "epoch": 0.73, "learning_rate": 1.4228673032094342e-05, "loss": 1.2503, "step": 2492 }, { "epoch": 0.73, "learning_rate": 1.4199605716716414e-05, "loss": 1.2374, "step": 2493 }, { "epoch": 0.73, "learning_rate": 1.4170561713531381e-05, "loss": 1.2554, "step": 2494 }, { "epoch": 0.73, "learning_rate": 1.4141541048782244e-05, "loss": 1.2518, "step": 2495 }, { "epoch": 0.73, "learning_rate": 1.4112543748690874e-05, "loss": 1.2048, "step": 2496 }, { "epoch": 0.73, "learning_rate": 1.4083569839458071e-05, "loss": 1.2436, "step": 2497 }, { "epoch": 0.73, "learning_rate": 1.4054619347263487e-05, "loss": 1.2357, "step": 2498 }, { "epoch": 0.73, "learning_rate": 1.4025692298265634e-05, "loss": 1.2359, "step": 2499 }, { "epoch": 0.73, "learning_rate": 1.3996788718601795e-05, "loss": 1.2, "step": 2500 }, { "epoch": 0.73, "eval_loss": 0.1154995933175087, "eval_runtime": 25.0354, "eval_samples_per_second": 79.887, "eval_steps_per_second": 0.639, "step": 2500 }, { "epoch": 0.73, "learning_rate": 1.3967908634388088e-05, "loss": 1.2448, "step": 2501 }, { "epoch": 0.73, "learning_rate": 1.3939052071719398e-05, "loss": 1.1813, "step": 2502 }, { "epoch": 0.73, "learning_rate": 1.3910219056669335e-05, "loss": 1.2292, "step": 2503 }, { "epoch": 0.73, "learning_rate": 1.3881409615290244e-05, "loss": 1.2634, "step": 2504 }, { "epoch": 0.73, "learning_rate": 1.3852623773613178e-05, "loss": 1.2264, "step": 2505 }, { "epoch": 0.74, "learning_rate": 1.382386155764787e-05, "loss": 1.2159, "step": 2506 }, { "epoch": 0.74, "learning_rate": 1.3795122993382673e-05, "loss": 1.2163, "step": 2507 }, { "epoch": 0.74, "learning_rate": 1.3766408106784601e-05, "loss": 1.271, "step": 2508 }, { "epoch": 0.74, "learning_rate": 1.3737716923799265e-05, "loss": 1.1833, "step": 2509 }, { "epoch": 0.74, "learning_rate": 1.3709049470350873e-05, "loss": 1.2695, "step": 2510 }, { "epoch": 0.74, "learning_rate": 1.3680405772342145e-05, "loss": 1.2319, "step": 2511 }, { "epoch": 0.74, "learning_rate": 1.3651785855654387e-05, "loss": 1.2132, "step": 2512 }, { "epoch": 0.74, "learning_rate": 1.3623189746147398e-05, "loss": 1.2702, "step": 2513 }, { "epoch": 0.74, "learning_rate": 1.3594617469659474e-05, "loss": 1.2386, "step": 2514 }, { "epoch": 0.74, "learning_rate": 1.3566069052007351e-05, "loss": 1.1893, "step": 2515 }, { "epoch": 0.74, "learning_rate": 1.3537544518986225e-05, "loss": 1.2318, "step": 2516 }, { "epoch": 0.74, "learning_rate": 1.3509043896369725e-05, "loss": 1.2074, "step": 2517 }, { "epoch": 0.74, "learning_rate": 1.3480567209909863e-05, "loss": 1.2181, "step": 2518 }, { "epoch": 0.74, "learning_rate": 1.3452114485337e-05, "loss": 1.2923, "step": 2519 }, { "epoch": 0.74, "learning_rate": 1.3423685748359883e-05, "loss": 1.2487, "step": 2520 }, { "epoch": 0.74, "learning_rate": 1.3395281024665559e-05, "loss": 1.2368, "step": 2521 }, { "epoch": 0.74, "learning_rate": 1.3366900339919413e-05, "loss": 1.2459, "step": 2522 }, { "epoch": 0.74, "learning_rate": 1.333854371976505e-05, "loss": 1.2546, "step": 2523 }, { "epoch": 0.74, "learning_rate": 1.331021118982438e-05, "loss": 1.1991, "step": 2524 }, { "epoch": 0.74, "learning_rate": 1.3281902775697533e-05, "loss": 1.243, "step": 2525 }, { "epoch": 0.74, "learning_rate": 1.3253618502962872e-05, "loss": 1.2207, "step": 2526 }, { "epoch": 0.74, "learning_rate": 1.3225358397176868e-05, "loss": 1.2564, "step": 2527 }, { "epoch": 0.74, "learning_rate": 1.319712248387424e-05, "loss": 1.2266, "step": 2528 }, { "epoch": 0.74, "learning_rate": 1.3168910788567812e-05, "loss": 1.1931, "step": 2529 }, { "epoch": 0.74, "learning_rate": 1.3140723336748554e-05, "loss": 1.2316, "step": 2530 }, { "epoch": 0.74, "learning_rate": 1.3112560153885467e-05, "loss": 1.2273, "step": 2531 }, { "epoch": 0.74, "learning_rate": 1.3084421265425689e-05, "loss": 1.2325, "step": 2532 }, { "epoch": 0.74, "learning_rate": 1.3056306696794377e-05, "loss": 1.2264, "step": 2533 }, { "epoch": 0.74, "learning_rate": 1.3028216473394731e-05, "loss": 1.2132, "step": 2534 }, { "epoch": 0.74, "learning_rate": 1.3000150620607914e-05, "loss": 1.2362, "step": 2535 }, { "epoch": 0.74, "learning_rate": 1.297210916379311e-05, "loss": 1.2117, "step": 2536 }, { "epoch": 0.74, "learning_rate": 1.2944092128287448e-05, "loss": 1.2321, "step": 2537 }, { "epoch": 0.74, "learning_rate": 1.2916099539405997e-05, "loss": 1.2319, "step": 2538 }, { "epoch": 0.74, "learning_rate": 1.2888131422441705e-05, "loss": 1.2277, "step": 2539 }, { "epoch": 0.75, "learning_rate": 1.2860187802665443e-05, "loss": 1.2549, "step": 2540 }, { "epoch": 0.75, "learning_rate": 1.2832268705325936e-05, "loss": 1.2441, "step": 2541 }, { "epoch": 0.75, "learning_rate": 1.2804374155649768e-05, "loss": 1.2355, "step": 2542 }, { "epoch": 0.75, "learning_rate": 1.2776504178841291e-05, "loss": 1.2515, "step": 2543 }, { "epoch": 0.75, "learning_rate": 1.274865880008271e-05, "loss": 1.2325, "step": 2544 }, { "epoch": 0.75, "learning_rate": 1.2720838044533981e-05, "loss": 1.235, "step": 2545 }, { "epoch": 0.75, "learning_rate": 1.2693041937332828e-05, "loss": 1.2889, "step": 2546 }, { "epoch": 0.75, "learning_rate": 1.266527050359466e-05, "loss": 1.1998, "step": 2547 }, { "epoch": 0.75, "learning_rate": 1.2637523768412633e-05, "loss": 1.2619, "step": 2548 }, { "epoch": 0.75, "learning_rate": 1.2609801756857584e-05, "loss": 1.2925, "step": 2549 }, { "epoch": 0.75, "learning_rate": 1.2582104493978004e-05, "loss": 1.2543, "step": 2550 }, { "epoch": 0.75, "learning_rate": 1.2554432004800004e-05, "loss": 1.2287, "step": 2551 }, { "epoch": 0.75, "learning_rate": 1.2526784314327326e-05, "loss": 1.2638, "step": 2552 }, { "epoch": 0.75, "learning_rate": 1.249916144754133e-05, "loss": 1.225, "step": 2553 }, { "epoch": 0.75, "learning_rate": 1.2471563429400888e-05, "loss": 1.2406, "step": 2554 }, { "epoch": 0.75, "learning_rate": 1.2443990284842471e-05, "loss": 1.248, "step": 2555 }, { "epoch": 0.75, "learning_rate": 1.2416442038780052e-05, "loss": 1.236, "step": 2556 }, { "epoch": 0.75, "learning_rate": 1.2388918716105134e-05, "loss": 1.2519, "step": 2557 }, { "epoch": 0.75, "learning_rate": 1.2361420341686641e-05, "loss": 1.1818, "step": 2558 }, { "epoch": 0.75, "learning_rate": 1.2333946940371013e-05, "loss": 1.227, "step": 2559 }, { "epoch": 0.75, "learning_rate": 1.2306498536982101e-05, "loss": 1.2499, "step": 2560 }, { "epoch": 0.75, "learning_rate": 1.2279075156321185e-05, "loss": 1.2406, "step": 2561 }, { "epoch": 0.75, "learning_rate": 1.2251676823166902e-05, "loss": 1.2111, "step": 2562 }, { "epoch": 0.75, "learning_rate": 1.22243035622753e-05, "loss": 1.2227, "step": 2563 }, { "epoch": 0.75, "learning_rate": 1.219695539837972e-05, "loss": 1.2179, "step": 2564 }, { "epoch": 0.75, "learning_rate": 1.2169632356190877e-05, "loss": 1.2715, "step": 2565 }, { "epoch": 0.75, "learning_rate": 1.2142334460396775e-05, "loss": 1.2443, "step": 2566 }, { "epoch": 0.75, "learning_rate": 1.2115061735662667e-05, "loss": 1.2308, "step": 2567 }, { "epoch": 0.75, "learning_rate": 1.2087814206631095e-05, "loss": 1.2608, "step": 2568 }, { "epoch": 0.75, "learning_rate": 1.2060591897921828e-05, "loss": 1.2301, "step": 2569 }, { "epoch": 0.75, "learning_rate": 1.2033394834131849e-05, "loss": 1.2317, "step": 2570 }, { "epoch": 0.75, "learning_rate": 1.20062230398353e-05, "loss": 1.2301, "step": 2571 }, { "epoch": 0.75, "learning_rate": 1.1979076539583536e-05, "loss": 1.3083, "step": 2572 }, { "epoch": 0.75, "learning_rate": 1.1951955357905033e-05, "loss": 1.2181, "step": 2573 }, { "epoch": 0.76, "learning_rate": 1.1924859519305398e-05, "loss": 1.2385, "step": 2574 }, { "epoch": 0.76, "learning_rate": 1.1897789048267324e-05, "loss": 1.2355, "step": 2575 }, { "epoch": 0.76, "learning_rate": 1.18707439692506e-05, "loss": 1.2356, "step": 2576 }, { "epoch": 0.76, "learning_rate": 1.1843724306692077e-05, "loss": 1.1748, "step": 2577 }, { "epoch": 0.76, "learning_rate": 1.1816730085005603e-05, "loss": 1.2271, "step": 2578 }, { "epoch": 0.76, "learning_rate": 1.1789761328582082e-05, "loss": 1.2511, "step": 2579 }, { "epoch": 0.76, "learning_rate": 1.1762818061789391e-05, "loss": 1.2503, "step": 2580 }, { "epoch": 0.76, "learning_rate": 1.1735900308972385e-05, "loss": 1.235, "step": 2581 }, { "epoch": 0.76, "learning_rate": 1.1709008094452838e-05, "loss": 1.2292, "step": 2582 }, { "epoch": 0.76, "learning_rate": 1.1682141442529473e-05, "loss": 1.2627, "step": 2583 }, { "epoch": 0.76, "learning_rate": 1.1655300377477915e-05, "loss": 1.2375, "step": 2584 }, { "epoch": 0.76, "learning_rate": 1.1628484923550673e-05, "loss": 1.2418, "step": 2585 }, { "epoch": 0.76, "learning_rate": 1.1601695104977088e-05, "loss": 1.2385, "step": 2586 }, { "epoch": 0.76, "learning_rate": 1.1574930945963367e-05, "loss": 1.2818, "step": 2587 }, { "epoch": 0.76, "learning_rate": 1.1548192470692516e-05, "loss": 1.24, "step": 2588 }, { "epoch": 0.76, "learning_rate": 1.152147970332436e-05, "loss": 1.2065, "step": 2589 }, { "epoch": 0.76, "learning_rate": 1.1494792667995448e-05, "loss": 1.2575, "step": 2590 }, { "epoch": 0.76, "learning_rate": 1.1468131388819117e-05, "loss": 1.2718, "step": 2591 }, { "epoch": 0.76, "learning_rate": 1.1441495889885422e-05, "loss": 1.2654, "step": 2592 }, { "epoch": 0.76, "learning_rate": 1.1414886195261135e-05, "loss": 1.1758, "step": 2593 }, { "epoch": 0.76, "learning_rate": 1.1388302328989673e-05, "loss": 1.2959, "step": 2594 }, { "epoch": 0.76, "learning_rate": 1.1361744315091156e-05, "loss": 1.1677, "step": 2595 }, { "epoch": 0.76, "learning_rate": 1.1335212177562337e-05, "loss": 1.2087, "step": 2596 }, { "epoch": 0.76, "learning_rate": 1.130870594037658e-05, "loss": 1.183, "step": 2597 }, { "epoch": 0.76, "learning_rate": 1.128222562748384e-05, "loss": 1.2485, "step": 2598 }, { "epoch": 0.76, "learning_rate": 1.1255771262810665e-05, "loss": 1.2639, "step": 2599 }, { "epoch": 0.76, "learning_rate": 1.1229342870260145e-05, "loss": 1.1955, "step": 2600 }, { "epoch": 0.76, "eval_loss": 0.11455909162759781, "eval_runtime": 24.6076, "eval_samples_per_second": 81.276, "eval_steps_per_second": 0.65, "step": 2600 }, { "epoch": 0.76, "learning_rate": 1.1202940473711936e-05, "loss": 1.2584, "step": 2601 }, { "epoch": 0.76, "learning_rate": 1.117656409702212e-05, "loss": 1.2378, "step": 2602 }, { "epoch": 0.76, "learning_rate": 1.1150213764023357e-05, "loss": 1.2373, "step": 2603 }, { "epoch": 0.76, "learning_rate": 1.1123889498524738e-05, "loss": 1.1769, "step": 2604 }, { "epoch": 0.76, "learning_rate": 1.1097591324311816e-05, "loss": 1.2034, "step": 2605 }, { "epoch": 0.76, "learning_rate": 1.1071319265146534e-05, "loss": 1.2334, "step": 2606 }, { "epoch": 0.76, "learning_rate": 1.1045073344767277e-05, "loss": 1.2454, "step": 2607 }, { "epoch": 0.77, "learning_rate": 1.1018853586888794e-05, "loss": 1.2275, "step": 2608 }, { "epoch": 0.77, "learning_rate": 1.0992660015202215e-05, "loss": 1.2286, "step": 2609 }, { "epoch": 0.77, "learning_rate": 1.0966492653374958e-05, "loss": 1.2538, "step": 2610 }, { "epoch": 0.77, "learning_rate": 1.0940351525050815e-05, "loss": 1.2119, "step": 2611 }, { "epoch": 0.77, "learning_rate": 1.0914236653849848e-05, "loss": 1.2354, "step": 2612 }, { "epoch": 0.77, "learning_rate": 1.0888148063368411e-05, "loss": 1.2354, "step": 2613 }, { "epoch": 0.77, "learning_rate": 1.0862085777179078e-05, "loss": 1.2388, "step": 2614 }, { "epoch": 0.77, "learning_rate": 1.0836049818830685e-05, "loss": 1.2042, "step": 2615 }, { "epoch": 0.77, "learning_rate": 1.0810040211848269e-05, "loss": 1.147, "step": 2616 }, { "epoch": 0.77, "learning_rate": 1.0784056979733078e-05, "loss": 1.2882, "step": 2617 }, { "epoch": 0.77, "learning_rate": 1.075810014596248e-05, "loss": 1.2108, "step": 2618 }, { "epoch": 0.77, "learning_rate": 1.0732169733990036e-05, "loss": 1.2464, "step": 2619 }, { "epoch": 0.77, "learning_rate": 1.0706265767245419e-05, "loss": 1.2367, "step": 2620 }, { "epoch": 0.77, "learning_rate": 1.068038826913441e-05, "loss": 1.2634, "step": 2621 }, { "epoch": 0.77, "learning_rate": 1.0654537263038854e-05, "loss": 1.2037, "step": 2622 }, { "epoch": 0.77, "learning_rate": 1.0628712772316682e-05, "loss": 1.2043, "step": 2623 }, { "epoch": 0.77, "learning_rate": 1.060291482030186e-05, "loss": 1.2435, "step": 2624 }, { "epoch": 0.77, "learning_rate": 1.0577143430304386e-05, "loss": 1.1848, "step": 2625 }, { "epoch": 0.77, "learning_rate": 1.0551398625610227e-05, "loss": 1.2369, "step": 2626 }, { "epoch": 0.77, "learning_rate": 1.0525680429481353e-05, "loss": 1.2523, "step": 2627 }, { "epoch": 0.77, "learning_rate": 1.04999888651557e-05, "loss": 1.278, "step": 2628 }, { "epoch": 0.77, "learning_rate": 1.0474323955847101e-05, "loss": 1.2587, "step": 2629 }, { "epoch": 0.77, "learning_rate": 1.0448685724745347e-05, "loss": 1.1863, "step": 2630 }, { "epoch": 0.77, "learning_rate": 1.0423074195016101e-05, "loss": 1.2123, "step": 2631 }, { "epoch": 0.77, "learning_rate": 1.0397489389800918e-05, "loss": 1.244, "step": 2632 }, { "epoch": 0.77, "learning_rate": 1.0371931332217175e-05, "loss": 1.2313, "step": 2633 }, { "epoch": 0.77, "learning_rate": 1.034640004535811e-05, "loss": 1.1999, "step": 2634 }, { "epoch": 0.77, "learning_rate": 1.0320895552292755e-05, "loss": 1.2186, "step": 2635 }, { "epoch": 0.77, "learning_rate": 1.0295417876065957e-05, "loss": 1.2526, "step": 2636 }, { "epoch": 0.77, "learning_rate": 1.0269967039698288e-05, "loss": 1.2198, "step": 2637 }, { "epoch": 0.77, "learning_rate": 1.0244543066186102e-05, "loss": 1.1616, "step": 2638 }, { "epoch": 0.77, "learning_rate": 1.0219145978501492e-05, "loss": 1.244, "step": 2639 }, { "epoch": 0.77, "learning_rate": 1.0193775799592211e-05, "loss": 1.2348, "step": 2640 }, { "epoch": 0.77, "learning_rate": 1.0168432552381757e-05, "loss": 1.2204, "step": 2641 }, { "epoch": 0.78, "learning_rate": 1.014311625976923e-05, "loss": 1.2469, "step": 2642 }, { "epoch": 0.78, "learning_rate": 1.0117826944629425e-05, "loss": 1.2617, "step": 2643 }, { "epoch": 0.78, "learning_rate": 1.0092564629812744e-05, "loss": 1.2143, "step": 2644 }, { "epoch": 0.78, "learning_rate": 1.0067329338145209e-05, "loss": 1.2124, "step": 2645 }, { "epoch": 0.78, "learning_rate": 1.0042121092428378e-05, "loss": 1.2113, "step": 2646 }, { "epoch": 0.78, "learning_rate": 1.001693991543942e-05, "loss": 1.3103, "step": 2647 }, { "epoch": 0.78, "learning_rate": 9.991785829931024e-06, "loss": 1.2004, "step": 2648 }, { "epoch": 0.78, "learning_rate": 9.966658858631421e-06, "loss": 1.2091, "step": 2649 }, { "epoch": 0.78, "learning_rate": 9.941559024244296e-06, "loss": 1.2327, "step": 2650 }, { "epoch": 0.78, "learning_rate": 9.916486349448862e-06, "loss": 1.248, "step": 2651 }, { "epoch": 0.78, "learning_rate": 9.891440856899783e-06, "loss": 1.1986, "step": 2652 }, { "epoch": 0.78, "learning_rate": 9.866422569227133e-06, "loss": 1.2516, "step": 2653 }, { "epoch": 0.78, "learning_rate": 9.84143150903643e-06, "loss": 1.2427, "step": 2654 }, { "epoch": 0.78, "learning_rate": 9.816467698908592e-06, "loss": 1.2454, "step": 2655 }, { "epoch": 0.78, "learning_rate": 9.791531161399911e-06, "loss": 1.2508, "step": 2656 }, { "epoch": 0.78, "learning_rate": 9.766621919042025e-06, "loss": 1.2134, "step": 2657 }, { "epoch": 0.78, "learning_rate": 9.74173999434192e-06, "loss": 1.2839, "step": 2658 }, { "epoch": 0.78, "learning_rate": 9.716885409781893e-06, "loss": 1.2116, "step": 2659 }, { "epoch": 0.78, "learning_rate": 9.692058187819561e-06, "loss": 1.1961, "step": 2660 }, { "epoch": 0.78, "learning_rate": 9.667258350887776e-06, "loss": 1.2083, "step": 2661 }, { "epoch": 0.78, "learning_rate": 9.642485921394673e-06, "loss": 1.2718, "step": 2662 }, { "epoch": 0.78, "learning_rate": 9.61774092172362e-06, "loss": 1.2387, "step": 2663 }, { "epoch": 0.78, "learning_rate": 9.593023374233206e-06, "loss": 1.2312, "step": 2664 }, { "epoch": 0.78, "learning_rate": 9.568333301257193e-06, "loss": 1.184, "step": 2665 }, { "epoch": 0.78, "learning_rate": 9.543670725104533e-06, "loss": 1.2414, "step": 2666 }, { "epoch": 0.78, "learning_rate": 9.519035668059345e-06, "loss": 1.243, "step": 2667 }, { "epoch": 0.78, "learning_rate": 9.494428152380868e-06, "loss": 1.2126, "step": 2668 }, { "epoch": 0.78, "learning_rate": 9.469848200303446e-06, "loss": 1.265, "step": 2669 }, { "epoch": 0.78, "learning_rate": 9.445295834036537e-06, "loss": 1.1648, "step": 2670 }, { "epoch": 0.78, "learning_rate": 9.420771075764663e-06, "loss": 1.2899, "step": 2671 }, { "epoch": 0.78, "learning_rate": 9.39627394764742e-06, "loss": 1.2146, "step": 2672 }, { "epoch": 0.78, "learning_rate": 9.371804471819401e-06, "loss": 1.2437, "step": 2673 }, { "epoch": 0.78, "learning_rate": 9.347362670390242e-06, "loss": 1.2059, "step": 2674 }, { "epoch": 0.78, "learning_rate": 9.322948565444565e-06, "loss": 1.2322, "step": 2675 }, { "epoch": 0.79, "learning_rate": 9.298562179041996e-06, "loss": 1.1642, "step": 2676 }, { "epoch": 0.79, "learning_rate": 9.27420353321704e-06, "loss": 1.2458, "step": 2677 }, { "epoch": 0.79, "learning_rate": 9.249872649979203e-06, "loss": 1.2401, "step": 2678 }, { "epoch": 0.79, "learning_rate": 9.225569551312894e-06, "loss": 1.1887, "step": 2679 }, { "epoch": 0.79, "learning_rate": 9.201294259177413e-06, "loss": 1.225, "step": 2680 }, { "epoch": 0.79, "learning_rate": 9.177046795506918e-06, "loss": 1.2292, "step": 2681 }, { "epoch": 0.79, "learning_rate": 9.152827182210445e-06, "loss": 1.2199, "step": 2682 }, { "epoch": 0.79, "learning_rate": 9.128635441171854e-06, "loss": 1.1984, "step": 2683 }, { "epoch": 0.79, "learning_rate": 9.10447159424984e-06, "loss": 1.2554, "step": 2684 }, { "epoch": 0.79, "learning_rate": 9.080335663277853e-06, "loss": 1.2628, "step": 2685 }, { "epoch": 0.79, "learning_rate": 9.056227670064155e-06, "loss": 1.2014, "step": 2686 }, { "epoch": 0.79, "learning_rate": 9.032147636391752e-06, "loss": 1.2398, "step": 2687 }, { "epoch": 0.79, "learning_rate": 9.008095584018406e-06, "loss": 1.279, "step": 2688 }, { "epoch": 0.79, "learning_rate": 8.984071534676553e-06, "loss": 1.2308, "step": 2689 }, { "epoch": 0.79, "learning_rate": 8.960075510073358e-06, "loss": 1.2382, "step": 2690 }, { "epoch": 0.79, "learning_rate": 8.936107531890665e-06, "loss": 1.2046, "step": 2691 }, { "epoch": 0.79, "learning_rate": 8.912167621784981e-06, "loss": 1.2601, "step": 2692 }, { "epoch": 0.79, "learning_rate": 8.888255801387417e-06, "loss": 1.212, "step": 2693 }, { "epoch": 0.79, "learning_rate": 8.86437209230374e-06, "loss": 1.2477, "step": 2694 }, { "epoch": 0.79, "learning_rate": 8.840516516114297e-06, "loss": 1.2104, "step": 2695 }, { "epoch": 0.79, "learning_rate": 8.81668909437404e-06, "loss": 1.264, "step": 2696 }, { "epoch": 0.79, "learning_rate": 8.79288984861244e-06, "loss": 1.2217, "step": 2697 }, { "epoch": 0.79, "learning_rate": 8.76911880033354e-06, "loss": 1.1972, "step": 2698 }, { "epoch": 0.79, "learning_rate": 8.745375971015905e-06, "loss": 1.2174, "step": 2699 }, { "epoch": 0.79, "learning_rate": 8.721661382112598e-06, "loss": 1.2837, "step": 2700 }, { "epoch": 0.79, "eval_loss": 0.11844494938850403, "eval_runtime": 24.3874, "eval_samples_per_second": 82.01, "eval_steps_per_second": 0.656, "step": 2700 }, { "epoch": 0.79, "learning_rate": 8.697975055051148e-06, "loss": 1.2068, "step": 2701 }, { "epoch": 0.79, "learning_rate": 8.67431701123357e-06, "loss": 1.2612, "step": 2702 }, { "epoch": 0.79, "learning_rate": 8.65068727203633e-06, "loss": 1.2669, "step": 2703 }, { "epoch": 0.79, "learning_rate": 8.627085858810273e-06, "loss": 1.234, "step": 2704 }, { "epoch": 0.79, "learning_rate": 8.603512792880706e-06, "loss": 1.1902, "step": 2705 }, { "epoch": 0.79, "learning_rate": 8.579968095547287e-06, "loss": 1.1747, "step": 2706 }, { "epoch": 0.79, "learning_rate": 8.556451788084068e-06, "loss": 1.2325, "step": 2707 }, { "epoch": 0.79, "learning_rate": 8.53296389173941e-06, "loss": 1.1815, "step": 2708 }, { "epoch": 0.79, "learning_rate": 8.509504427736042e-06, "loss": 1.2421, "step": 2709 }, { "epoch": 0.8, "learning_rate": 8.486073417270977e-06, "loss": 1.2172, "step": 2710 }, { "epoch": 0.8, "learning_rate": 8.462670881515551e-06, "loss": 1.2663, "step": 2711 }, { "epoch": 0.8, "learning_rate": 8.43929684161532e-06, "loss": 1.2126, "step": 2712 }, { "epoch": 0.8, "learning_rate": 8.415951318690134e-06, "loss": 1.2192, "step": 2713 }, { "epoch": 0.8, "learning_rate": 8.392634333834073e-06, "loss": 1.2315, "step": 2714 }, { "epoch": 0.8, "learning_rate": 8.369345908115405e-06, "loss": 1.2324, "step": 2715 }, { "epoch": 0.8, "learning_rate": 8.346086062576625e-06, "loss": 1.2132, "step": 2716 }, { "epoch": 0.8, "learning_rate": 8.32285481823437e-06, "loss": 1.1859, "step": 2717 }, { "epoch": 0.8, "learning_rate": 8.299652196079462e-06, "loss": 1.246, "step": 2718 }, { "epoch": 0.8, "learning_rate": 8.276478217076853e-06, "loss": 1.2097, "step": 2719 }, { "epoch": 0.8, "learning_rate": 8.253332902165626e-06, "loss": 1.2351, "step": 2720 }, { "epoch": 0.8, "learning_rate": 8.230216272258924e-06, "loss": 1.256, "step": 2721 }, { "epoch": 0.8, "learning_rate": 8.20712834824402e-06, "loss": 1.2387, "step": 2722 }, { "epoch": 0.8, "learning_rate": 8.184069150982217e-06, "loss": 1.2777, "step": 2723 }, { "epoch": 0.8, "learning_rate": 8.161038701308892e-06, "loss": 1.2382, "step": 2724 }, { "epoch": 0.8, "learning_rate": 8.138037020033405e-06, "loss": 1.2049, "step": 2725 }, { "epoch": 0.8, "learning_rate": 8.115064127939152e-06, "loss": 1.2584, "step": 2726 }, { "epoch": 0.8, "learning_rate": 8.092120045783525e-06, "loss": 1.1788, "step": 2727 }, { "epoch": 0.8, "learning_rate": 8.069204794297843e-06, "loss": 1.2454, "step": 2728 }, { "epoch": 0.8, "learning_rate": 8.046318394187413e-06, "loss": 1.2364, "step": 2729 }, { "epoch": 0.8, "learning_rate": 8.02346086613146e-06, "loss": 1.2618, "step": 2730 }, { "epoch": 0.8, "learning_rate": 8.000632230783139e-06, "loss": 1.2176, "step": 2731 }, { "epoch": 0.8, "learning_rate": 7.97783250876945e-06, "loss": 1.2166, "step": 2732 }, { "epoch": 0.8, "learning_rate": 7.955061720691315e-06, "loss": 1.2745, "step": 2733 }, { "epoch": 0.8, "learning_rate": 7.932319887123498e-06, "loss": 1.2356, "step": 2734 }, { "epoch": 0.8, "learning_rate": 7.909607028614608e-06, "loss": 1.2045, "step": 2735 }, { "epoch": 0.8, "learning_rate": 7.886923165687048e-06, "loss": 1.2222, "step": 2736 }, { "epoch": 0.8, "learning_rate": 7.864268318837051e-06, "loss": 1.2042, "step": 2737 }, { "epoch": 0.8, "learning_rate": 7.841642508534617e-06, "loss": 1.2726, "step": 2738 }, { "epoch": 0.8, "learning_rate": 7.81904575522352e-06, "loss": 1.2902, "step": 2739 }, { "epoch": 0.8, "learning_rate": 7.79647807932126e-06, "loss": 1.2236, "step": 2740 }, { "epoch": 0.8, "learning_rate": 7.773939501219079e-06, "loss": 1.2082, "step": 2741 }, { "epoch": 0.8, "learning_rate": 7.751430041281933e-06, "loss": 1.2309, "step": 2742 }, { "epoch": 0.8, "learning_rate": 7.728949719848464e-06, "loss": 1.22, "step": 2743 }, { "epoch": 0.81, "learning_rate": 7.706498557230957e-06, "loss": 1.2023, "step": 2744 }, { "epoch": 0.81, "learning_rate": 7.684076573715398e-06, "loss": 1.2432, "step": 2745 }, { "epoch": 0.81, "learning_rate": 7.661683789561372e-06, "loss": 1.2366, "step": 2746 }, { "epoch": 0.81, "learning_rate": 7.639320225002106e-06, "loss": 1.2486, "step": 2747 }, { "epoch": 0.81, "learning_rate": 7.6169859002443954e-06, "loss": 1.2309, "step": 2748 }, { "epoch": 0.81, "learning_rate": 7.594680835468642e-06, "loss": 1.207, "step": 2749 }, { "epoch": 0.81, "learning_rate": 7.5724050508288e-06, "loss": 1.201, "step": 2750 }, { "epoch": 0.81, "learning_rate": 7.55015856645239e-06, "loss": 1.1928, "step": 2751 }, { "epoch": 0.81, "learning_rate": 7.527941402440388e-06, "loss": 1.208, "step": 2752 }, { "epoch": 0.81, "learning_rate": 7.505753578867354e-06, "loss": 1.2131, "step": 2753 }, { "epoch": 0.81, "learning_rate": 7.483595115781304e-06, "loss": 1.2215, "step": 2754 }, { "epoch": 0.81, "learning_rate": 7.461466033203737e-06, "loss": 1.2117, "step": 2755 }, { "epoch": 0.81, "learning_rate": 7.439366351129584e-06, "loss": 1.2395, "step": 2756 }, { "epoch": 0.81, "learning_rate": 7.417296089527224e-06, "loss": 1.1907, "step": 2757 }, { "epoch": 0.81, "learning_rate": 7.395255268338459e-06, "loss": 1.2015, "step": 2758 }, { "epoch": 0.81, "learning_rate": 7.3732439074784975e-06, "loss": 1.1883, "step": 2759 }, { "epoch": 0.81, "learning_rate": 7.351262026835888e-06, "loss": 1.2204, "step": 2760 }, { "epoch": 0.81, "learning_rate": 7.329309646272578e-06, "loss": 1.2139, "step": 2761 }, { "epoch": 0.81, "learning_rate": 7.3073867856238605e-06, "loss": 1.17, "step": 2762 }, { "epoch": 0.81, "learning_rate": 7.285493464698343e-06, "loss": 1.2624, "step": 2763 }, { "epoch": 0.81, "learning_rate": 7.263629703277937e-06, "loss": 1.1784, "step": 2764 }, { "epoch": 0.81, "learning_rate": 7.241795521117851e-06, "loss": 1.2237, "step": 2765 }, { "epoch": 0.81, "learning_rate": 7.219990937946581e-06, "loss": 1.2276, "step": 2766 }, { "epoch": 0.81, "learning_rate": 7.19821597346587e-06, "loss": 1.2761, "step": 2767 }, { "epoch": 0.81, "learning_rate": 7.176470647350675e-06, "loss": 1.211, "step": 2768 }, { "epoch": 0.81, "learning_rate": 7.1547549792492055e-06, "loss": 1.2301, "step": 2769 }, { "epoch": 0.81, "learning_rate": 7.1330689887828584e-06, "loss": 1.1788, "step": 2770 }, { "epoch": 0.81, "learning_rate": 7.111412695546236e-06, "loss": 1.2365, "step": 2771 }, { "epoch": 0.81, "learning_rate": 7.0897861191070535e-06, "loss": 1.2359, "step": 2772 }, { "epoch": 0.81, "learning_rate": 7.068189279006237e-06, "loss": 1.2164, "step": 2773 }, { "epoch": 0.81, "learning_rate": 7.046622194757806e-06, "loss": 1.2178, "step": 2774 }, { "epoch": 0.81, "learning_rate": 7.025084885848925e-06, "loss": 1.2217, "step": 2775 }, { "epoch": 0.81, "learning_rate": 7.003577371739809e-06, "loss": 1.2481, "step": 2776 }, { "epoch": 0.81, "learning_rate": 6.982099671863789e-06, "loss": 1.2377, "step": 2777 }, { "epoch": 0.82, "learning_rate": 6.960651805627248e-06, "loss": 1.2646, "step": 2778 }, { "epoch": 0.82, "learning_rate": 6.939233792409617e-06, "loss": 1.248, "step": 2779 }, { "epoch": 0.82, "learning_rate": 6.917845651563327e-06, "loss": 1.2326, "step": 2780 }, { "epoch": 0.82, "learning_rate": 6.896487402413852e-06, "loss": 1.2271, "step": 2781 }, { "epoch": 0.82, "learning_rate": 6.875159064259649e-06, "loss": 1.2621, "step": 2782 }, { "epoch": 0.82, "learning_rate": 6.853860656372125e-06, "loss": 1.2149, "step": 2783 }, { "epoch": 0.82, "learning_rate": 6.832592197995662e-06, "loss": 1.2139, "step": 2784 }, { "epoch": 0.82, "learning_rate": 6.8113537083475875e-06, "loss": 1.215, "step": 2785 }, { "epoch": 0.82, "learning_rate": 6.790145206618146e-06, "loss": 1.2323, "step": 2786 }, { "epoch": 0.82, "learning_rate": 6.768966711970466e-06, "loss": 1.1941, "step": 2787 }, { "epoch": 0.82, "learning_rate": 6.747818243540587e-06, "loss": 1.2535, "step": 2788 }, { "epoch": 0.82, "learning_rate": 6.726699820437419e-06, "loss": 1.2215, "step": 2789 }, { "epoch": 0.82, "learning_rate": 6.7056114617426935e-06, "loss": 1.227, "step": 2790 }, { "epoch": 0.82, "learning_rate": 6.684553186511022e-06, "loss": 1.2417, "step": 2791 }, { "epoch": 0.82, "learning_rate": 6.6635250137697935e-06, "loss": 1.2695, "step": 2792 }, { "epoch": 0.82, "learning_rate": 6.642526962519218e-06, "loss": 1.2686, "step": 2793 }, { "epoch": 0.82, "learning_rate": 6.621559051732287e-06, "loss": 1.2237, "step": 2794 }, { "epoch": 0.82, "learning_rate": 6.60062130035477e-06, "loss": 1.2168, "step": 2795 }, { "epoch": 0.82, "learning_rate": 6.579713727305148e-06, "loss": 1.2229, "step": 2796 }, { "epoch": 0.82, "learning_rate": 6.558836351474673e-06, "loss": 1.2629, "step": 2797 }, { "epoch": 0.82, "learning_rate": 6.537989191727292e-06, "loss": 1.2359, "step": 2798 }, { "epoch": 0.82, "learning_rate": 6.517172266899669e-06, "loss": 1.2424, "step": 2799 }, { "epoch": 0.82, "learning_rate": 6.496385595801116e-06, "loss": 1.2364, "step": 2800 }, { "epoch": 0.82, "eval_loss": 0.11666639894247055, "eval_runtime": 24.6904, "eval_samples_per_second": 81.003, "eval_steps_per_second": 0.648, "step": 2800 }, { "epoch": 0.82, "learning_rate": 6.475629197213637e-06, "loss": 1.2704, "step": 2801 }, { "epoch": 0.82, "learning_rate": 6.454903089891878e-06, "loss": 1.2297, "step": 2802 }, { "epoch": 0.82, "learning_rate": 6.4342072925631125e-06, "loss": 1.2271, "step": 2803 }, { "epoch": 0.82, "learning_rate": 6.413541823927216e-06, "loss": 1.2392, "step": 2804 }, { "epoch": 0.82, "learning_rate": 6.3929067026566735e-06, "loss": 1.2415, "step": 2805 }, { "epoch": 0.82, "learning_rate": 6.3723019473965575e-06, "loss": 1.2173, "step": 2806 }, { "epoch": 0.82, "learning_rate": 6.351727576764477e-06, "loss": 1.183, "step": 2807 }, { "epoch": 0.82, "learning_rate": 6.331183609350602e-06, "loss": 1.2815, "step": 2808 }, { "epoch": 0.82, "learning_rate": 6.310670063717638e-06, "loss": 1.2043, "step": 2809 }, { "epoch": 0.82, "learning_rate": 6.2901869584008014e-06, "loss": 1.2583, "step": 2810 }, { "epoch": 0.82, "learning_rate": 6.269734311907782e-06, "loss": 1.204, "step": 2811 }, { "epoch": 0.82, "learning_rate": 6.249312142718768e-06, "loss": 1.247, "step": 2812 }, { "epoch": 0.83, "learning_rate": 6.228920469286408e-06, "loss": 1.1945, "step": 2813 }, { "epoch": 0.83, "learning_rate": 6.208559310035807e-06, "loss": 1.1955, "step": 2814 }, { "epoch": 0.83, "learning_rate": 6.188228683364461e-06, "loss": 1.2397, "step": 2815 }, { "epoch": 0.83, "learning_rate": 6.167928607642317e-06, "loss": 1.2503, "step": 2816 }, { "epoch": 0.83, "learning_rate": 6.147659101211694e-06, "loss": 1.2377, "step": 2817 }, { "epoch": 0.83, "learning_rate": 6.127420182387314e-06, "loss": 1.2282, "step": 2818 }, { "epoch": 0.83, "learning_rate": 6.107211869456228e-06, "loss": 1.2269, "step": 2819 }, { "epoch": 0.83, "learning_rate": 6.087034180677855e-06, "loss": 1.2014, "step": 2820 }, { "epoch": 0.83, "learning_rate": 6.066887134283939e-06, "loss": 1.1965, "step": 2821 }, { "epoch": 0.83, "learning_rate": 6.046770748478552e-06, "loss": 1.2002, "step": 2822 }, { "epoch": 0.83, "learning_rate": 6.026685041438018e-06, "loss": 1.2899, "step": 2823 }, { "epoch": 0.83, "learning_rate": 6.006630031310989e-06, "loss": 1.2434, "step": 2824 }, { "epoch": 0.83, "learning_rate": 5.986605736218352e-06, "loss": 1.2274, "step": 2825 }, { "epoch": 0.83, "learning_rate": 5.9666121742532765e-06, "loss": 1.2363, "step": 2826 }, { "epoch": 0.83, "learning_rate": 5.9466493634810965e-06, "loss": 1.2649, "step": 2827 }, { "epoch": 0.83, "learning_rate": 5.926717321939417e-06, "loss": 1.1863, "step": 2828 }, { "epoch": 0.83, "learning_rate": 5.906816067638028e-06, "loss": 1.2161, "step": 2829 }, { "epoch": 0.83, "learning_rate": 5.886945618558906e-06, "loss": 1.2488, "step": 2830 }, { "epoch": 0.83, "learning_rate": 5.867105992656163e-06, "loss": 1.252, "step": 2831 }, { "epoch": 0.83, "learning_rate": 5.8472972078560975e-06, "loss": 1.209, "step": 2832 }, { "epoch": 0.83, "learning_rate": 5.827519282057128e-06, "loss": 1.2218, "step": 2833 }, { "epoch": 0.83, "learning_rate": 5.807772233129792e-06, "loss": 1.2406, "step": 2834 }, { "epoch": 0.83, "learning_rate": 5.788056078916704e-06, "loss": 1.2355, "step": 2835 }, { "epoch": 0.83, "learning_rate": 5.7683708372325975e-06, "loss": 1.2175, "step": 2836 }, { "epoch": 0.83, "learning_rate": 5.748716525864262e-06, "loss": 1.2035, "step": 2837 }, { "epoch": 0.83, "learning_rate": 5.729093162570545e-06, "loss": 1.2608, "step": 2838 }, { "epoch": 0.83, "learning_rate": 5.709500765082307e-06, "loss": 1.2076, "step": 2839 }, { "epoch": 0.83, "learning_rate": 5.689939351102456e-06, "loss": 1.2484, "step": 2840 }, { "epoch": 0.83, "learning_rate": 5.6704089383058915e-06, "loss": 1.2032, "step": 2841 }, { "epoch": 0.83, "learning_rate": 5.650909544339511e-06, "loss": 1.2321, "step": 2842 }, { "epoch": 0.83, "learning_rate": 5.631441186822168e-06, "loss": 1.1893, "step": 2843 }, { "epoch": 0.83, "learning_rate": 5.61200388334469e-06, "loss": 1.2397, "step": 2844 }, { "epoch": 0.83, "learning_rate": 5.592597651469835e-06, "loss": 1.2058, "step": 2845 }, { "epoch": 0.83, "learning_rate": 5.573222508732303e-06, "loss": 1.2172, "step": 2846 }, { "epoch": 0.84, "learning_rate": 5.553878472638667e-06, "loss": 1.2746, "step": 2847 }, { "epoch": 0.84, "learning_rate": 5.534565560667426e-06, "loss": 1.1764, "step": 2848 }, { "epoch": 0.84, "learning_rate": 5.515283790268946e-06, "loss": 1.2196, "step": 2849 }, { "epoch": 0.84, "learning_rate": 5.496033178865463e-06, "loss": 1.2319, "step": 2850 }, { "epoch": 0.84, "learning_rate": 5.476813743851037e-06, "loss": 1.1739, "step": 2851 }, { "epoch": 0.84, "learning_rate": 5.457625502591582e-06, "loss": 1.2115, "step": 2852 }, { "epoch": 0.84, "learning_rate": 5.4384684724248096e-06, "loss": 1.2597, "step": 2853 }, { "epoch": 0.84, "learning_rate": 5.419342670660248e-06, "loss": 1.2105, "step": 2854 }, { "epoch": 0.84, "learning_rate": 5.4002481145791895e-06, "loss": 1.2293, "step": 2855 }, { "epoch": 0.84, "learning_rate": 5.381184821434699e-06, "loss": 1.2067, "step": 2856 }, { "epoch": 0.84, "learning_rate": 5.362152808451618e-06, "loss": 1.2765, "step": 2857 }, { "epoch": 0.84, "learning_rate": 5.343152092826476e-06, "loss": 1.2315, "step": 2858 }, { "epoch": 0.84, "learning_rate": 5.324182691727564e-06, "loss": 1.2039, "step": 2859 }, { "epoch": 0.84, "learning_rate": 5.30524462229487e-06, "loss": 1.2252, "step": 2860 }, { "epoch": 0.84, "learning_rate": 5.286337901640069e-06, "loss": 1.2742, "step": 2861 }, { "epoch": 0.84, "learning_rate": 5.267462546846495e-06, "loss": 1.1883, "step": 2862 }, { "epoch": 0.84, "learning_rate": 5.2486185749691665e-06, "loss": 1.2121, "step": 2863 }, { "epoch": 0.84, "learning_rate": 5.229806003034741e-06, "loss": 1.2229, "step": 2864 }, { "epoch": 0.84, "learning_rate": 5.211024848041479e-06, "loss": 1.2716, "step": 2865 }, { "epoch": 0.84, "learning_rate": 5.192275126959292e-06, "loss": 1.2361, "step": 2866 }, { "epoch": 0.84, "learning_rate": 5.173556856729649e-06, "loss": 1.1855, "step": 2867 }, { "epoch": 0.84, "learning_rate": 5.154870054265635e-06, "loss": 1.2309, "step": 2868 }, { "epoch": 0.84, "learning_rate": 5.1362147364518854e-06, "loss": 1.2629, "step": 2869 }, { "epoch": 0.84, "learning_rate": 5.117590920144606e-06, "loss": 1.2222, "step": 2870 }, { "epoch": 0.84, "learning_rate": 5.0989986221715006e-06, "loss": 1.2088, "step": 2871 }, { "epoch": 0.84, "learning_rate": 5.080437859331823e-06, "loss": 1.231, "step": 2872 }, { "epoch": 0.84, "learning_rate": 5.061908648396338e-06, "loss": 1.2033, "step": 2873 }, { "epoch": 0.84, "learning_rate": 5.043411006107293e-06, "loss": 1.2219, "step": 2874 }, { "epoch": 0.84, "learning_rate": 5.024944949178392e-06, "loss": 1.2036, "step": 2875 }, { "epoch": 0.84, "learning_rate": 5.006510494294831e-06, "loss": 1.2974, "step": 2876 }, { "epoch": 0.84, "learning_rate": 4.988107658113235e-06, "loss": 1.1788, "step": 2877 }, { "epoch": 0.84, "learning_rate": 4.969736457261669e-06, "loss": 1.2012, "step": 2878 }, { "epoch": 0.84, "learning_rate": 4.951396908339594e-06, "loss": 1.2058, "step": 2879 }, { "epoch": 0.84, "learning_rate": 4.933089027917892e-06, "loss": 1.2347, "step": 2880 }, { "epoch": 0.85, "learning_rate": 4.914812832538829e-06, "loss": 1.2341, "step": 2881 }, { "epoch": 0.85, "learning_rate": 4.8965683387160215e-06, "loss": 1.2282, "step": 2882 }, { "epoch": 0.85, "learning_rate": 4.878355562934465e-06, "loss": 1.2579, "step": 2883 }, { "epoch": 0.85, "learning_rate": 4.860174521650485e-06, "loss": 1.2452, "step": 2884 }, { "epoch": 0.85, "learning_rate": 4.8420252312917445e-06, "loss": 1.2058, "step": 2885 }, { "epoch": 0.85, "learning_rate": 4.823907708257198e-06, "loss": 1.2358, "step": 2886 }, { "epoch": 0.85, "learning_rate": 4.805821968917106e-06, "loss": 1.2515, "step": 2887 }, { "epoch": 0.85, "learning_rate": 4.787768029613014e-06, "loss": 1.2075, "step": 2888 }, { "epoch": 0.85, "learning_rate": 4.769745906657739e-06, "loss": 1.2191, "step": 2889 }, { "epoch": 0.85, "learning_rate": 4.7517556163353275e-06, "loss": 1.1623, "step": 2890 }, { "epoch": 0.85, "learning_rate": 4.733797174901087e-06, "loss": 1.2402, "step": 2891 }, { "epoch": 0.85, "learning_rate": 4.71587059858154e-06, "loss": 1.2316, "step": 2892 }, { "epoch": 0.85, "learning_rate": 4.697975903574419e-06, "loss": 1.1947, "step": 2893 }, { "epoch": 0.85, "learning_rate": 4.6801131060486334e-06, "loss": 1.1914, "step": 2894 }, { "epoch": 0.85, "learning_rate": 4.662282222144297e-06, "loss": 1.2065, "step": 2895 }, { "epoch": 0.85, "learning_rate": 4.6444832679726705e-06, "loss": 1.203, "step": 2896 }, { "epoch": 0.85, "learning_rate": 4.626716259616175e-06, "loss": 1.2354, "step": 2897 }, { "epoch": 0.85, "learning_rate": 4.608981213128347e-06, "loss": 1.2328, "step": 2898 }, { "epoch": 0.85, "learning_rate": 4.59127814453387e-06, "loss": 1.2053, "step": 2899 }, { "epoch": 0.85, "learning_rate": 4.573607069828514e-06, "loss": 1.2304, "step": 2900 }, { "epoch": 0.85, "eval_loss": 0.1184241771697998, "eval_runtime": 24.4832, "eval_samples_per_second": 81.689, "eval_steps_per_second": 0.654, "step": 2900 }, { "epoch": 0.85, "learning_rate": 4.555968004979163e-06, "loss": 1.2277, "step": 2901 }, { "epoch": 0.85, "learning_rate": 4.538360965923745e-06, "loss": 1.2671, "step": 2902 }, { "epoch": 0.85, "learning_rate": 4.520785968571266e-06, "loss": 1.1668, "step": 2903 }, { "epoch": 0.85, "learning_rate": 4.503243028801794e-06, "loss": 1.2114, "step": 2904 }, { "epoch": 0.85, "learning_rate": 4.485732162466421e-06, "loss": 1.204, "step": 2905 }, { "epoch": 0.85, "learning_rate": 4.468253385387246e-06, "loss": 1.2262, "step": 2906 }, { "epoch": 0.85, "learning_rate": 4.450806713357394e-06, "loss": 1.2251, "step": 2907 }, { "epoch": 0.85, "learning_rate": 4.433392162140968e-06, "loss": 1.2646, "step": 2908 }, { "epoch": 0.85, "learning_rate": 4.4160097474730624e-06, "loss": 1.147, "step": 2909 }, { "epoch": 0.85, "learning_rate": 4.3986594850597084e-06, "loss": 1.2801, "step": 2910 }, { "epoch": 0.85, "learning_rate": 4.381341390577909e-06, "loss": 1.2174, "step": 2911 }, { "epoch": 0.85, "learning_rate": 4.364055479675595e-06, "loss": 1.2117, "step": 2912 }, { "epoch": 0.85, "learning_rate": 4.3468017679716245e-06, "loss": 1.2398, "step": 2913 }, { "epoch": 0.85, "learning_rate": 4.32958027105574e-06, "loss": 1.207, "step": 2914 }, { "epoch": 0.86, "learning_rate": 4.312391004488592e-06, "loss": 1.2114, "step": 2915 }, { "epoch": 0.86, "learning_rate": 4.2952339838017075e-06, "loss": 1.2373, "step": 2916 }, { "epoch": 0.86, "learning_rate": 4.278109224497486e-06, "loss": 1.2899, "step": 2917 }, { "epoch": 0.86, "learning_rate": 4.261016742049155e-06, "loss": 1.2266, "step": 2918 }, { "epoch": 0.86, "learning_rate": 4.2439565519007916e-06, "loss": 1.1913, "step": 2919 }, { "epoch": 0.86, "learning_rate": 4.2269286694673e-06, "loss": 1.2056, "step": 2920 }, { "epoch": 0.86, "learning_rate": 4.209933110134392e-06, "loss": 1.2724, "step": 2921 }, { "epoch": 0.86, "learning_rate": 4.192969889258552e-06, "loss": 1.1911, "step": 2922 }, { "epoch": 0.86, "learning_rate": 4.176039022167064e-06, "loss": 1.1573, "step": 2923 }, { "epoch": 0.86, "learning_rate": 4.159140524157983e-06, "loss": 1.261, "step": 2924 }, { "epoch": 0.86, "learning_rate": 4.142274410500111e-06, "loss": 1.244, "step": 2925 }, { "epoch": 0.86, "learning_rate": 4.12544069643297e-06, "loss": 1.2519, "step": 2926 }, { "epoch": 0.86, "learning_rate": 4.108639397166831e-06, "loss": 1.2032, "step": 2927 }, { "epoch": 0.86, "learning_rate": 4.0918705278826685e-06, "loss": 1.2047, "step": 2928 }, { "epoch": 0.86, "learning_rate": 4.075134103732166e-06, "loss": 1.2571, "step": 2929 }, { "epoch": 0.86, "learning_rate": 4.0584301398376615e-06, "loss": 1.2438, "step": 2930 }, { "epoch": 0.86, "learning_rate": 4.041758651292193e-06, "loss": 1.223, "step": 2931 }, { "epoch": 0.86, "learning_rate": 4.025119653159442e-06, "loss": 1.218, "step": 2932 }, { "epoch": 0.86, "learning_rate": 4.008513160473726e-06, "loss": 1.2348, "step": 2933 }, { "epoch": 0.86, "learning_rate": 3.991939188240009e-06, "loss": 1.2058, "step": 2934 }, { "epoch": 0.86, "learning_rate": 3.975397751433856e-06, "loss": 1.2057, "step": 2935 }, { "epoch": 0.86, "learning_rate": 3.958888865001451e-06, "loss": 1.2426, "step": 2936 }, { "epoch": 0.86, "learning_rate": 3.942412543859546e-06, "loss": 1.1983, "step": 2937 }, { "epoch": 0.86, "learning_rate": 3.925968802895477e-06, "loss": 1.1782, "step": 2938 }, { "epoch": 0.86, "learning_rate": 3.909557656967154e-06, "loss": 1.1886, "step": 2939 }, { "epoch": 0.86, "learning_rate": 3.893179120903012e-06, "loss": 1.2433, "step": 2940 }, { "epoch": 0.86, "learning_rate": 3.876833209502051e-06, "loss": 1.1762, "step": 2941 }, { "epoch": 0.86, "learning_rate": 3.860519937533753e-06, "loss": 1.2309, "step": 2942 }, { "epoch": 0.86, "learning_rate": 3.8442393197381456e-06, "loss": 1.1942, "step": 2943 }, { "epoch": 0.86, "learning_rate": 3.827991370825732e-06, "loss": 1.251, "step": 2944 }, { "epoch": 0.86, "learning_rate": 3.8117761054775116e-06, "loss": 1.1705, "step": 2945 }, { "epoch": 0.86, "learning_rate": 3.795593538344928e-06, "loss": 1.2067, "step": 2946 }, { "epoch": 0.86, "learning_rate": 3.7794436840498947e-06, "loss": 1.2723, "step": 2947 }, { "epoch": 0.86, "learning_rate": 3.7633265571847787e-06, "loss": 1.2135, "step": 2948 }, { "epoch": 0.87, "learning_rate": 3.7472421723123662e-06, "loss": 1.2081, "step": 2949 }, { "epoch": 0.87, "learning_rate": 3.731190543965837e-06, "loss": 1.2205, "step": 2950 }, { "epoch": 0.87, "learning_rate": 3.7151716866488065e-06, "loss": 1.2077, "step": 2951 }, { "epoch": 0.87, "learning_rate": 3.69918561483527e-06, "loss": 1.2172, "step": 2952 }, { "epoch": 0.87, "learning_rate": 3.683232342969594e-06, "loss": 1.2355, "step": 2953 }, { "epoch": 0.87, "learning_rate": 3.6673118854664956e-06, "loss": 1.1658, "step": 2954 }, { "epoch": 0.87, "learning_rate": 3.6514242567110738e-06, "loss": 1.2889, "step": 2955 }, { "epoch": 0.87, "learning_rate": 3.6355694710587463e-06, "loss": 1.1549, "step": 2956 }, { "epoch": 0.87, "learning_rate": 3.6197475428352415e-06, "loss": 1.2331, "step": 2957 }, { "epoch": 0.87, "learning_rate": 3.603958486336625e-06, "loss": 1.1841, "step": 2958 }, { "epoch": 0.87, "learning_rate": 3.5882023158292455e-06, "loss": 1.2429, "step": 2959 }, { "epoch": 0.87, "learning_rate": 3.5724790455497504e-06, "loss": 1.2262, "step": 2960 }, { "epoch": 0.87, "learning_rate": 3.5567886897050418e-06, "loss": 1.1866, "step": 2961 }, { "epoch": 0.87, "learning_rate": 3.5411312624722903e-06, "loss": 1.2479, "step": 2962 }, { "epoch": 0.87, "learning_rate": 3.5255067779989173e-06, "loss": 1.1975, "step": 2963 }, { "epoch": 0.87, "learning_rate": 3.50991525040258e-06, "loss": 1.2536, "step": 2964 }, { "epoch": 0.87, "learning_rate": 3.4943566937711438e-06, "loss": 1.1653, "step": 2965 }, { "epoch": 0.87, "learning_rate": 3.4788311221626914e-06, "loss": 1.2561, "step": 2966 }, { "epoch": 0.87, "learning_rate": 3.463338549605508e-06, "loss": 1.1945, "step": 2967 }, { "epoch": 0.87, "learning_rate": 3.4478789900980545e-06, "loss": 1.1836, "step": 2968 }, { "epoch": 0.87, "learning_rate": 3.432452457608957e-06, "loss": 1.23, "step": 2969 }, { "epoch": 0.87, "learning_rate": 3.4170589660770117e-06, "loss": 1.2527, "step": 2970 }, { "epoch": 0.87, "learning_rate": 3.4016985294111503e-06, "loss": 1.2219, "step": 2971 }, { "epoch": 0.87, "learning_rate": 3.386371161490458e-06, "loss": 1.2407, "step": 2972 }, { "epoch": 0.87, "learning_rate": 3.371076876164101e-06, "loss": 1.2047, "step": 2973 }, { "epoch": 0.87, "learning_rate": 3.355815687251389e-06, "loss": 1.2244, "step": 2974 }, { "epoch": 0.87, "learning_rate": 3.340587608541719e-06, "loss": 1.1801, "step": 2975 }, { "epoch": 0.87, "learning_rate": 3.3253926537945634e-06, "loss": 1.2002, "step": 2976 }, { "epoch": 0.87, "learning_rate": 3.3102308367394743e-06, "loss": 1.2499, "step": 2977 }, { "epoch": 0.87, "learning_rate": 3.295102171076039e-06, "loss": 1.1515, "step": 2978 }, { "epoch": 0.87, "learning_rate": 3.2800066704739185e-06, "loss": 1.2028, "step": 2979 }, { "epoch": 0.87, "learning_rate": 3.264944348572807e-06, "loss": 1.2098, "step": 2980 }, { "epoch": 0.87, "learning_rate": 3.2499152189823913e-06, "loss": 1.2218, "step": 2981 }, { "epoch": 0.87, "learning_rate": 3.234919295282395e-06, "loss": 1.1926, "step": 2982 }, { "epoch": 0.88, "learning_rate": 3.2199565910225305e-06, "loss": 1.2407, "step": 2983 }, { "epoch": 0.88, "learning_rate": 3.2050271197224903e-06, "loss": 1.1564, "step": 2984 }, { "epoch": 0.88, "learning_rate": 3.1901308948719368e-06, "loss": 1.2477, "step": 2985 }, { "epoch": 0.88, "learning_rate": 3.1752679299304944e-06, "loss": 1.2311, "step": 2986 }, { "epoch": 0.88, "learning_rate": 3.1604382383277453e-06, "loss": 1.2041, "step": 2987 }, { "epoch": 0.88, "learning_rate": 3.145641833463198e-06, "loss": 1.2169, "step": 2988 }, { "epoch": 0.88, "learning_rate": 3.1308787287062814e-06, "loss": 1.2583, "step": 2989 }, { "epoch": 0.88, "learning_rate": 3.1161489373963395e-06, "loss": 1.2083, "step": 2990 }, { "epoch": 0.88, "learning_rate": 3.101452472842619e-06, "loss": 1.2041, "step": 2991 }, { "epoch": 0.88, "learning_rate": 3.086789348324253e-06, "loss": 1.2512, "step": 2992 }, { "epoch": 0.88, "learning_rate": 3.0721595770902435e-06, "loss": 1.204, "step": 2993 }, { "epoch": 0.88, "learning_rate": 3.0575631723594613e-06, "loss": 1.2294, "step": 2994 }, { "epoch": 0.88, "learning_rate": 3.043000147320623e-06, "loss": 1.1997, "step": 2995 }, { "epoch": 0.88, "learning_rate": 3.0284705151323046e-06, "loss": 1.2148, "step": 2996 }, { "epoch": 0.88, "learning_rate": 3.013974288922876e-06, "loss": 1.2245, "step": 2997 }, { "epoch": 0.88, "learning_rate": 2.9995114817905493e-06, "loss": 1.2108, "step": 2998 }, { "epoch": 0.88, "learning_rate": 2.9850821068033274e-06, "loss": 1.1822, "step": 2999 }, { "epoch": 0.88, "learning_rate": 2.9706861769990225e-06, "loss": 1.24, "step": 3000 }, { "epoch": 0.88, "eval_loss": 0.11790976673364639, "eval_runtime": 24.5107, "eval_samples_per_second": 81.597, "eval_steps_per_second": 0.653, "step": 3000 }, { "epoch": 0.88, "learning_rate": 2.956323705385198e-06, "loss": 1.2293, "step": 3001 }, { "epoch": 0.88, "learning_rate": 2.941994704939215e-06, "loss": 1.2306, "step": 3002 }, { "epoch": 0.88, "learning_rate": 2.927699188608171e-06, "loss": 1.2018, "step": 3003 }, { "epoch": 0.88, "learning_rate": 2.9134371693089235e-06, "loss": 1.2241, "step": 3004 }, { "epoch": 0.88, "learning_rate": 2.89920865992805e-06, "loss": 1.2112, "step": 3005 }, { "epoch": 0.88, "learning_rate": 2.885013673321857e-06, "loss": 1.2146, "step": 3006 }, { "epoch": 0.88, "learning_rate": 2.8708522223163694e-06, "loss": 1.2694, "step": 3007 }, { "epoch": 0.88, "learning_rate": 2.856724319707289e-06, "loss": 1.2034, "step": 3008 }, { "epoch": 0.88, "learning_rate": 2.8426299782600186e-06, "loss": 1.1902, "step": 3009 }, { "epoch": 0.88, "learning_rate": 2.828569210709642e-06, "loss": 1.2194, "step": 3010 }, { "epoch": 0.88, "learning_rate": 2.8145420297609026e-06, "loss": 1.25, "step": 3011 }, { "epoch": 0.88, "learning_rate": 2.800548448088183e-06, "loss": 1.2186, "step": 3012 }, { "epoch": 0.88, "learning_rate": 2.7865884783355234e-06, "loss": 1.1936, "step": 3013 }, { "epoch": 0.88, "learning_rate": 2.77266213311659e-06, "loss": 1.1917, "step": 3014 }, { "epoch": 0.88, "learning_rate": 2.75876942501466e-06, "loss": 1.2397, "step": 3015 }, { "epoch": 0.88, "learning_rate": 2.7449103665826336e-06, "loss": 1.2389, "step": 3016 }, { "epoch": 0.89, "learning_rate": 2.7310849703429788e-06, "loss": 1.2235, "step": 3017 }, { "epoch": 0.89, "learning_rate": 2.717293248787769e-06, "loss": 1.1986, "step": 3018 }, { "epoch": 0.89, "learning_rate": 2.7035352143786543e-06, "loss": 1.2184, "step": 3019 }, { "epoch": 0.89, "learning_rate": 2.6898108795468413e-06, "loss": 1.2179, "step": 3020 }, { "epoch": 0.89, "learning_rate": 2.676120256693069e-06, "loss": 1.2383, "step": 3021 }, { "epoch": 0.89, "learning_rate": 2.662463358187637e-06, "loss": 1.2561, "step": 3022 }, { "epoch": 0.89, "learning_rate": 2.648840196370368e-06, "loss": 1.2288, "step": 3023 }, { "epoch": 0.89, "learning_rate": 2.635250783550607e-06, "loss": 1.2262, "step": 3024 }, { "epoch": 0.89, "learning_rate": 2.621695132007185e-06, "loss": 1.1919, "step": 3025 }, { "epoch": 0.89, "learning_rate": 2.6081732539884465e-06, "loss": 1.2442, "step": 3026 }, { "epoch": 0.89, "learning_rate": 2.594685161712214e-06, "loss": 1.1955, "step": 3027 }, { "epoch": 0.89, "learning_rate": 2.5812308673657825e-06, "loss": 1.2391, "step": 3028 }, { "epoch": 0.89, "learning_rate": 2.5678103831059044e-06, "loss": 1.1847, "step": 3029 }, { "epoch": 0.89, "learning_rate": 2.5544237210587896e-06, "loss": 1.2489, "step": 3030 }, { "epoch": 0.89, "learning_rate": 2.5410708933200924e-06, "loss": 1.2014, "step": 3031 }, { "epoch": 0.89, "learning_rate": 2.527751911954872e-06, "loss": 1.2042, "step": 3032 }, { "epoch": 0.89, "learning_rate": 2.5144667889976316e-06, "loss": 1.2115, "step": 3033 }, { "epoch": 0.89, "learning_rate": 2.501215536452266e-06, "loss": 1.252, "step": 3034 }, { "epoch": 0.89, "learning_rate": 2.487998166292083e-06, "loss": 1.2272, "step": 3035 }, { "epoch": 0.89, "learning_rate": 2.474814690459746e-06, "loss": 1.2268, "step": 3036 }, { "epoch": 0.89, "learning_rate": 2.4616651208673183e-06, "loss": 1.2386, "step": 3037 }, { "epoch": 0.89, "learning_rate": 2.4485494693962197e-06, "loss": 1.165, "step": 3038 }, { "epoch": 0.89, "learning_rate": 2.4354677478972334e-06, "loss": 1.2183, "step": 3039 }, { "epoch": 0.89, "learning_rate": 2.422419968190455e-06, "loss": 1.2144, "step": 3040 }, { "epoch": 0.89, "learning_rate": 2.4094061420653382e-06, "loss": 1.2537, "step": 3041 }, { "epoch": 0.89, "learning_rate": 2.3964262812806504e-06, "loss": 1.166, "step": 3042 }, { "epoch": 0.89, "learning_rate": 2.3834803975644772e-06, "loss": 1.2067, "step": 3043 }, { "epoch": 0.89, "learning_rate": 2.370568502614181e-06, "loss": 1.2219, "step": 3044 }, { "epoch": 0.89, "learning_rate": 2.3576906080964344e-06, "loss": 1.2361, "step": 3045 }, { "epoch": 0.89, "learning_rate": 2.344846725647183e-06, "loss": 1.2276, "step": 3046 }, { "epoch": 0.89, "learning_rate": 2.332036866871645e-06, "loss": 1.2302, "step": 3047 }, { "epoch": 0.89, "learning_rate": 2.3192610433442784e-06, "loss": 1.1819, "step": 3048 }, { "epoch": 0.89, "learning_rate": 2.306519266608809e-06, "loss": 1.266, "step": 3049 }, { "epoch": 0.89, "learning_rate": 2.2938115481781864e-06, "loss": 1.1758, "step": 3050 }, { "epoch": 0.9, "learning_rate": 2.2811378995346e-06, "loss": 1.2106, "step": 3051 }, { "epoch": 0.9, "learning_rate": 2.268498332129441e-06, "loss": 1.2416, "step": 3052 }, { "epoch": 0.9, "learning_rate": 2.2558928573833016e-06, "loss": 1.2043, "step": 3053 }, { "epoch": 0.9, "learning_rate": 2.2433214866859874e-06, "loss": 1.2419, "step": 3054 }, { "epoch": 0.9, "learning_rate": 2.2307842313964876e-06, "loss": 1.2153, "step": 3055 }, { "epoch": 0.9, "learning_rate": 2.2182811028429497e-06, "loss": 1.2821, "step": 3056 }, { "epoch": 0.9, "learning_rate": 2.2058121123226916e-06, "loss": 1.2103, "step": 3057 }, { "epoch": 0.9, "learning_rate": 2.1933772711021995e-06, "loss": 1.2128, "step": 3058 }, { "epoch": 0.9, "learning_rate": 2.1809765904170942e-06, "loss": 1.182, "step": 3059 }, { "epoch": 0.9, "learning_rate": 2.1686100814721157e-06, "loss": 1.254, "step": 3060 }, { "epoch": 0.9, "learning_rate": 2.1562777554411474e-06, "loss": 1.2182, "step": 3061 }, { "epoch": 0.9, "learning_rate": 2.1439796234671874e-06, "loss": 1.2066, "step": 3062 }, { "epoch": 0.9, "learning_rate": 2.1317156966623243e-06, "loss": 1.1914, "step": 3063 }, { "epoch": 0.9, "learning_rate": 2.1194859861077477e-06, "loss": 1.2006, "step": 3064 }, { "epoch": 0.9, "learning_rate": 2.107290502853725e-06, "loss": 1.2594, "step": 3065 }, { "epoch": 0.9, "learning_rate": 2.0951292579196057e-06, "loss": 1.2084, "step": 3066 }, { "epoch": 0.9, "learning_rate": 2.083002262293805e-06, "loss": 1.2186, "step": 3067 }, { "epoch": 0.9, "learning_rate": 2.0709095269337755e-06, "loss": 1.2215, "step": 3068 }, { "epoch": 0.9, "learning_rate": 2.0588510627660253e-06, "loss": 1.214, "step": 3069 }, { "epoch": 0.9, "learning_rate": 2.0468268806861015e-06, "loss": 1.1584, "step": 3070 }, { "epoch": 0.9, "learning_rate": 2.0348369915585666e-06, "loss": 1.2639, "step": 3071 }, { "epoch": 0.9, "learning_rate": 2.0228814062169944e-06, "loss": 1.1341, "step": 3072 }, { "epoch": 0.9, "learning_rate": 2.0109601354639706e-06, "loss": 1.2081, "step": 3073 }, { "epoch": 0.9, "learning_rate": 1.9990731900710745e-06, "loss": 1.2038, "step": 3074 }, { "epoch": 0.9, "learning_rate": 1.9872205807788747e-06, "loss": 1.2376, "step": 3075 }, { "epoch": 0.9, "learning_rate": 1.975402318296902e-06, "loss": 1.1899, "step": 3076 }, { "epoch": 0.9, "learning_rate": 1.9636184133036583e-06, "loss": 1.1916, "step": 3077 }, { "epoch": 0.9, "learning_rate": 1.9518688764466096e-06, "loss": 1.2418, "step": 3078 }, { "epoch": 0.9, "learning_rate": 1.940153718342166e-06, "loss": 1.2071, "step": 3079 }, { "epoch": 0.9, "learning_rate": 1.9284729495756595e-06, "loss": 1.2302, "step": 3080 }, { "epoch": 0.9, "learning_rate": 1.916826580701363e-06, "loss": 1.2173, "step": 3081 }, { "epoch": 0.9, "learning_rate": 1.9052146222424772e-06, "loss": 1.2947, "step": 3082 }, { "epoch": 0.9, "learning_rate": 1.8936370846910845e-06, "loss": 1.2064, "step": 3083 }, { "epoch": 0.9, "learning_rate": 1.8820939785081771e-06, "loss": 1.2323, "step": 3084 }, { "epoch": 0.91, "learning_rate": 1.8705853141236518e-06, "loss": 1.2466, "step": 3085 }, { "epoch": 0.91, "learning_rate": 1.8591111019362662e-06, "loss": 1.2662, "step": 3086 }, { "epoch": 0.91, "learning_rate": 1.8476713523136514e-06, "loss": 1.2127, "step": 3087 }, { "epoch": 0.91, "learning_rate": 1.8362660755923079e-06, "loss": 1.2152, "step": 3088 }, { "epoch": 0.91, "learning_rate": 1.824895282077579e-06, "loss": 1.1888, "step": 3089 }, { "epoch": 0.91, "learning_rate": 1.813558982043655e-06, "loss": 1.2449, "step": 3090 }, { "epoch": 0.91, "learning_rate": 1.8022571857335603e-06, "loss": 1.1833, "step": 3091 }, { "epoch": 0.91, "learning_rate": 1.790989903359135e-06, "loss": 1.1922, "step": 3092 }, { "epoch": 0.91, "learning_rate": 1.7797571451010442e-06, "loss": 1.2021, "step": 3093 }, { "epoch": 0.91, "learning_rate": 1.7685589211087517e-06, "loss": 1.2674, "step": 3094 }, { "epoch": 0.91, "learning_rate": 1.7573952415005235e-06, "loss": 1.1923, "step": 3095 }, { "epoch": 0.91, "learning_rate": 1.7462661163634064e-06, "loss": 1.1775, "step": 3096 }, { "epoch": 0.91, "learning_rate": 1.7351715557532234e-06, "loss": 1.2562, "step": 3097 }, { "epoch": 0.91, "learning_rate": 1.7241115696945821e-06, "loss": 1.2338, "step": 3098 }, { "epoch": 0.91, "learning_rate": 1.713086168180831e-06, "loss": 1.2058, "step": 3099 }, { "epoch": 0.91, "learning_rate": 1.7020953611740765e-06, "loss": 1.2243, "step": 3100 }, { "epoch": 0.91, "eval_loss": 0.11909307539463043, "eval_runtime": 25.2086, "eval_samples_per_second": 79.338, "eval_steps_per_second": 0.635, "step": 3100 }, { "epoch": 0.91, "learning_rate": 1.6911391586051707e-06, "loss": 1.2174, "step": 3101 }, { "epoch": 0.91, "learning_rate": 1.6802175703736923e-06, "loss": 1.1877, "step": 3102 }, { "epoch": 0.91, "learning_rate": 1.669330606347952e-06, "loss": 1.159, "step": 3103 }, { "epoch": 0.91, "learning_rate": 1.658478276364961e-06, "loss": 1.2408, "step": 3104 }, { "epoch": 0.91, "learning_rate": 1.6476605902304532e-06, "loss": 1.2698, "step": 3105 }, { "epoch": 0.91, "learning_rate": 1.6368775577188546e-06, "loss": 1.2128, "step": 3106 }, { "epoch": 0.91, "learning_rate": 1.6261291885732645e-06, "loss": 1.2157, "step": 3107 }, { "epoch": 0.91, "learning_rate": 1.6154154925054878e-06, "loss": 1.2102, "step": 3108 }, { "epoch": 0.91, "learning_rate": 1.6047364791959763e-06, "loss": 1.2071, "step": 3109 }, { "epoch": 0.91, "learning_rate": 1.5940921582938694e-06, "loss": 1.1873, "step": 3110 }, { "epoch": 0.91, "learning_rate": 1.5834825394169317e-06, "loss": 1.2295, "step": 3111 }, { "epoch": 0.91, "learning_rate": 1.572907632151588e-06, "loss": 1.2481, "step": 3112 }, { "epoch": 0.91, "learning_rate": 1.5623674460529014e-06, "loss": 1.2163, "step": 3113 }, { "epoch": 0.91, "learning_rate": 1.5518619906445564e-06, "loss": 1.2, "step": 3114 }, { "epoch": 0.91, "learning_rate": 1.5413912754188531e-06, "loss": 1.2084, "step": 3115 }, { "epoch": 0.91, "learning_rate": 1.5309553098367125e-06, "loss": 1.2058, "step": 3116 }, { "epoch": 0.91, "learning_rate": 1.520554103327645e-06, "loss": 1.2264, "step": 3117 }, { "epoch": 0.91, "learning_rate": 1.510187665289773e-06, "loss": 1.2161, "step": 3118 }, { "epoch": 0.92, "learning_rate": 1.4998560050897726e-06, "loss": 1.2033, "step": 3119 }, { "epoch": 0.92, "learning_rate": 1.4895591320629232e-06, "loss": 1.2176, "step": 3120 }, { "epoch": 0.92, "learning_rate": 1.4792970555130627e-06, "loss": 1.2372, "step": 3121 }, { "epoch": 0.92, "learning_rate": 1.4690697847125956e-06, "loss": 1.1984, "step": 3122 }, { "epoch": 0.92, "learning_rate": 1.458877328902455e-06, "loss": 1.1808, "step": 3123 }, { "epoch": 0.92, "learning_rate": 1.4487196972921402e-06, "loss": 1.2559, "step": 3124 }, { "epoch": 0.92, "learning_rate": 1.4385968990596743e-06, "loss": 1.1869, "step": 3125 }, { "epoch": 0.92, "learning_rate": 1.4285089433516164e-06, "loss": 1.2198, "step": 3126 }, { "epoch": 0.92, "learning_rate": 1.4184558392830306e-06, "loss": 1.2406, "step": 3127 }, { "epoch": 0.92, "learning_rate": 1.4084375959374864e-06, "loss": 1.2032, "step": 3128 }, { "epoch": 0.92, "learning_rate": 1.3984542223670717e-06, "loss": 1.2094, "step": 3129 }, { "epoch": 0.92, "learning_rate": 1.388505727592362e-06, "loss": 1.2095, "step": 3130 }, { "epoch": 0.92, "learning_rate": 1.3785921206024022e-06, "loss": 1.262, "step": 3131 }, { "epoch": 0.92, "learning_rate": 1.3687134103547383e-06, "loss": 1.2094, "step": 3132 }, { "epoch": 0.92, "learning_rate": 1.3588696057753725e-06, "loss": 1.2179, "step": 3133 }, { "epoch": 0.92, "learning_rate": 1.3490607157587677e-06, "loss": 1.1915, "step": 3134 }, { "epoch": 0.92, "learning_rate": 1.339286749167834e-06, "loss": 1.286, "step": 3135 }, { "epoch": 0.92, "learning_rate": 1.3295477148339387e-06, "loss": 1.211, "step": 3136 }, { "epoch": 0.92, "learning_rate": 1.3198436215568778e-06, "loss": 1.2351, "step": 3137 }, { "epoch": 0.92, "learning_rate": 1.3101744781048774e-06, "loss": 1.2168, "step": 3138 }, { "epoch": 0.92, "learning_rate": 1.300540293214585e-06, "loss": 1.2357, "step": 3139 }, { "epoch": 0.92, "learning_rate": 1.290941075591059e-06, "loss": 1.215, "step": 3140 }, { "epoch": 0.92, "learning_rate": 1.281376833907766e-06, "loss": 1.185, "step": 3141 }, { "epoch": 0.92, "learning_rate": 1.2718475768065709e-06, "loss": 1.265, "step": 3142 }, { "epoch": 0.92, "learning_rate": 1.262353312897715e-06, "loss": 1.1601, "step": 3143 }, { "epoch": 0.92, "learning_rate": 1.2528940507598387e-06, "loss": 1.1799, "step": 3144 }, { "epoch": 0.92, "learning_rate": 1.243469798939949e-06, "loss": 1.1758, "step": 3145 }, { "epoch": 0.92, "learning_rate": 1.2340805659534216e-06, "loss": 1.232, "step": 3146 }, { "epoch": 0.92, "learning_rate": 1.224726360283981e-06, "loss": 1.2176, "step": 3147 }, { "epoch": 0.92, "learning_rate": 1.2154071903837107e-06, "loss": 1.2218, "step": 3148 }, { "epoch": 0.92, "learning_rate": 1.2061230646730348e-06, "loss": 1.197, "step": 3149 }, { "epoch": 0.92, "learning_rate": 1.196873991540719e-06, "loss": 1.2303, "step": 3150 }, { "epoch": 0.92, "learning_rate": 1.187659979343847e-06, "loss": 1.2106, "step": 3151 }, { "epoch": 0.92, "learning_rate": 1.1784810364078303e-06, "loss": 1.1829, "step": 3152 }, { "epoch": 0.93, "learning_rate": 1.1693371710263857e-06, "loss": 1.1985, "step": 3153 }, { "epoch": 0.93, "learning_rate": 1.1602283914615486e-06, "loss": 1.2391, "step": 3154 }, { "epoch": 0.93, "learning_rate": 1.151154705943638e-06, "loss": 1.2085, "step": 3155 }, { "epoch": 0.93, "learning_rate": 1.1421161226712684e-06, "loss": 1.1942, "step": 3156 }, { "epoch": 0.93, "learning_rate": 1.1331126498113433e-06, "loss": 1.261, "step": 3157 }, { "epoch": 0.93, "learning_rate": 1.1241442954990301e-06, "loss": 1.1779, "step": 3158 }, { "epoch": 0.93, "learning_rate": 1.1152110678377759e-06, "loss": 1.2233, "step": 3159 }, { "epoch": 0.93, "learning_rate": 1.1063129748992796e-06, "loss": 1.2103, "step": 3160 }, { "epoch": 0.93, "learning_rate": 1.0974500247235053e-06, "loss": 1.2403, "step": 3161 }, { "epoch": 0.93, "learning_rate": 1.0886222253186473e-06, "loss": 1.2104, "step": 3162 }, { "epoch": 0.93, "learning_rate": 1.0798295846611562e-06, "loss": 1.1962, "step": 3163 }, { "epoch": 0.93, "learning_rate": 1.0710721106957034e-06, "loss": 1.2059, "step": 3164 }, { "epoch": 0.93, "learning_rate": 1.062349811335186e-06, "loss": 1.2184, "step": 3165 }, { "epoch": 0.93, "learning_rate": 1.0536626944607264e-06, "loss": 1.2092, "step": 3166 }, { "epoch": 0.93, "learning_rate": 1.045010767921646e-06, "loss": 1.1916, "step": 3167 }, { "epoch": 0.93, "learning_rate": 1.0363940395354777e-06, "loss": 1.2092, "step": 3168 }, { "epoch": 0.93, "learning_rate": 1.0278125170879495e-06, "loss": 1.233, "step": 3169 }, { "epoch": 0.93, "learning_rate": 1.019266208332983e-06, "loss": 1.2049, "step": 3170 }, { "epoch": 0.93, "learning_rate": 1.0107551209926635e-06, "loss": 1.2276, "step": 3171 }, { "epoch": 0.93, "learning_rate": 1.0022792627572797e-06, "loss": 1.2149, "step": 3172 }, { "epoch": 0.93, "learning_rate": 9.938386412852652e-07, "loss": 1.2075, "step": 3173 }, { "epoch": 0.93, "learning_rate": 9.854332642032348e-07, "loss": 1.1898, "step": 3174 }, { "epoch": 0.93, "learning_rate": 9.770631391059316e-07, "loss": 1.2432, "step": 3175 }, { "epoch": 0.93, "learning_rate": 9.687282735562742e-07, "loss": 1.2802, "step": 3176 }, { "epoch": 0.93, "learning_rate": 9.604286750853054e-07, "loss": 1.1746, "step": 3177 }, { "epoch": 0.93, "learning_rate": 9.52164351192213e-07, "loss": 1.2301, "step": 3178 }, { "epoch": 0.93, "learning_rate": 9.439353093442949e-07, "loss": 1.1661, "step": 3179 }, { "epoch": 0.93, "learning_rate": 9.357415569769901e-07, "loss": 1.2088, "step": 3180 }, { "epoch": 0.93, "learning_rate": 9.275831014938385e-07, "loss": 1.184, "step": 3181 }, { "epoch": 0.93, "learning_rate": 9.194599502664858e-07, "loss": 1.23, "step": 3182 }, { "epoch": 0.93, "learning_rate": 9.113721106346918e-07, "loss": 1.1952, "step": 3183 }, { "epoch": 0.93, "learning_rate": 9.033195899062907e-07, "loss": 1.2606, "step": 3184 }, { "epoch": 0.93, "learning_rate": 8.953023953572315e-07, "loss": 1.2358, "step": 3185 }, { "epoch": 0.93, "learning_rate": 8.873205342315061e-07, "loss": 1.2365, "step": 3186 }, { "epoch": 0.94, "learning_rate": 8.793740137412166e-07, "loss": 1.1951, "step": 3187 }, { "epoch": 0.94, "learning_rate": 8.714628410665082e-07, "loss": 1.2474, "step": 3188 }, { "epoch": 0.94, "learning_rate": 8.635870233556054e-07, "loss": 1.1712, "step": 3189 }, { "epoch": 0.94, "learning_rate": 8.557465677247756e-07, "loss": 1.2063, "step": 3190 }, { "epoch": 0.94, "learning_rate": 8.479414812583298e-07, "loss": 1.2672, "step": 3191 }, { "epoch": 0.94, "learning_rate": 8.401717710086354e-07, "loss": 1.2227, "step": 3192 }, { "epoch": 0.94, "learning_rate": 8.324374439960947e-07, "loss": 1.1378, "step": 3193 }, { "epoch": 0.94, "learning_rate": 8.247385072091219e-07, "loss": 1.2223, "step": 3194 }, { "epoch": 0.94, "learning_rate": 8.170749676041656e-07, "loss": 1.2112, "step": 3195 }, { "epoch": 0.94, "learning_rate": 8.094468321056958e-07, "loss": 1.1939, "step": 3196 }, { "epoch": 0.94, "learning_rate": 8.018541076061859e-07, "loss": 1.1832, "step": 3197 }, { "epoch": 0.94, "learning_rate": 7.942968009661123e-07, "loss": 1.2064, "step": 3198 }, { "epoch": 0.94, "learning_rate": 7.867749190139506e-07, "loss": 1.2711, "step": 3199 }, { "epoch": 0.94, "learning_rate": 7.792884685461754e-07, "loss": 1.2301, "step": 3200 }, { "epoch": 0.94, "eval_loss": 0.11856154352426529, "eval_runtime": 24.8968, "eval_samples_per_second": 80.332, "eval_steps_per_second": 0.643, "step": 3200 }, { "epoch": 0.94, "learning_rate": 7.718374563272379e-07, "loss": 1.2119, "step": 3201 }, { "epoch": 0.94, "learning_rate": 7.644218890895705e-07, "loss": 1.2066, "step": 3202 }, { "epoch": 0.94, "learning_rate": 7.570417735335733e-07, "loss": 1.2263, "step": 3203 }, { "epoch": 0.94, "learning_rate": 7.496971163276324e-07, "loss": 1.2003, "step": 3204 }, { "epoch": 0.94, "learning_rate": 7.423879241080745e-07, "loss": 1.2088, "step": 3205 }, { "epoch": 0.94, "learning_rate": 7.351142034791947e-07, "loss": 1.2077, "step": 3206 }, { "epoch": 0.94, "learning_rate": 7.278759610132291e-07, "loss": 1.2038, "step": 3207 }, { "epoch": 0.94, "learning_rate": 7.206732032503638e-07, "loss": 1.1972, "step": 3208 }, { "epoch": 0.94, "learning_rate": 7.135059366987173e-07, "loss": 1.1978, "step": 3209 }, { "epoch": 0.94, "learning_rate": 7.063741678343449e-07, "loss": 1.3071, "step": 3210 }, { "epoch": 0.94, "learning_rate": 6.992779031012209e-07, "loss": 1.2284, "step": 3211 }, { "epoch": 0.94, "learning_rate": 6.922171489112428e-07, "loss": 1.2087, "step": 3212 }, { "epoch": 0.94, "learning_rate": 6.851919116442274e-07, "loss": 1.2022, "step": 3213 }, { "epoch": 0.94, "learning_rate": 6.782021976478925e-07, "loss": 1.2347, "step": 3214 }, { "epoch": 0.94, "learning_rate": 6.712480132378574e-07, "loss": 1.2221, "step": 3215 }, { "epoch": 0.94, "learning_rate": 6.643293646976468e-07, "loss": 1.1756, "step": 3216 }, { "epoch": 0.94, "learning_rate": 6.574462582786733e-07, "loss": 1.2267, "step": 3217 }, { "epoch": 0.94, "learning_rate": 6.505987002002245e-07, "loss": 1.2208, "step": 3218 }, { "epoch": 0.94, "learning_rate": 6.437866966494843e-07, "loss": 1.2152, "step": 3219 }, { "epoch": 0.94, "learning_rate": 6.370102537815026e-07, "loss": 1.1821, "step": 3220 }, { "epoch": 0.94, "learning_rate": 6.302693777191993e-07, "loss": 1.2259, "step": 3221 }, { "epoch": 0.95, "learning_rate": 6.235640745533556e-07, "loss": 1.2004, "step": 3222 }, { "epoch": 0.95, "learning_rate": 6.168943503426139e-07, "loss": 1.2232, "step": 3223 }, { "epoch": 0.95, "learning_rate": 6.102602111134648e-07, "loss": 1.2077, "step": 3224 }, { "epoch": 0.95, "learning_rate": 6.036616628602554e-07, "loss": 1.2323, "step": 3225 }, { "epoch": 0.95, "learning_rate": 5.970987115451587e-07, "loss": 1.2547, "step": 3226 }, { "epoch": 0.95, "learning_rate": 5.905713630981913e-07, "loss": 1.2138, "step": 3227 }, { "epoch": 0.95, "learning_rate": 5.840796234172085e-07, "loss": 1.1924, "step": 3228 }, { "epoch": 0.95, "learning_rate": 5.77623498367883e-07, "loss": 1.2618, "step": 3229 }, { "epoch": 0.95, "learning_rate": 5.712029937837038e-07, "loss": 1.2606, "step": 3230 }, { "epoch": 0.95, "learning_rate": 5.648181154659771e-07, "loss": 1.1964, "step": 3231 }, { "epoch": 0.95, "learning_rate": 5.584688691838258e-07, "loss": 1.1864, "step": 3232 }, { "epoch": 0.95, "learning_rate": 5.521552606741765e-07, "loss": 1.2483, "step": 3233 }, { "epoch": 0.95, "learning_rate": 5.458772956417413e-07, "loss": 1.2191, "step": 3234 }, { "epoch": 0.95, "learning_rate": 5.396349797590405e-07, "loss": 1.2066, "step": 3235 }, { "epoch": 0.95, "learning_rate": 5.334283186663758e-07, "loss": 1.2256, "step": 3236 }, { "epoch": 0.95, "learning_rate": 5.272573179718388e-07, "loss": 1.1937, "step": 3237 }, { "epoch": 0.95, "learning_rate": 5.211219832512893e-07, "loss": 1.1856, "step": 3238 }, { "epoch": 0.95, "learning_rate": 5.150223200483772e-07, "loss": 1.2324, "step": 3239 }, { "epoch": 0.95, "learning_rate": 5.08958333874503e-07, "loss": 1.1985, "step": 3240 }, { "epoch": 0.95, "learning_rate": 5.029300302088436e-07, "loss": 1.1575, "step": 3241 }, { "epoch": 0.95, "learning_rate": 4.969374144983308e-07, "loss": 1.2206, "step": 3242 }, { "epoch": 0.95, "learning_rate": 4.909804921576466e-07, "loss": 1.2243, "step": 3243 }, { "epoch": 0.95, "learning_rate": 4.850592685692279e-07, "loss": 1.2707, "step": 3244 }, { "epoch": 0.95, "learning_rate": 4.791737490832526e-07, "loss": 1.2191, "step": 3245 }, { "epoch": 0.95, "learning_rate": 4.7332393901763586e-07, "loss": 1.2254, "step": 3246 }, { "epoch": 0.95, "learning_rate": 4.6750984365803385e-07, "loss": 1.2153, "step": 3247 }, { "epoch": 0.95, "learning_rate": 4.6173146825782224e-07, "loss": 1.2357, "step": 3248 }, { "epoch": 0.95, "learning_rate": 4.5598881803811336e-07, "loss": 1.2064, "step": 3249 }, { "epoch": 0.95, "learning_rate": 4.5028189818772996e-07, "loss": 1.1862, "step": 3250 }, { "epoch": 0.95, "learning_rate": 4.4461071386321387e-07, "loss": 1.26, "step": 3251 }, { "epoch": 0.95, "learning_rate": 4.389752701888173e-07, "loss": 1.2199, "step": 3252 }, { "epoch": 0.95, "learning_rate": 4.3337557225650695e-07, "loss": 1.1857, "step": 3253 }, { "epoch": 0.95, "learning_rate": 4.27811625125929e-07, "loss": 1.1982, "step": 3254 }, { "epoch": 0.95, "learning_rate": 4.2228343382445746e-07, "loss": 1.2319, "step": 3255 }, { "epoch": 0.96, "learning_rate": 4.1679100334712787e-07, "loss": 1.2478, "step": 3256 }, { "epoch": 0.96, "learning_rate": 4.113343386566904e-07, "loss": 1.2289, "step": 3257 }, { "epoch": 0.96, "learning_rate": 4.0591344468355666e-07, "loss": 1.2184, "step": 3258 }, { "epoch": 0.96, "learning_rate": 4.005283263258353e-07, "loss": 1.2183, "step": 3259 }, { "epoch": 0.96, "learning_rate": 3.9517898844929624e-07, "loss": 1.2018, "step": 3260 }, { "epoch": 0.96, "learning_rate": 3.8986543588738437e-07, "loss": 1.2008, "step": 3261 }, { "epoch": 0.96, "learning_rate": 3.845876734412146e-07, "loss": 1.1776, "step": 3262 }, { "epoch": 0.96, "learning_rate": 3.793457058795591e-07, "loss": 1.2697, "step": 3263 }, { "epoch": 0.96, "learning_rate": 3.741395379388468e-07, "loss": 1.1833, "step": 3264 }, { "epoch": 0.96, "learning_rate": 3.689691743231594e-07, "loss": 1.1906, "step": 3265 }, { "epoch": 0.96, "learning_rate": 3.638346197042264e-07, "loss": 1.1974, "step": 3266 }, { "epoch": 0.96, "learning_rate": 3.587358787214301e-07, "loss": 1.2361, "step": 3267 }, { "epoch": 0.96, "learning_rate": 3.5367295598178307e-07, "loss": 1.2688, "step": 3268 }, { "epoch": 0.96, "learning_rate": 3.4864585605993705e-07, "loss": 1.1855, "step": 3269 }, { "epoch": 0.96, "learning_rate": 3.4365458349817415e-07, "loss": 1.2575, "step": 3270 }, { "epoch": 0.96, "learning_rate": 3.3869914280641125e-07, "loss": 1.1622, "step": 3271 }, { "epoch": 0.96, "learning_rate": 3.337795384621867e-07, "loss": 1.1752, "step": 3272 }, { "epoch": 0.96, "learning_rate": 3.2889577491064693e-07, "loss": 1.2039, "step": 3273 }, { "epoch": 0.96, "learning_rate": 3.240478565645644e-07, "loss": 1.2238, "step": 3274 }, { "epoch": 0.96, "learning_rate": 3.192357878043284e-07, "loss": 1.181, "step": 3275 }, { "epoch": 0.96, "learning_rate": 3.144595729779276e-07, "loss": 1.2328, "step": 3276 }, { "epoch": 0.96, "learning_rate": 3.0971921640095883e-07, "loss": 1.207, "step": 3277 }, { "epoch": 0.96, "learning_rate": 3.050147223566091e-07, "loss": 1.2321, "step": 3278 }, { "epoch": 0.96, "learning_rate": 3.003460950956694e-07, "loss": 1.1912, "step": 3279 }, { "epoch": 0.96, "learning_rate": 2.9571333883652966e-07, "loss": 1.2046, "step": 3280 }, { "epoch": 0.96, "learning_rate": 2.911164577651615e-07, "loss": 1.261, "step": 3281 }, { "epoch": 0.96, "learning_rate": 2.8655545603511357e-07, "loss": 1.1855, "step": 3282 }, { "epoch": 0.96, "learning_rate": 2.8203033776752487e-07, "loss": 1.2228, "step": 3283 }, { "epoch": 0.96, "learning_rate": 2.7754110705111137e-07, "loss": 1.2589, "step": 3284 }, { "epoch": 0.96, "learning_rate": 2.730877679421573e-07, "loss": 1.2224, "step": 3285 }, { "epoch": 0.96, "learning_rate": 2.686703244645239e-07, "loss": 1.1829, "step": 3286 }, { "epoch": 0.96, "learning_rate": 2.642887806096317e-07, "loss": 1.2241, "step": 3287 }, { "epoch": 0.96, "learning_rate": 2.599431403364694e-07, "loss": 1.2519, "step": 3288 }, { "epoch": 0.96, "learning_rate": 2.556334075715805e-07, "loss": 1.2351, "step": 3289 }, { "epoch": 0.97, "learning_rate": 2.51359586209059e-07, "loss": 1.2314, "step": 3290 }, { "epoch": 0.97, "learning_rate": 2.4712168011056693e-07, "loss": 1.1899, "step": 3291 }, { "epoch": 0.97, "learning_rate": 2.4291969310529907e-07, "loss": 1.1722, "step": 3292 }, { "epoch": 0.97, "learning_rate": 2.3875362899000054e-07, "loss": 1.2295, "step": 3293 }, { "epoch": 0.97, "learning_rate": 2.3462349152896247e-07, "loss": 1.197, "step": 3294 }, { "epoch": 0.97, "learning_rate": 2.3052928445400858e-07, "loss": 1.1559, "step": 3295 }, { "epoch": 0.97, "learning_rate": 2.2647101146449524e-07, "loss": 1.2152, "step": 3296 }, { "epoch": 0.97, "learning_rate": 2.2244867622731593e-07, "loss": 1.2135, "step": 3297 }, { "epoch": 0.97, "learning_rate": 2.1846228237689226e-07, "loss": 1.2398, "step": 3298 }, { "epoch": 0.97, "learning_rate": 2.1451183351516969e-07, "loss": 1.2219, "step": 3299 }, { "epoch": 0.97, "learning_rate": 2.1059733321160848e-07, "loss": 1.2838, "step": 3300 }, { "epoch": 0.97, "eval_loss": 0.11866208910942078, "eval_runtime": 24.6429, "eval_samples_per_second": 81.159, "eval_steps_per_second": 0.649, "step": 3300 }, { "epoch": 0.97, "learning_rate": 2.0671878500319707e-07, "loss": 1.2131, "step": 3301 }, { "epoch": 0.97, "learning_rate": 2.0287619239443003e-07, "loss": 1.2107, "step": 3302 }, { "epoch": 0.97, "learning_rate": 1.9906955885732104e-07, "loss": 1.2182, "step": 3303 }, { "epoch": 0.97, "learning_rate": 1.9529888783139883e-07, "loss": 1.2384, "step": 3304 }, { "epoch": 0.97, "learning_rate": 1.9156418272367584e-07, "loss": 1.248, "step": 3305 }, { "epoch": 0.97, "learning_rate": 1.878654469086838e-07, "loss": 1.1917, "step": 3306 }, { "epoch": 0.97, "learning_rate": 1.8420268372845608e-07, "loss": 1.1746, "step": 3307 }, { "epoch": 0.97, "learning_rate": 1.8057589649251862e-07, "loss": 1.2346, "step": 3308 }, { "epoch": 0.97, "learning_rate": 1.7698508847788121e-07, "loss": 1.2001, "step": 3309 }, { "epoch": 0.97, "learning_rate": 1.7343026292905518e-07, "loss": 1.2314, "step": 3310 }, { "epoch": 0.97, "learning_rate": 1.6991142305804454e-07, "loss": 1.1919, "step": 3311 }, { "epoch": 0.97, "learning_rate": 1.6642857204431928e-07, "loss": 1.1796, "step": 3312 }, { "epoch": 0.97, "learning_rate": 1.62981713034851e-07, "loss": 1.228, "step": 3313 }, { "epoch": 0.97, "learning_rate": 1.595708491440817e-07, "loss": 1.1774, "step": 3314 }, { "epoch": 0.97, "learning_rate": 1.561959834539284e-07, "loss": 1.2435, "step": 3315 }, { "epoch": 0.97, "learning_rate": 1.5285711901378285e-07, "loss": 1.1853, "step": 3316 }, { "epoch": 0.97, "learning_rate": 1.4955425884050746e-07, "loss": 1.2228, "step": 3317 }, { "epoch": 0.97, "learning_rate": 1.462874059184305e-07, "loss": 1.2004, "step": 3318 }, { "epoch": 0.97, "learning_rate": 1.4305656319935522e-07, "loss": 1.2149, "step": 3319 }, { "epoch": 0.97, "learning_rate": 1.3986173360253762e-07, "loss": 1.2187, "step": 3320 }, { "epoch": 0.97, "learning_rate": 1.367029200146952e-07, "loss": 1.2098, "step": 3321 }, { "epoch": 0.97, "learning_rate": 1.3358012529000263e-07, "loss": 1.2093, "step": 3322 }, { "epoch": 0.97, "learning_rate": 1.3049335225009175e-07, "loss": 1.2551, "step": 3323 }, { "epoch": 0.98, "learning_rate": 1.2744260368404703e-07, "loss": 1.217, "step": 3324 }, { "epoch": 0.98, "learning_rate": 1.2442788234840575e-07, "loss": 1.2462, "step": 3325 }, { "epoch": 0.98, "learning_rate": 1.2144919096714004e-07, "loss": 1.2635, "step": 3326 }, { "epoch": 0.98, "learning_rate": 1.1850653223167918e-07, "loss": 1.1902, "step": 3327 }, { "epoch": 0.98, "learning_rate": 1.1559990880089189e-07, "loss": 1.1875, "step": 3328 }, { "epoch": 0.98, "learning_rate": 1.1272932330108178e-07, "loss": 1.2146, "step": 3329 }, { "epoch": 0.98, "learning_rate": 1.0989477832599627e-07, "loss": 1.2177, "step": 3330 }, { "epoch": 0.98, "learning_rate": 1.070962764368133e-07, "loss": 1.2562, "step": 3331 }, { "epoch": 0.98, "learning_rate": 1.0433382016215466e-07, "loss": 1.2039, "step": 3332 }, { "epoch": 0.98, "learning_rate": 1.0160741199805036e-07, "loss": 1.2125, "step": 3333 }, { "epoch": 0.98, "learning_rate": 9.891705440798316e-08, "loss": 1.221, "step": 3334 }, { "epoch": 0.98, "learning_rate": 9.626274982284855e-08, "loss": 1.205, "step": 3335 }, { "epoch": 0.98, "learning_rate": 9.364450064096364e-08, "loss": 1.197, "step": 3336 }, { "epoch": 0.98, "learning_rate": 9.106230922808046e-08, "loss": 1.212, "step": 3337 }, { "epoch": 0.98, "learning_rate": 8.851617791735933e-08, "loss": 1.2638, "step": 3338 }, { "epoch": 0.98, "learning_rate": 8.600610900938222e-08, "loss": 1.2388, "step": 3339 }, { "epoch": 0.98, "learning_rate": 8.353210477214379e-08, "loss": 1.2256, "step": 3340 }, { "epoch": 0.98, "learning_rate": 8.109416744105148e-08, "loss": 1.2323, "step": 3341 }, { "epoch": 0.98, "learning_rate": 7.869229921893429e-08, "loss": 1.2112, "step": 3342 }, { "epoch": 0.98, "learning_rate": 7.632650227602511e-08, "loss": 1.1989, "step": 3343 }, { "epoch": 0.98, "learning_rate": 7.399677874995181e-08, "loss": 1.2037, "step": 3344 }, { "epoch": 0.98, "learning_rate": 7.170313074576384e-08, "loss": 1.2638, "step": 3345 }, { "epoch": 0.98, "learning_rate": 6.944556033590566e-08, "loss": 1.2455, "step": 3346 }, { "epoch": 0.98, "learning_rate": 6.722406956023441e-08, "loss": 1.1469, "step": 3347 }, { "epoch": 0.98, "learning_rate": 6.503866042599338e-08, "loss": 1.1828, "step": 3348 }, { "epoch": 0.98, "learning_rate": 6.288933490782967e-08, "loss": 1.2341, "step": 3349 }, { "epoch": 0.98, "learning_rate": 6.07760949477898e-08, "loss": 1.2226, "step": 3350 }, { "epoch": 0.98, "learning_rate": 5.8698942455306384e-08, "loss": 1.2107, "step": 3351 }, { "epoch": 0.98, "learning_rate": 5.6657879307220334e-08, "loss": 1.1971, "step": 3352 }, { "epoch": 0.98, "learning_rate": 5.4652907347745307e-08, "loss": 1.2389, "step": 3353 }, { "epoch": 0.98, "learning_rate": 5.26840283884944e-08, "loss": 1.2183, "step": 3354 }, { "epoch": 0.98, "learning_rate": 5.075124420846678e-08, "loss": 1.176, "step": 3355 }, { "epoch": 0.98, "learning_rate": 4.885455655405213e-08, "loss": 1.224, "step": 3356 }, { "epoch": 0.98, "learning_rate": 4.699396713901294e-08, "loss": 1.1713, "step": 3357 }, { "epoch": 0.99, "learning_rate": 4.516947764451107e-08, "loss": 1.1988, "step": 3358 }, { "epoch": 0.99, "learning_rate": 4.338108971908117e-08, "loss": 1.2227, "step": 3359 }, { "epoch": 0.99, "learning_rate": 4.1628804978630645e-08, "loss": 1.2372, "step": 3360 }, { "epoch": 0.99, "learning_rate": 3.9912625006457426e-08, "loss": 1.1985, "step": 3361 }, { "epoch": 0.99, "learning_rate": 3.823255135323223e-08, "loss": 1.2346, "step": 3362 }, { "epoch": 0.99, "learning_rate": 3.6588585537002955e-08, "loss": 1.2013, "step": 3363 }, { "epoch": 0.99, "learning_rate": 3.498072904319027e-08, "loss": 1.2219, "step": 3364 }, { "epoch": 0.99, "learning_rate": 3.34089833245832e-08, "loss": 1.2462, "step": 3365 }, { "epoch": 0.99, "learning_rate": 3.1873349801356814e-08, "loss": 1.1915, "step": 3366 }, { "epoch": 0.99, "learning_rate": 3.0373829861041204e-08, "loss": 1.1813, "step": 3367 }, { "epoch": 0.99, "learning_rate": 2.8910424858543673e-08, "loss": 1.264, "step": 3368 }, { "epoch": 0.99, "learning_rate": 2.74831361161354e-08, "loss": 1.1941, "step": 3369 }, { "epoch": 0.99, "learning_rate": 2.6091964923455894e-08, "loss": 1.2501, "step": 3370 }, { "epoch": 0.99, "learning_rate": 2.473691253752186e-08, "loss": 1.2374, "step": 3371 }, { "epoch": 0.99, "learning_rate": 2.341798018268726e-08, "loss": 1.198, "step": 3372 }, { "epoch": 0.99, "learning_rate": 2.213516905070101e-08, "loss": 1.2375, "step": 3373 }, { "epoch": 0.99, "learning_rate": 2.088848030064927e-08, "loss": 1.2139, "step": 3374 }, { "epoch": 0.99, "learning_rate": 1.9677915058999854e-08, "loss": 1.2488, "step": 3375 }, { "epoch": 0.99, "learning_rate": 1.8503474419557798e-08, "loss": 1.1891, "step": 3376 }, { "epoch": 0.99, "learning_rate": 1.7365159443509804e-08, "loss": 1.1486, "step": 3377 }, { "epoch": 0.99, "learning_rate": 1.6262971159384243e-08, "loss": 1.2394, "step": 3378 }, { "epoch": 0.99, "learning_rate": 1.5196910563082256e-08, "loss": 1.2217, "step": 3379 }, { "epoch": 0.99, "learning_rate": 1.416697861784222e-08, "loss": 1.1723, "step": 3380 }, { "epoch": 0.99, "learning_rate": 1.3173176254279717e-08, "loss": 1.2329, "step": 3381 }, { "epoch": 0.99, "learning_rate": 1.2215504370352016e-08, "loss": 1.1743, "step": 3382 }, { "epoch": 0.99, "learning_rate": 1.1293963831366939e-08, "loss": 1.2438, "step": 3383 }, { "epoch": 0.99, "learning_rate": 1.0408555469996195e-08, "loss": 1.2094, "step": 3384 }, { "epoch": 0.99, "learning_rate": 9.559280086253176e-09, "loss": 1.215, "step": 3385 }, { "epoch": 0.99, "learning_rate": 8.746138447519593e-09, "loss": 1.2585, "step": 3386 }, { "epoch": 0.99, "learning_rate": 7.969131288509957e-09, "loss": 1.1919, "step": 3387 }, { "epoch": 0.99, "learning_rate": 7.228259311293784e-09, "loss": 1.1525, "step": 3388 }, { "epoch": 0.99, "learning_rate": 6.523523185300029e-09, "loss": 1.1838, "step": 3389 }, { "epoch": 0.99, "learning_rate": 5.854923547294888e-09, "loss": 1.2912, "step": 3390 }, { "epoch": 0.99, "learning_rate": 5.222461001399559e-09, "loss": 1.2181, "step": 3391 }, { "epoch": 1.0, "learning_rate": 4.626136119081359e-09, "loss": 1.2081, "step": 3392 }, { "epoch": 1.0, "learning_rate": 4.065949439158168e-09, "loss": 1.1659, "step": 3393 }, { "epoch": 1.0, "learning_rate": 3.541901467785103e-09, "loss": 1.1937, "step": 3394 }, { "epoch": 1.0, "learning_rate": 3.053992678476725e-09, "loss": 1.2296, "step": 3395 }, { "epoch": 1.0, "learning_rate": 2.602223512089275e-09, "loss": 1.1585, "step": 3396 }, { "epoch": 1.0, "learning_rate": 2.186594376816231e-09, "loss": 1.1891, "step": 3397 }, { "epoch": 1.0, "learning_rate": 1.8071056482105166e-09, "loss": 1.2397, "step": 3398 }, { "epoch": 1.0, "learning_rate": 1.4637576691534095e-09, "loss": 1.2282, "step": 3399 }, { "epoch": 1.0, "learning_rate": 1.156550749890073e-09, "loss": 1.1682, "step": 3400 }, { "epoch": 1.0, "eval_loss": 0.11866223812103271, "eval_runtime": 24.3505, "eval_samples_per_second": 82.134, "eval_steps_per_second": 0.657, "step": 3400 }, { "epoch": 1.0, "learning_rate": 8.854851679984678e-10, "loss": 1.2339, "step": 3401 }, { "epoch": 1.0, "learning_rate": 6.505611683937929e-10, "loss": 1.2304, "step": 3402 }, { "epoch": 1.0, "learning_rate": 4.517789633551317e-10, "loss": 1.2208, "step": 3403 }, { "epoch": 1.0, "learning_rate": 2.891387324899242e-10, "loss": 1.2399, "step": 3404 }, { "epoch": 1.0, "learning_rate": 1.6264062274728986e-10, "loss": 1.271, "step": 3405 }, { "epoch": 1.0, "learning_rate": 7.228474843579136e-11, "loss": 1.1947, "step": 3406 }, { "epoch": 1.0, "learning_rate": 1.8071191192348125e-11, "loss": 1.2193, "step": 3407 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.2438, "step": 3408 }, { "epoch": 1.0, "step": 3408, "total_flos": 0.0, "train_loss": 0.14557739074381304, "train_runtime": 9584.0536, "train_samples_per_second": 273.278, "train_steps_per_second": 0.356 } ], "logging_steps": 1.0, "max_steps": 3408, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 12, "trial_name": null, "trial_params": null }