{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0004614674665435, "eval_steps": 500, "global_step": 271, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.060606060606061e-07, "loss": 1.6011, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.2121212121212122e-06, "loss": 1.3923, "step": 2 }, { "epoch": 0.01, "learning_rate": 1.8181818181818183e-06, "loss": 1.5238, "step": 3 }, { "epoch": 0.01, "learning_rate": 2.4242424242424244e-06, "loss": 1.4057, "step": 4 }, { "epoch": 0.02, "learning_rate": 3.0303030303030305e-06, "loss": 1.4638, "step": 5 }, { "epoch": 0.02, "learning_rate": 3.6363636363636366e-06, "loss": 1.4316, "step": 6 }, { "epoch": 0.03, "learning_rate": 4.242424242424243e-06, "loss": 1.3427, "step": 7 }, { "epoch": 0.03, "learning_rate": 4.848484848484849e-06, "loss": 1.2477, "step": 8 }, { "epoch": 0.03, "learning_rate": 5.4545454545454545e-06, "loss": 1.2386, "step": 9 }, { "epoch": 0.04, "learning_rate": 6.060606060606061e-06, "loss": 1.2007, "step": 10 }, { "epoch": 0.04, "learning_rate": 6.666666666666667e-06, "loss": 1.1452, "step": 11 }, { "epoch": 0.04, "learning_rate": 7.272727272727273e-06, "loss": 1.1837, "step": 12 }, { "epoch": 0.05, "learning_rate": 7.87878787878788e-06, "loss": 1.1542, "step": 13 }, { "epoch": 0.05, "learning_rate": 8.484848484848486e-06, "loss": 1.2, "step": 14 }, { "epoch": 0.06, "learning_rate": 9.090909090909091e-06, "loss": 1.2151, "step": 15 }, { "epoch": 0.06, "learning_rate": 9.696969696969698e-06, "loss": 1.202, "step": 16 }, { "epoch": 0.06, "learning_rate": 1.0303030303030304e-05, "loss": 1.209, "step": 17 }, { "epoch": 0.07, "learning_rate": 1.0909090909090909e-05, "loss": 1.1633, "step": 18 }, { "epoch": 0.07, "learning_rate": 1.1515151515151517e-05, "loss": 1.1233, "step": 19 }, { "epoch": 0.07, "learning_rate": 1.2121212121212122e-05, "loss": 1.1684, "step": 20 }, { "epoch": 0.08, "learning_rate": 1.2727272727272728e-05, "loss": 1.1677, "step": 21 }, { "epoch": 0.08, "learning_rate": 1.3333333333333333e-05, "loss": 1.1706, "step": 22 }, { "epoch": 0.08, "learning_rate": 1.3939393939393942e-05, "loss": 1.1212, "step": 23 }, { "epoch": 0.09, "learning_rate": 1.4545454545454546e-05, "loss": 1.2346, "step": 24 }, { "epoch": 0.09, "learning_rate": 1.5151515151515153e-05, "loss": 1.205, "step": 25 }, { "epoch": 0.1, "learning_rate": 1.575757575757576e-05, "loss": 1.1821, "step": 26 }, { "epoch": 0.1, "learning_rate": 1.6363636363636366e-05, "loss": 1.18, "step": 27 }, { "epoch": 0.1, "learning_rate": 1.6969696969696972e-05, "loss": 1.2078, "step": 28 }, { "epoch": 0.11, "learning_rate": 1.7575757575757576e-05, "loss": 1.1999, "step": 29 }, { "epoch": 0.11, "learning_rate": 1.8181818181818182e-05, "loss": 1.1746, "step": 30 }, { "epoch": 0.11, "learning_rate": 1.8787878787878792e-05, "loss": 1.1802, "step": 31 }, { "epoch": 0.12, "learning_rate": 1.9393939393939395e-05, "loss": 1.1883, "step": 32 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 1.1985, "step": 33 }, { "epoch": 0.13, "learning_rate": 1.9999954983050696e-05, "loss": 1.0979, "step": 34 }, { "epoch": 0.13, "learning_rate": 1.999981993260809e-05, "loss": 1.2325, "step": 35 }, { "epoch": 0.13, "learning_rate": 1.9999594849888083e-05, "loss": 1.2247, "step": 36 }, { "epoch": 0.14, "learning_rate": 1.99992797369172e-05, "loss": 1.1616, "step": 37 }, { "epoch": 0.14, "learning_rate": 1.9998874596532512e-05, "loss": 1.2291, "step": 38 }, { "epoch": 0.14, "learning_rate": 1.999837943238166e-05, "loss": 1.1251, "step": 39 }, { "epoch": 0.15, "learning_rate": 1.99977942489228e-05, "loss": 1.12, "step": 40 }, { "epoch": 0.15, "learning_rate": 1.999711905142457e-05, "loss": 1.1749, "step": 41 }, { "epoch": 0.16, "learning_rate": 1.9996353845966033e-05, "loss": 1.2086, "step": 42 }, { "epoch": 0.16, "learning_rate": 1.9995498639436634e-05, "loss": 1.2592, "step": 43 }, { "epoch": 0.16, "learning_rate": 1.9994553439536125e-05, "loss": 1.2809, "step": 44 }, { "epoch": 0.17, "learning_rate": 1.9993518254774517e-05, "loss": 1.1214, "step": 45 }, { "epoch": 0.17, "learning_rate": 1.9992393094471976e-05, "loss": 1.1859, "step": 46 }, { "epoch": 0.17, "learning_rate": 1.9991177968758764e-05, "loss": 1.1512, "step": 47 }, { "epoch": 0.18, "learning_rate": 1.998987288857513e-05, "loss": 1.1474, "step": 48 }, { "epoch": 0.18, "learning_rate": 1.9988477865671217e-05, "loss": 1.2545, "step": 49 }, { "epoch": 0.18, "learning_rate": 1.998699291260696e-05, "loss": 1.2222, "step": 50 }, { "epoch": 0.19, "learning_rate": 1.9985418042751975e-05, "loss": 1.2038, "step": 51 }, { "epoch": 0.19, "learning_rate": 1.9983753270285423e-05, "loss": 1.142, "step": 52 }, { "epoch": 0.2, "learning_rate": 1.998199861019591e-05, "loss": 1.1784, "step": 53 }, { "epoch": 0.2, "learning_rate": 1.998015407828131e-05, "loss": 1.2366, "step": 54 }, { "epoch": 0.2, "learning_rate": 1.9978219691148676e-05, "loss": 1.1631, "step": 55 }, { "epoch": 0.21, "learning_rate": 1.9976195466214042e-05, "loss": 1.1555, "step": 56 }, { "epoch": 0.21, "learning_rate": 1.9974081421702296e-05, "loss": 1.2391, "step": 57 }, { "epoch": 0.21, "learning_rate": 1.9971877576647005e-05, "loss": 1.1841, "step": 58 }, { "epoch": 0.22, "learning_rate": 1.9969583950890245e-05, "loss": 1.2531, "step": 59 }, { "epoch": 0.22, "learning_rate": 1.9967200565082426e-05, "loss": 1.2242, "step": 60 }, { "epoch": 0.23, "learning_rate": 1.9964727440682097e-05, "loss": 1.2037, "step": 61 }, { "epoch": 0.23, "learning_rate": 1.9962164599955762e-05, "loss": 1.1442, "step": 62 }, { "epoch": 0.23, "learning_rate": 1.9959512065977673e-05, "loss": 1.2214, "step": 63 }, { "epoch": 0.24, "learning_rate": 1.995676986262963e-05, "loss": 1.2022, "step": 64 }, { "epoch": 0.24, "learning_rate": 1.9953938014600757e-05, "loss": 1.1414, "step": 65 }, { "epoch": 0.24, "learning_rate": 1.9951016547387286e-05, "loss": 1.1876, "step": 66 }, { "epoch": 0.25, "learning_rate": 1.994800548729233e-05, "loss": 1.1716, "step": 67 }, { "epoch": 0.25, "learning_rate": 1.9944904861425626e-05, "loss": 1.2206, "step": 68 }, { "epoch": 0.25, "learning_rate": 1.9941714697703333e-05, "loss": 1.1521, "step": 69 }, { "epoch": 0.26, "learning_rate": 1.9938435024847723e-05, "loss": 1.2028, "step": 70 }, { "epoch": 0.26, "learning_rate": 1.9935065872386977e-05, "loss": 1.1917, "step": 71 }, { "epoch": 0.27, "learning_rate": 1.993160727065489e-05, "loss": 1.2168, "step": 72 }, { "epoch": 0.27, "learning_rate": 1.99280592507906e-05, "loss": 1.2537, "step": 73 }, { "epoch": 0.27, "learning_rate": 1.992442184473831e-05, "loss": 1.2286, "step": 74 }, { "epoch": 0.28, "learning_rate": 1.9920695085247012e-05, "loss": 1.1492, "step": 75 }, { "epoch": 0.28, "learning_rate": 1.9916879005870164e-05, "loss": 1.2348, "step": 76 }, { "epoch": 0.28, "learning_rate": 1.9912973640965423e-05, "loss": 1.1437, "step": 77 }, { "epoch": 0.29, "learning_rate": 1.9908979025694312e-05, "loss": 1.2257, "step": 78 }, { "epoch": 0.29, "learning_rate": 1.990489519602191e-05, "loss": 1.1692, "step": 79 }, { "epoch": 0.3, "learning_rate": 1.9900722188716526e-05, "loss": 1.2699, "step": 80 }, { "epoch": 0.3, "learning_rate": 1.989646004134937e-05, "loss": 1.2475, "step": 81 }, { "epoch": 0.3, "learning_rate": 1.989210879229422e-05, "loss": 1.261, "step": 82 }, { "epoch": 0.31, "learning_rate": 1.9887668480727066e-05, "loss": 1.1786, "step": 83 }, { "epoch": 0.31, "learning_rate": 1.9883139146625763e-05, "loss": 1.2237, "step": 84 }, { "epoch": 0.31, "learning_rate": 1.9878520830769675e-05, "loss": 1.2214, "step": 85 }, { "epoch": 0.32, "learning_rate": 1.9873813574739293e-05, "loss": 1.3091, "step": 86 }, { "epoch": 0.32, "learning_rate": 1.9869017420915888e-05, "loss": 1.2442, "step": 87 }, { "epoch": 0.32, "learning_rate": 1.9864132412481094e-05, "loss": 1.2017, "step": 88 }, { "epoch": 0.33, "learning_rate": 1.9859158593416554e-05, "loss": 1.1557, "step": 89 }, { "epoch": 0.33, "learning_rate": 1.9854096008503495e-05, "loss": 1.194, "step": 90 }, { "epoch": 0.34, "learning_rate": 1.9848944703322345e-05, "loss": 1.1097, "step": 91 }, { "epoch": 0.34, "learning_rate": 1.9843704724252308e-05, "loss": 1.1112, "step": 92 }, { "epoch": 0.34, "learning_rate": 1.9838376118470965e-05, "loss": 1.1634, "step": 93 }, { "epoch": 0.35, "learning_rate": 1.983295893395383e-05, "loss": 1.0942, "step": 94 }, { "epoch": 0.35, "learning_rate": 1.9827453219473925e-05, "loss": 1.1493, "step": 95 }, { "epoch": 0.35, "learning_rate": 1.9821859024601345e-05, "loss": 1.2171, "step": 96 }, { "epoch": 0.36, "learning_rate": 1.9816176399702806e-05, "loss": 1.1748, "step": 97 }, { "epoch": 0.36, "learning_rate": 1.98104053959412e-05, "loss": 1.2336, "step": 98 }, { "epoch": 0.37, "learning_rate": 1.9804546065275116e-05, "loss": 1.1497, "step": 99 }, { "epoch": 0.37, "learning_rate": 1.9798598460458394e-05, "loss": 1.1544, "step": 100 }, { "epoch": 0.37, "learning_rate": 1.979256263503965e-05, "loss": 1.2676, "step": 101 }, { "epoch": 0.38, "learning_rate": 1.978643864336176e-05, "loss": 1.1786, "step": 102 }, { "epoch": 0.38, "learning_rate": 1.9780226540561413e-05, "loss": 1.2087, "step": 103 }, { "epoch": 0.38, "learning_rate": 1.9773926382568592e-05, "loss": 1.1794, "step": 104 }, { "epoch": 0.39, "learning_rate": 1.9767538226106078e-05, "loss": 1.1964, "step": 105 }, { "epoch": 0.39, "learning_rate": 1.9761062128688932e-05, "loss": 1.2106, "step": 106 }, { "epoch": 0.4, "learning_rate": 1.9754498148623985e-05, "loss": 1.2571, "step": 107 }, { "epoch": 0.4, "learning_rate": 1.9747846345009306e-05, "loss": 1.2233, "step": 108 }, { "epoch": 0.4, "learning_rate": 1.974110677773368e-05, "loss": 1.2183, "step": 109 }, { "epoch": 0.41, "learning_rate": 1.9734279507476057e-05, "loss": 1.2022, "step": 110 }, { "epoch": 0.41, "learning_rate": 1.9727364595705012e-05, "loss": 1.1858, "step": 111 }, { "epoch": 0.41, "learning_rate": 1.9720362104678193e-05, "loss": 1.1514, "step": 112 }, { "epoch": 0.42, "learning_rate": 1.9713272097441755e-05, "loss": 1.2193, "step": 113 }, { "epoch": 0.42, "learning_rate": 1.9706094637829797e-05, "loss": 1.1742, "step": 114 }, { "epoch": 0.42, "learning_rate": 1.9698829790463792e-05, "loss": 1.2172, "step": 115 }, { "epoch": 0.43, "learning_rate": 1.9691477620751985e-05, "loss": 1.1813, "step": 116 }, { "epoch": 0.43, "learning_rate": 1.9684038194888827e-05, "loss": 1.1926, "step": 117 }, { "epoch": 0.44, "learning_rate": 1.9676511579854375e-05, "loss": 1.2264, "step": 118 }, { "epoch": 0.44, "learning_rate": 1.9668897843413676e-05, "loss": 1.1865, "step": 119 }, { "epoch": 0.44, "learning_rate": 1.9661197054116165e-05, "loss": 1.1506, "step": 120 }, { "epoch": 0.45, "learning_rate": 1.9653409281295053e-05, "loss": 1.2092, "step": 121 }, { "epoch": 0.45, "learning_rate": 1.9645534595066697e-05, "loss": 1.232, "step": 122 }, { "epoch": 0.45, "learning_rate": 1.963757306632996e-05, "loss": 1.1755, "step": 123 }, { "epoch": 0.46, "learning_rate": 1.9629524766765593e-05, "loss": 1.1795, "step": 124 }, { "epoch": 0.46, "learning_rate": 1.962138976883558e-05, "loss": 1.2398, "step": 125 }, { "epoch": 0.47, "learning_rate": 1.9613168145782468e-05, "loss": 1.2437, "step": 126 }, { "epoch": 0.47, "learning_rate": 1.9604859971628743e-05, "loss": 1.1782, "step": 127 }, { "epoch": 0.47, "learning_rate": 1.9596465321176136e-05, "loss": 1.1971, "step": 128 }, { "epoch": 0.48, "learning_rate": 1.958798427000495e-05, "loss": 1.1401, "step": 129 }, { "epoch": 0.48, "learning_rate": 1.9579416894473407e-05, "loss": 1.1967, "step": 130 }, { "epoch": 0.48, "learning_rate": 1.957076327171692e-05, "loss": 1.188, "step": 131 }, { "epoch": 0.49, "learning_rate": 1.956202347964743e-05, "loss": 1.1636, "step": 132 }, { "epoch": 0.49, "learning_rate": 1.955319759695269e-05, "loss": 1.1595, "step": 133 }, { "epoch": 0.49, "learning_rate": 1.9544285703095565e-05, "loss": 1.193, "step": 134 }, { "epoch": 0.5, "learning_rate": 1.9535287878313315e-05, "loss": 1.1201, "step": 135 }, { "epoch": 0.5, "learning_rate": 1.952620420361686e-05, "loss": 1.2006, "step": 136 }, { "epoch": 0.51, "learning_rate": 1.9517034760790064e-05, "loss": 1.1913, "step": 137 }, { "epoch": 0.51, "learning_rate": 1.9507779632388997e-05, "loss": 1.23, "step": 138 }, { "epoch": 0.51, "learning_rate": 1.9498438901741186e-05, "loss": 1.2601, "step": 139 }, { "epoch": 0.52, "learning_rate": 1.9489012652944874e-05, "loss": 1.1785, "step": 140 }, { "epoch": 0.52, "learning_rate": 1.947950097086825e-05, "loss": 1.1947, "step": 141 }, { "epoch": 0.52, "learning_rate": 1.94699039411487e-05, "loss": 1.1873, "step": 142 }, { "epoch": 0.53, "learning_rate": 1.9460221650192016e-05, "loss": 1.2313, "step": 143 }, { "epoch": 0.53, "learning_rate": 1.945045418517165e-05, "loss": 1.166, "step": 144 }, { "epoch": 0.54, "learning_rate": 1.9440601634027892e-05, "loss": 1.2167, "step": 145 }, { "epoch": 0.54, "learning_rate": 1.94306640854671e-05, "loss": 1.2201, "step": 146 }, { "epoch": 0.54, "learning_rate": 1.9420641628960897e-05, "loss": 1.1424, "step": 147 }, { "epoch": 0.55, "learning_rate": 1.9410534354745367e-05, "loss": 1.265, "step": 148 }, { "epoch": 0.55, "learning_rate": 1.9400342353820244e-05, "loss": 1.1723, "step": 149 }, { "epoch": 0.55, "learning_rate": 1.9390065717948084e-05, "loss": 1.153, "step": 150 }, { "epoch": 0.56, "learning_rate": 1.9379704539653443e-05, "loss": 1.2422, "step": 151 }, { "epoch": 0.56, "learning_rate": 1.9369258912222052e-05, "loss": 1.2091, "step": 152 }, { "epoch": 0.56, "learning_rate": 1.9358728929699966e-05, "loss": 1.1898, "step": 153 }, { "epoch": 0.57, "learning_rate": 1.9348114686892722e-05, "loss": 1.1857, "step": 154 }, { "epoch": 0.57, "learning_rate": 1.9337416279364486e-05, "loss": 1.1432, "step": 155 }, { "epoch": 0.58, "learning_rate": 1.9326633803437197e-05, "loss": 1.2366, "step": 156 }, { "epoch": 0.58, "learning_rate": 1.931576735618968e-05, "loss": 1.2082, "step": 157 }, { "epoch": 0.58, "learning_rate": 1.9304817035456804e-05, "loss": 1.1841, "step": 158 }, { "epoch": 0.59, "learning_rate": 1.929378293982857e-05, "loss": 1.203, "step": 159 }, { "epoch": 0.59, "learning_rate": 1.928266516864925e-05, "loss": 1.187, "step": 160 }, { "epoch": 0.59, "learning_rate": 1.9271463822016465e-05, "loss": 1.2004, "step": 161 }, { "epoch": 0.6, "learning_rate": 1.926017900078031e-05, "loss": 1.1816, "step": 162 }, { "epoch": 0.6, "learning_rate": 1.924881080654243e-05, "loss": 1.1433, "step": 163 }, { "epoch": 0.61, "learning_rate": 1.9237359341655108e-05, "loss": 1.2313, "step": 164 }, { "epoch": 0.61, "learning_rate": 1.922582470922034e-05, "loss": 1.2453, "step": 165 }, { "epoch": 0.61, "learning_rate": 1.9214207013088935e-05, "loss": 1.1982, "step": 166 }, { "epoch": 0.62, "learning_rate": 1.920250635785953e-05, "loss": 1.2433, "step": 167 }, { "epoch": 0.62, "learning_rate": 1.9190722848877683e-05, "loss": 1.1923, "step": 168 }, { "epoch": 0.62, "learning_rate": 1.9178856592234927e-05, "loss": 1.1939, "step": 169 }, { "epoch": 0.63, "learning_rate": 1.916690769476779e-05, "loss": 1.1539, "step": 170 }, { "epoch": 0.63, "learning_rate": 1.9154876264056863e-05, "loss": 1.1694, "step": 171 }, { "epoch": 0.63, "learning_rate": 1.9142762408425797e-05, "loss": 1.1914, "step": 172 }, { "epoch": 0.64, "learning_rate": 1.9130566236940363e-05, "loss": 1.2548, "step": 173 }, { "epoch": 0.64, "learning_rate": 1.911828785940745e-05, "loss": 1.1776, "step": 174 }, { "epoch": 0.65, "learning_rate": 1.910592738637407e-05, "loss": 1.1697, "step": 175 }, { "epoch": 0.65, "learning_rate": 1.9093484929126383e-05, "loss": 1.2309, "step": 176 }, { "epoch": 0.65, "learning_rate": 1.908096059968869e-05, "loss": 1.1831, "step": 177 }, { "epoch": 0.66, "learning_rate": 1.9068354510822402e-05, "loss": 1.228, "step": 178 }, { "epoch": 0.66, "learning_rate": 1.905566677602506e-05, "loss": 1.1951, "step": 179 }, { "epoch": 0.66, "learning_rate": 1.904289750952928e-05, "loss": 1.205, "step": 180 }, { "epoch": 0.67, "learning_rate": 1.9030046826301746e-05, "loss": 1.2015, "step": 181 }, { "epoch": 0.67, "learning_rate": 1.9017114842042174e-05, "loss": 1.2009, "step": 182 }, { "epoch": 0.68, "learning_rate": 1.900410167318226e-05, "loss": 1.1674, "step": 183 }, { "epoch": 0.68, "learning_rate": 1.8991007436884633e-05, "loss": 1.2286, "step": 184 }, { "epoch": 0.68, "learning_rate": 1.897783225104181e-05, "loss": 1.171, "step": 185 }, { "epoch": 0.69, "learning_rate": 1.8964576234275123e-05, "loss": 1.1635, "step": 186 }, { "epoch": 0.69, "learning_rate": 1.8951239505933663e-05, "loss": 1.1771, "step": 187 }, { "epoch": 0.69, "learning_rate": 1.893782218609319e-05, "loss": 1.1829, "step": 188 }, { "epoch": 0.7, "learning_rate": 1.8924324395555066e-05, "loss": 1.1819, "step": 189 }, { "epoch": 0.7, "learning_rate": 1.8910746255845168e-05, "loss": 1.1657, "step": 190 }, { "epoch": 0.71, "learning_rate": 1.8897087889212772e-05, "loss": 1.1807, "step": 191 }, { "epoch": 0.71, "learning_rate": 1.8883349418629487e-05, "loss": 1.1763, "step": 192 }, { "epoch": 0.71, "learning_rate": 1.886953096778811e-05, "loss": 1.1322, "step": 193 }, { "epoch": 0.72, "learning_rate": 1.885563266110155e-05, "loss": 1.1607, "step": 194 }, { "epoch": 0.72, "learning_rate": 1.8841654623701673e-05, "loss": 1.2171, "step": 195 }, { "epoch": 0.72, "learning_rate": 1.8827596981438202e-05, "loss": 1.1621, "step": 196 }, { "epoch": 0.73, "learning_rate": 1.8813459860877575e-05, "loss": 1.1894, "step": 197 }, { "epoch": 0.73, "learning_rate": 1.8799243389301796e-05, "loss": 1.1554, "step": 198 }, { "epoch": 0.73, "learning_rate": 1.87849476947073e-05, "loss": 1.152, "step": 199 }, { "epoch": 0.74, "learning_rate": 1.8770572905803806e-05, "loss": 1.1941, "step": 200 }, { "epoch": 0.74, "learning_rate": 1.8756119152013134e-05, "loss": 1.2741, "step": 201 }, { "epoch": 0.75, "learning_rate": 1.8741586563468064e-05, "loss": 1.164, "step": 202 }, { "epoch": 0.75, "learning_rate": 1.8726975271011163e-05, "loss": 1.1811, "step": 203 }, { "epoch": 0.75, "learning_rate": 1.8712285406193585e-05, "loss": 1.2335, "step": 204 }, { "epoch": 0.76, "learning_rate": 1.869751710127392e-05, "loss": 1.1534, "step": 205 }, { "epoch": 0.76, "learning_rate": 1.868267048921697e-05, "loss": 1.1557, "step": 206 }, { "epoch": 0.76, "learning_rate": 1.866774570369257e-05, "loss": 1.152, "step": 207 }, { "epoch": 0.77, "learning_rate": 1.8652742879074384e-05, "loss": 1.1982, "step": 208 }, { "epoch": 0.77, "learning_rate": 1.8637662150438695e-05, "loss": 1.1531, "step": 209 }, { "epoch": 0.78, "learning_rate": 1.8622503653563173e-05, "loss": 1.2339, "step": 210 }, { "epoch": 0.78, "learning_rate": 1.8607267524925684e-05, "loss": 1.1669, "step": 211 }, { "epoch": 0.78, "learning_rate": 1.8591953901703028e-05, "loss": 1.251, "step": 212 }, { "epoch": 0.79, "learning_rate": 1.8576562921769727e-05, "loss": 1.2002, "step": 213 }, { "epoch": 0.79, "learning_rate": 1.8561094723696776e-05, "loss": 1.171, "step": 214 }, { "epoch": 0.79, "learning_rate": 1.8545549446750392e-05, "loss": 1.2238, "step": 215 }, { "epoch": 0.8, "learning_rate": 1.8529927230890757e-05, "loss": 1.2063, "step": 216 }, { "epoch": 0.8, "learning_rate": 1.8514228216770784e-05, "loss": 1.1634, "step": 217 }, { "epoch": 0.8, "learning_rate": 1.8498452545734808e-05, "loss": 1.2448, "step": 218 }, { "epoch": 0.81, "learning_rate": 1.8482600359817344e-05, "loss": 1.2604, "step": 219 }, { "epoch": 0.81, "learning_rate": 1.8466671801741812e-05, "loss": 1.1813, "step": 220 }, { "epoch": 0.82, "learning_rate": 1.845066701491922e-05, "loss": 1.1993, "step": 221 }, { "epoch": 0.82, "learning_rate": 1.843458614344691e-05, "loss": 1.1974, "step": 222 }, { "epoch": 0.82, "learning_rate": 1.841842933210723e-05, "loss": 1.1981, "step": 223 }, { "epoch": 0.83, "learning_rate": 1.840219672636626e-05, "loss": 1.1443, "step": 224 }, { "epoch": 0.83, "learning_rate": 1.8385888472372474e-05, "loss": 1.1971, "step": 225 }, { "epoch": 0.83, "learning_rate": 1.836950471695544e-05, "loss": 1.1828, "step": 226 }, { "epoch": 0.84, "learning_rate": 1.8353045607624494e-05, "loss": 1.2389, "step": 227 }, { "epoch": 0.84, "learning_rate": 1.833651129256742e-05, "loss": 1.1886, "step": 228 }, { "epoch": 0.85, "learning_rate": 1.8319901920649096e-05, "loss": 1.1794, "step": 229 }, { "epoch": 0.85, "learning_rate": 1.8303217641410174e-05, "loss": 1.1842, "step": 230 }, { "epoch": 0.85, "learning_rate": 1.828645860506573e-05, "loss": 1.2395, "step": 231 }, { "epoch": 0.86, "learning_rate": 1.8269624962503895e-05, "loss": 1.189, "step": 232 }, { "epoch": 0.86, "learning_rate": 1.825271686528452e-05, "loss": 1.2021, "step": 233 }, { "epoch": 0.86, "learning_rate": 1.8235734465637794e-05, "loss": 1.1385, "step": 234 }, { "epoch": 0.87, "learning_rate": 1.8218677916462882e-05, "loss": 1.1857, "step": 235 }, { "epoch": 0.87, "learning_rate": 1.8201547371326553e-05, "loss": 1.1745, "step": 236 }, { "epoch": 0.87, "learning_rate": 1.8184342984461766e-05, "loss": 1.2081, "step": 237 }, { "epoch": 0.88, "learning_rate": 1.816706491076634e-05, "loss": 1.1183, "step": 238 }, { "epoch": 0.88, "learning_rate": 1.8149713305801505e-05, "loss": 1.2019, "step": 239 }, { "epoch": 0.89, "learning_rate": 1.8132288325790518e-05, "loss": 1.1882, "step": 240 }, { "epoch": 0.89, "learning_rate": 1.8114790127617274e-05, "loss": 1.2139, "step": 241 }, { "epoch": 0.89, "learning_rate": 1.809721886882487e-05, "loss": 1.1907, "step": 242 }, { "epoch": 0.9, "learning_rate": 1.8079574707614202e-05, "loss": 1.1665, "step": 243 }, { "epoch": 0.9, "learning_rate": 1.806185780284253e-05, "loss": 1.2126, "step": 244 }, { "epoch": 0.9, "learning_rate": 1.8044068314022057e-05, "loss": 1.2309, "step": 245 }, { "epoch": 0.91, "learning_rate": 1.802620640131848e-05, "loss": 1.2309, "step": 246 }, { "epoch": 0.91, "learning_rate": 1.800827222554957e-05, "loss": 1.2466, "step": 247 }, { "epoch": 0.92, "learning_rate": 1.79902659481837e-05, "loss": 1.2281, "step": 248 }, { "epoch": 0.92, "learning_rate": 1.797218773133841e-05, "loss": 1.208, "step": 249 }, { "epoch": 0.92, "learning_rate": 1.7954037737778927e-05, "loss": 1.132, "step": 250 }, { "epoch": 0.93, "learning_rate": 1.7935816130916724e-05, "loss": 1.138, "step": 251 }, { "epoch": 0.93, "learning_rate": 1.7917523074808024e-05, "loss": 1.2041, "step": 252 }, { "epoch": 0.93, "learning_rate": 1.789915873415235e-05, "loss": 1.2736, "step": 253 }, { "epoch": 0.94, "learning_rate": 1.7880723274291023e-05, "loss": 1.2191, "step": 254 }, { "epoch": 0.94, "learning_rate": 1.786221686120567e-05, "loss": 1.1143, "step": 255 }, { "epoch": 0.95, "learning_rate": 1.7843639661516743e-05, "loss": 1.2028, "step": 256 }, { "epoch": 0.95, "learning_rate": 1.7824991842482014e-05, "loss": 1.1834, "step": 257 }, { "epoch": 0.95, "learning_rate": 1.7806273571995066e-05, "loss": 1.1844, "step": 258 }, { "epoch": 0.96, "learning_rate": 1.7787485018583792e-05, "loss": 1.1251, "step": 259 }, { "epoch": 0.96, "learning_rate": 1.7768626351408856e-05, "loss": 1.1508, "step": 260 }, { "epoch": 0.96, "learning_rate": 1.7749697740262197e-05, "loss": 1.184, "step": 261 }, { "epoch": 0.97, "learning_rate": 1.7730699355565478e-05, "loss": 1.2147, "step": 262 }, { "epoch": 0.97, "learning_rate": 1.7711631368368564e-05, "loss": 1.1857, "step": 263 }, { "epoch": 0.97, "learning_rate": 1.769249395034797e-05, "loss": 1.2376, "step": 264 }, { "epoch": 0.98, "learning_rate": 1.7673287273805342e-05, "loss": 1.1883, "step": 265 }, { "epoch": 0.98, "learning_rate": 1.7654011511665875e-05, "loss": 1.213, "step": 266 }, { "epoch": 0.99, "learning_rate": 1.7634666837476765e-05, "loss": 1.1331, "step": 267 }, { "epoch": 0.99, "learning_rate": 1.761525342540566e-05, "loss": 1.201, "step": 268 }, { "epoch": 0.99, "learning_rate": 1.7595771450239075e-05, "loss": 1.1785, "step": 269 }, { "epoch": 1.0, "learning_rate": 1.757622108738083e-05, "loss": 1.1081, "step": 270 }, { "epoch": 1.0, "learning_rate": 1.7556602512850466e-05, "loss": 1.1684, "step": 271 } ], "logging_steps": 1.0, "max_steps": 1080, "num_train_epochs": 4, "save_steps": 271, "total_flos": 2.0804857960196997e+18, "trial_name": null, "trial_params": null }