{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.997172745264349, "eval_steps": 500, "global_step": 4420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 6.8903, "step": 2 }, { "epoch": 0.01, "learning_rate": 1.5037593984962404e-08, "loss": 7.056, "step": 4 }, { "epoch": 0.01, "learning_rate": 4.5112781954887216e-08, "loss": 7.1163, "step": 6 }, { "epoch": 0.02, "learning_rate": 7.518796992481202e-08, "loss": 7.0162, "step": 8 }, { "epoch": 0.02, "learning_rate": 1.0526315789473683e-07, "loss": 7.1292, "step": 10 }, { "epoch": 0.03, "learning_rate": 1.3533834586466163e-07, "loss": 7.0274, "step": 12 }, { "epoch": 0.03, "learning_rate": 1.6541353383458646e-07, "loss": 7.0955, "step": 14 }, { "epoch": 0.04, "learning_rate": 1.9548872180451126e-07, "loss": 7.0135, "step": 16 }, { "epoch": 0.04, "learning_rate": 2.2556390977443606e-07, "loss": 7.191, "step": 18 }, { "epoch": 0.05, "learning_rate": 2.556390977443609e-07, "loss": 6.8574, "step": 20 }, { "epoch": 0.05, "learning_rate": 2.857142857142857e-07, "loss": 7.0637, "step": 22 }, { "epoch": 0.05, "learning_rate": 3.157894736842105e-07, "loss": 6.6183, "step": 24 }, { "epoch": 0.06, "learning_rate": 3.458646616541353e-07, "loss": 6.7347, "step": 26 }, { "epoch": 0.06, "learning_rate": 3.759398496240601e-07, "loss": 6.6015, "step": 28 }, { "epoch": 0.07, "learning_rate": 4.060150375939849e-07, "loss": 6.4449, "step": 30 }, { "epoch": 0.07, "learning_rate": 4.3609022556390975e-07, "loss": 6.1891, "step": 32 }, { "epoch": 0.08, "learning_rate": 4.6616541353383456e-07, "loss": 6.3982, "step": 34 }, { "epoch": 0.08, "learning_rate": 4.962406015037593e-07, "loss": 6.1609, "step": 36 }, { "epoch": 0.09, "learning_rate": 5.263157894736842e-07, "loss": 6.0694, "step": 38 }, { "epoch": 0.09, "learning_rate": 5.56390977443609e-07, "loss": 6.3096, "step": 40 }, { "epoch": 0.09, "learning_rate": 5.864661654135338e-07, "loss": 5.8542, "step": 42 }, { "epoch": 0.1, "learning_rate": 6.165413533834586e-07, "loss": 5.871, "step": 44 }, { "epoch": 0.1, "learning_rate": 6.466165413533834e-07, "loss": 5.7045, "step": 46 }, { "epoch": 0.11, "learning_rate": 6.766917293233082e-07, "loss": 5.9885, "step": 48 }, { "epoch": 0.11, "learning_rate": 7.06766917293233e-07, "loss": 5.7822, "step": 50 }, { "epoch": 0.12, "learning_rate": 7.368421052631578e-07, "loss": 5.5907, "step": 52 }, { "epoch": 0.12, "learning_rate": 7.669172932330827e-07, "loss": 5.3985, "step": 54 }, { "epoch": 0.13, "learning_rate": 7.969924812030074e-07, "loss": 5.5724, "step": 56 }, { "epoch": 0.13, "learning_rate": 8.270676691729323e-07, "loss": 5.0061, "step": 58 }, { "epoch": 0.14, "learning_rate": 8.57142857142857e-07, "loss": 4.6891, "step": 60 }, { "epoch": 0.14, "learning_rate": 8.872180451127819e-07, "loss": 5.3034, "step": 62 }, { "epoch": 0.14, "learning_rate": 9.172932330827066e-07, "loss": 4.9166, "step": 64 }, { "epoch": 0.15, "learning_rate": 9.473684210526315e-07, "loss": 4.9849, "step": 66 }, { "epoch": 0.15, "learning_rate": 9.774436090225563e-07, "loss": 4.9612, "step": 68 }, { "epoch": 0.16, "learning_rate": 1.0075187969924813e-06, "loss": 4.8004, "step": 70 }, { "epoch": 0.16, "learning_rate": 1.037593984962406e-06, "loss": 4.7059, "step": 72 }, { "epoch": 0.17, "learning_rate": 1.0676691729323308e-06, "loss": 4.3691, "step": 74 }, { "epoch": 0.17, "learning_rate": 1.0977443609022555e-06, "loss": 4.5099, "step": 76 }, { "epoch": 0.18, "learning_rate": 1.1278195488721805e-06, "loss": 4.739, "step": 78 }, { "epoch": 0.18, "learning_rate": 1.1578947368421053e-06, "loss": 4.6763, "step": 80 }, { "epoch": 0.19, "learning_rate": 1.18796992481203e-06, "loss": 4.3841, "step": 82 }, { "epoch": 0.19, "learning_rate": 1.218045112781955e-06, "loss": 4.0368, "step": 84 }, { "epoch": 0.19, "learning_rate": 1.2481203007518797e-06, "loss": 4.1215, "step": 86 }, { "epoch": 0.2, "learning_rate": 1.2781954887218045e-06, "loss": 4.5899, "step": 88 }, { "epoch": 0.2, "learning_rate": 1.3082706766917292e-06, "loss": 4.3187, "step": 90 }, { "epoch": 0.21, "learning_rate": 1.3383458646616542e-06, "loss": 4.0256, "step": 92 }, { "epoch": 0.21, "learning_rate": 1.368421052631579e-06, "loss": 4.127, "step": 94 }, { "epoch": 0.22, "learning_rate": 1.3984962406015037e-06, "loss": 4.4956, "step": 96 }, { "epoch": 0.22, "learning_rate": 1.4285714285714286e-06, "loss": 3.9016, "step": 98 }, { "epoch": 0.23, "learning_rate": 1.4586466165413534e-06, "loss": 3.762, "step": 100 }, { "epoch": 0.23, "learning_rate": 1.4887218045112781e-06, "loss": 4.0754, "step": 102 }, { "epoch": 0.24, "learning_rate": 1.5187969924812029e-06, "loss": 4.006, "step": 104 }, { "epoch": 0.24, "learning_rate": 1.5488721804511278e-06, "loss": 3.9874, "step": 106 }, { "epoch": 0.24, "learning_rate": 1.5789473684210526e-06, "loss": 3.9949, "step": 108 }, { "epoch": 0.25, "learning_rate": 1.6090225563909773e-06, "loss": 3.7541, "step": 110 }, { "epoch": 0.25, "learning_rate": 1.6390977443609023e-06, "loss": 4.2143, "step": 112 }, { "epoch": 0.26, "learning_rate": 1.669172932330827e-06, "loss": 3.8826, "step": 114 }, { "epoch": 0.26, "learning_rate": 1.6992481203007518e-06, "loss": 4.4338, "step": 116 }, { "epoch": 0.27, "learning_rate": 1.7293233082706765e-06, "loss": 3.6651, "step": 118 }, { "epoch": 0.27, "learning_rate": 1.7593984962406015e-06, "loss": 3.9413, "step": 120 }, { "epoch": 0.28, "learning_rate": 1.7894736842105262e-06, "loss": 4.2412, "step": 122 }, { "epoch": 0.28, "learning_rate": 1.819548872180451e-06, "loss": 4.0753, "step": 124 }, { "epoch": 0.28, "learning_rate": 1.849624060150376e-06, "loss": 3.5147, "step": 126 }, { "epoch": 0.29, "learning_rate": 1.8796992481203007e-06, "loss": 3.5877, "step": 128 }, { "epoch": 0.29, "learning_rate": 1.9097744360902255e-06, "loss": 3.9202, "step": 130 }, { "epoch": 0.3, "learning_rate": 1.9398496240601504e-06, "loss": 4.2452, "step": 132 }, { "epoch": 0.3, "learning_rate": 1.969924812030075e-06, "loss": 4.2966, "step": 134 }, { "epoch": 0.31, "learning_rate": 2e-06, "loss": 4.0258, "step": 136 }, { "epoch": 0.31, "learning_rate": 1.9999989259544593e-06, "loss": 3.2049, "step": 138 }, { "epoch": 0.32, "learning_rate": 1.9999957038201444e-06, "loss": 3.8649, "step": 140 }, { "epoch": 0.32, "learning_rate": 1.999990333603977e-06, "loss": 3.9265, "step": 142 }, { "epoch": 0.33, "learning_rate": 1.999982815317493e-06, "loss": 3.3749, "step": 144 }, { "epoch": 0.33, "learning_rate": 1.999973148976841e-06, "loss": 3.6495, "step": 146 }, { "epoch": 0.33, "learning_rate": 1.999961334602787e-06, "loss": 3.789, "step": 148 }, { "epoch": 0.34, "learning_rate": 1.999947372220708e-06, "loss": 3.4248, "step": 150 }, { "epoch": 0.34, "learning_rate": 1.9999312618605972e-06, "loss": 3.3628, "step": 152 }, { "epoch": 0.35, "learning_rate": 1.999913003557061e-06, "loss": 3.669, "step": 154 }, { "epoch": 0.35, "learning_rate": 1.9998925973493196e-06, "loss": 3.5372, "step": 156 }, { "epoch": 0.36, "learning_rate": 1.9998700432812073e-06, "loss": 3.2437, "step": 158 }, { "epoch": 0.36, "learning_rate": 1.999845341401173e-06, "loss": 3.9666, "step": 160 }, { "epoch": 0.37, "learning_rate": 1.999818491762278e-06, "loss": 3.5522, "step": 162 }, { "epoch": 0.37, "learning_rate": 1.9997894944221986e-06, "loss": 3.765, "step": 164 }, { "epoch": 0.38, "learning_rate": 1.9997583494432226e-06, "loss": 3.489, "step": 166 }, { "epoch": 0.38, "learning_rate": 1.999725056892252e-06, "loss": 3.7294, "step": 168 }, { "epoch": 0.38, "learning_rate": 1.999689616840804e-06, "loss": 3.3408, "step": 170 }, { "epoch": 0.39, "learning_rate": 1.9996520293650056e-06, "loss": 3.5929, "step": 172 }, { "epoch": 0.39, "learning_rate": 1.9996122945455987e-06, "loss": 3.5767, "step": 174 }, { "epoch": 0.4, "learning_rate": 1.9995704124679367e-06, "loss": 3.167, "step": 176 }, { "epoch": 0.4, "learning_rate": 1.999526383221987e-06, "loss": 3.651, "step": 178 }, { "epoch": 0.41, "learning_rate": 1.999480206902328e-06, "loss": 3.3083, "step": 180 }, { "epoch": 0.41, "learning_rate": 1.9994318836081506e-06, "loss": 3.3985, "step": 182 }, { "epoch": 0.42, "learning_rate": 1.9993814134432575e-06, "loss": 3.2612, "step": 184 }, { "epoch": 0.42, "learning_rate": 1.9993287965160636e-06, "loss": 3.7523, "step": 186 }, { "epoch": 0.43, "learning_rate": 1.9992740329395945e-06, "loss": 3.9041, "step": 188 }, { "epoch": 0.43, "learning_rate": 1.9992171228314873e-06, "loss": 3.5194, "step": 190 }, { "epoch": 0.43, "learning_rate": 1.9991580663139904e-06, "loss": 3.3409, "step": 192 }, { "epoch": 0.44, "learning_rate": 1.999096863513963e-06, "loss": 3.3884, "step": 194 }, { "epoch": 0.44, "learning_rate": 1.999033514562873e-06, "loss": 3.6586, "step": 196 }, { "epoch": 0.45, "learning_rate": 1.9989680195968008e-06, "loss": 3.4453, "step": 198 }, { "epoch": 0.45, "learning_rate": 1.998900378756435e-06, "loss": 3.7887, "step": 200 }, { "epoch": 0.46, "learning_rate": 1.9988305921870747e-06, "loss": 3.2259, "step": 202 }, { "epoch": 0.46, "learning_rate": 1.998758660038628e-06, "loss": 3.926, "step": 204 }, { "epoch": 0.47, "learning_rate": 1.998684582465611e-06, "loss": 3.5587, "step": 206 }, { "epoch": 0.47, "learning_rate": 1.998608359627149e-06, "loss": 3.2248, "step": 208 }, { "epoch": 0.47, "learning_rate": 1.998529991686976e-06, "loss": 3.4838, "step": 210 }, { "epoch": 0.48, "learning_rate": 1.998449478813434e-06, "loss": 3.4091, "step": 212 }, { "epoch": 0.48, "learning_rate": 1.998366821179471e-06, "loss": 3.2509, "step": 214 }, { "epoch": 0.49, "learning_rate": 1.998282018962644e-06, "loss": 3.3953, "step": 216 }, { "epoch": 0.49, "learning_rate": 1.9981950723451152e-06, "loss": 3.3451, "step": 218 }, { "epoch": 0.5, "learning_rate": 1.998105981513654e-06, "loss": 3.3634, "step": 220 }, { "epoch": 0.5, "learning_rate": 1.998014746659636e-06, "loss": 3.2317, "step": 222 }, { "epoch": 0.51, "learning_rate": 1.9979213679790414e-06, "loss": 3.1458, "step": 224 }, { "epoch": 0.51, "learning_rate": 1.9978258456724567e-06, "loss": 3.6836, "step": 226 }, { "epoch": 0.52, "learning_rate": 1.997728179945072e-06, "loss": 3.2641, "step": 228 }, { "epoch": 0.52, "learning_rate": 1.997628371006683e-06, "loss": 3.4994, "step": 230 }, { "epoch": 0.52, "learning_rate": 1.997526419071687e-06, "loss": 3.314, "step": 232 }, { "epoch": 0.53, "learning_rate": 1.997422324359087e-06, "loss": 3.4686, "step": 234 }, { "epoch": 0.53, "learning_rate": 1.9973160870924883e-06, "loss": 3.5634, "step": 236 }, { "epoch": 0.54, "learning_rate": 1.9972077075000974e-06, "loss": 3.3327, "step": 238 }, { "epoch": 0.54, "learning_rate": 1.9970971858147235e-06, "loss": 3.5988, "step": 240 }, { "epoch": 0.55, "learning_rate": 1.996984522273778e-06, "loss": 3.6566, "step": 242 }, { "epoch": 0.55, "learning_rate": 1.996869717119272e-06, "loss": 3.5185, "step": 244 }, { "epoch": 0.56, "learning_rate": 1.9967527705978177e-06, "loss": 3.1404, "step": 246 }, { "epoch": 0.56, "learning_rate": 1.996633682960626e-06, "loss": 3.1059, "step": 248 }, { "epoch": 0.57, "learning_rate": 1.996512454463509e-06, "loss": 3.5391, "step": 250 }, { "epoch": 0.57, "learning_rate": 1.9963890853668764e-06, "loss": 3.5021, "step": 252 }, { "epoch": 0.57, "learning_rate": 1.9962635759357355e-06, "loss": 2.8194, "step": 254 }, { "epoch": 0.58, "learning_rate": 1.996135926439693e-06, "loss": 3.0764, "step": 256 }, { "epoch": 0.58, "learning_rate": 1.9960061371529506e-06, "loss": 3.1193, "step": 258 }, { "epoch": 0.59, "learning_rate": 1.9958742083543086e-06, "loss": 3.2474, "step": 260 }, { "epoch": 0.59, "learning_rate": 1.9957401403271615e-06, "loss": 2.932, "step": 262 }, { "epoch": 0.6, "learning_rate": 1.995603933359499e-06, "loss": 2.8882, "step": 264 }, { "epoch": 0.6, "learning_rate": 1.995465587743908e-06, "loss": 3.2823, "step": 266 }, { "epoch": 0.61, "learning_rate": 1.9953251037775655e-06, "loss": 3.1695, "step": 268 }, { "epoch": 0.61, "learning_rate": 1.9951824817622444e-06, "loss": 3.1922, "step": 270 }, { "epoch": 0.62, "learning_rate": 1.9950377220043103e-06, "loss": 3.1263, "step": 272 }, { "epoch": 0.62, "learning_rate": 1.9948908248147202e-06, "loss": 2.8974, "step": 274 }, { "epoch": 0.62, "learning_rate": 1.994741790509022e-06, "loss": 3.3318, "step": 276 }, { "epoch": 0.63, "learning_rate": 1.994590619407356e-06, "loss": 3.1373, "step": 278 }, { "epoch": 0.63, "learning_rate": 1.9944373118344505e-06, "loss": 3.0683, "step": 280 }, { "epoch": 0.64, "learning_rate": 1.9942818681196243e-06, "loss": 3.1446, "step": 282 }, { "epoch": 0.64, "learning_rate": 1.9941242885967856e-06, "loss": 3.1867, "step": 284 }, { "epoch": 0.65, "learning_rate": 1.9939645736044283e-06, "loss": 3.2701, "step": 286 }, { "epoch": 0.65, "learning_rate": 1.993802723485636e-06, "loss": 3.0124, "step": 288 }, { "epoch": 0.66, "learning_rate": 1.9936387385880763e-06, "loss": 3.1993, "step": 290 }, { "epoch": 0.66, "learning_rate": 1.993472619264004e-06, "loss": 2.841, "step": 292 }, { "epoch": 0.66, "learning_rate": 1.993304365870259e-06, "loss": 3.2096, "step": 294 }, { "epoch": 0.67, "learning_rate": 1.993133978768265e-06, "loss": 2.9372, "step": 296 }, { "epoch": 0.67, "learning_rate": 1.9929614583240286e-06, "loss": 3.1033, "step": 298 }, { "epoch": 0.68, "learning_rate": 1.9927868049081394e-06, "loss": 2.9037, "step": 300 }, { "epoch": 0.68, "learning_rate": 1.992610018895769e-06, "loss": 2.9596, "step": 302 }, { "epoch": 0.69, "learning_rate": 1.9924311006666695e-06, "loss": 2.8743, "step": 304 }, { "epoch": 0.69, "learning_rate": 1.992250050605174e-06, "loss": 2.7332, "step": 306 }, { "epoch": 0.7, "learning_rate": 1.9920668691001946e-06, "loss": 3.3899, "step": 308 }, { "epoch": 0.7, "learning_rate": 1.9918815565452215e-06, "loss": 2.8847, "step": 310 }, { "epoch": 0.71, "learning_rate": 1.9916941133383232e-06, "loss": 3.1096, "step": 312 }, { "epoch": 0.71, "learning_rate": 1.9915045398821445e-06, "loss": 2.9963, "step": 314 }, { "epoch": 0.71, "learning_rate": 1.991312836583907e-06, "loss": 3.1046, "step": 316 }, { "epoch": 0.72, "learning_rate": 1.991119003855406e-06, "loss": 3.2598, "step": 318 }, { "epoch": 0.72, "learning_rate": 1.9909230421130126e-06, "loss": 2.7556, "step": 320 }, { "epoch": 0.73, "learning_rate": 1.99072495177767e-06, "loss": 2.9834, "step": 322 }, { "epoch": 0.73, "learning_rate": 1.990524733274895e-06, "loss": 3.0658, "step": 324 }, { "epoch": 0.74, "learning_rate": 1.9903223870347743e-06, "loss": 3.1869, "step": 326 }, { "epoch": 0.74, "learning_rate": 1.9901179134919664e-06, "loss": 2.9748, "step": 328 }, { "epoch": 0.75, "learning_rate": 1.989911313085699e-06, "loss": 2.9126, "step": 330 }, { "epoch": 0.75, "learning_rate": 1.989702586259769e-06, "loss": 3.136, "step": 332 }, { "epoch": 0.76, "learning_rate": 1.98949173346254e-06, "loss": 3.0548, "step": 334 }, { "epoch": 0.76, "learning_rate": 1.9892787551469436e-06, "loss": 3.0525, "step": 336 }, { "epoch": 0.76, "learning_rate": 1.9890636517704765e-06, "loss": 3.0175, "step": 338 }, { "epoch": 0.77, "learning_rate": 1.9888464237952e-06, "loss": 3.0134, "step": 340 }, { "epoch": 0.77, "learning_rate": 1.9886270716877395e-06, "loss": 3.0958, "step": 342 }, { "epoch": 0.78, "learning_rate": 1.988405595919284e-06, "loss": 3.0671, "step": 344 }, { "epoch": 0.78, "learning_rate": 1.988181996965583e-06, "loss": 3.3007, "step": 346 }, { "epoch": 0.79, "learning_rate": 1.9879562753069475e-06, "loss": 3.0881, "step": 348 }, { "epoch": 0.79, "learning_rate": 1.987728431428249e-06, "loss": 2.901, "step": 350 }, { "epoch": 0.8, "learning_rate": 1.987498465818915e-06, "loss": 3.126, "step": 352 }, { "epoch": 0.8, "learning_rate": 1.9872663789729353e-06, "loss": 2.8135, "step": 354 }, { "epoch": 0.81, "learning_rate": 1.9870321713888513e-06, "loss": 2.8591, "step": 356 }, { "epoch": 0.81, "learning_rate": 1.9867958435697627e-06, "loss": 3.0549, "step": 358 }, { "epoch": 0.81, "learning_rate": 1.9865573960233237e-06, "loss": 3.0025, "step": 360 }, { "epoch": 0.82, "learning_rate": 1.9863168292617415e-06, "loss": 2.8908, "step": 362 }, { "epoch": 0.82, "learning_rate": 1.9860741438017745e-06, "loss": 2.753, "step": 364 }, { "epoch": 0.83, "learning_rate": 1.985829340164734e-06, "loss": 3.3022, "step": 366 }, { "epoch": 0.83, "learning_rate": 1.9855824188764805e-06, "loss": 2.6739, "step": 368 }, { "epoch": 0.84, "learning_rate": 1.9853333804674227e-06, "loss": 2.9021, "step": 370 }, { "epoch": 0.84, "learning_rate": 1.9850822254725187e-06, "loss": 2.796, "step": 372 }, { "epoch": 0.85, "learning_rate": 1.9848289544312713e-06, "loss": 2.9392, "step": 374 }, { "epoch": 0.85, "learning_rate": 1.9845735678877307e-06, "loss": 3.1737, "step": 376 }, { "epoch": 0.85, "learning_rate": 1.9843160663904896e-06, "loss": 2.7822, "step": 378 }, { "epoch": 0.86, "learning_rate": 1.9840564504926856e-06, "loss": 3.1038, "step": 380 }, { "epoch": 0.86, "learning_rate": 1.9837947207519966e-06, "loss": 2.881, "step": 382 }, { "epoch": 0.87, "learning_rate": 1.983530877730642e-06, "loss": 2.5971, "step": 384 }, { "epoch": 0.87, "learning_rate": 1.983264921995381e-06, "loss": 2.7485, "step": 386 }, { "epoch": 0.88, "learning_rate": 1.9829968541175103e-06, "loss": 2.6596, "step": 388 }, { "epoch": 0.88, "learning_rate": 1.9827266746728644e-06, "loss": 2.7252, "step": 390 }, { "epoch": 0.89, "learning_rate": 1.9824543842418137e-06, "loss": 2.8186, "step": 392 }, { "epoch": 0.89, "learning_rate": 1.982179983409262e-06, "loss": 2.7891, "step": 394 }, { "epoch": 0.9, "learning_rate": 1.981903472764648e-06, "loss": 2.6887, "step": 396 }, { "epoch": 0.9, "learning_rate": 1.981624852901941e-06, "loss": 2.5659, "step": 398 }, { "epoch": 0.9, "learning_rate": 1.981344124419643e-06, "loss": 2.8682, "step": 400 }, { "epoch": 0.91, "learning_rate": 1.9810612879207835e-06, "loss": 2.423, "step": 402 }, { "epoch": 0.91, "learning_rate": 1.980776344012921e-06, "loss": 2.8891, "step": 404 }, { "epoch": 0.92, "learning_rate": 1.9804892933081412e-06, "loss": 2.84, "step": 406 }, { "epoch": 0.92, "learning_rate": 1.980200136423055e-06, "loss": 2.7778, "step": 408 }, { "epoch": 0.93, "learning_rate": 1.979908873978798e-06, "loss": 3.15, "step": 410 }, { "epoch": 0.93, "learning_rate": 1.9796155066010285e-06, "loss": 2.8711, "step": 412 }, { "epoch": 0.94, "learning_rate": 1.9793200349199264e-06, "loss": 2.9754, "step": 414 }, { "epoch": 0.94, "learning_rate": 1.979022459570191e-06, "loss": 2.6299, "step": 416 }, { "epoch": 0.95, "learning_rate": 1.978722781191042e-06, "loss": 2.8453, "step": 418 }, { "epoch": 0.95, "learning_rate": 1.978421000426216e-06, "loss": 2.7152, "step": 420 }, { "epoch": 0.95, "learning_rate": 1.978117117923965e-06, "loss": 3.0451, "step": 422 }, { "epoch": 0.96, "learning_rate": 1.9778111343370563e-06, "loss": 2.8192, "step": 424 }, { "epoch": 0.96, "learning_rate": 1.977503050322771e-06, "loss": 3.0503, "step": 426 }, { "epoch": 0.97, "learning_rate": 1.9771928665429016e-06, "loss": 2.803, "step": 428 }, { "epoch": 0.97, "learning_rate": 1.9768805836637507e-06, "loss": 3.0309, "step": 430 }, { "epoch": 0.98, "learning_rate": 1.97656620235613e-06, "loss": 3.2594, "step": 432 }, { "epoch": 0.98, "learning_rate": 1.9762497232953607e-06, "loss": 2.7724, "step": 434 }, { "epoch": 0.99, "learning_rate": 1.975931147161267e-06, "loss": 2.9467, "step": 436 }, { "epoch": 0.99, "learning_rate": 1.9756104746381803e-06, "loss": 2.4082, "step": 438 }, { "epoch": 1.0, "learning_rate": 1.9752877064149344e-06, "loss": 2.8044, "step": 440 }, { "epoch": 1.0, "learning_rate": 1.9749628431848647e-06, "loss": 2.7142, "step": 442 }, { "epoch": 1.0, "learning_rate": 1.9746358856458065e-06, "loss": 2.9282, "step": 444 }, { "epoch": 1.01, "learning_rate": 1.9743068345000954e-06, "loss": 2.8878, "step": 446 }, { "epoch": 1.01, "learning_rate": 1.9739756904545625e-06, "loss": 2.6279, "step": 448 }, { "epoch": 1.02, "learning_rate": 1.9736424542205353e-06, "loss": 3.3394, "step": 450 }, { "epoch": 1.02, "learning_rate": 1.973307126513836e-06, "loss": 3.0321, "step": 452 }, { "epoch": 1.03, "learning_rate": 1.972969708054779e-06, "loss": 2.6647, "step": 454 }, { "epoch": 1.03, "learning_rate": 1.9726301995681697e-06, "loss": 2.8719, "step": 456 }, { "epoch": 1.04, "learning_rate": 1.9722886017833032e-06, "loss": 2.6796, "step": 458 }, { "epoch": 1.04, "learning_rate": 1.971944915433963e-06, "loss": 2.7943, "step": 460 }, { "epoch": 1.04, "learning_rate": 1.9715991412584184e-06, "loss": 2.5229, "step": 462 }, { "epoch": 1.05, "learning_rate": 1.9712512799994236e-06, "loss": 2.6882, "step": 464 }, { "epoch": 1.05, "learning_rate": 1.970901332404217e-06, "loss": 3.1826, "step": 466 }, { "epoch": 1.06, "learning_rate": 1.9705492992245173e-06, "loss": 2.8889, "step": 468 }, { "epoch": 1.06, "learning_rate": 1.9701951812165236e-06, "loss": 2.9288, "step": 470 }, { "epoch": 1.07, "learning_rate": 1.9698389791409147e-06, "loss": 3.2185, "step": 472 }, { "epoch": 1.07, "learning_rate": 1.969480693762844e-06, "loss": 2.8005, "step": 474 }, { "epoch": 1.08, "learning_rate": 1.9691203258519414e-06, "loss": 2.5454, "step": 476 }, { "epoch": 1.08, "learning_rate": 1.9687578761823105e-06, "loss": 2.9237, "step": 478 }, { "epoch": 1.09, "learning_rate": 1.9683933455325258e-06, "loss": 2.5245, "step": 480 }, { "epoch": 1.09, "learning_rate": 1.9680267346856323e-06, "loss": 2.8414, "step": 482 }, { "epoch": 1.09, "learning_rate": 1.967842649434193e-06, "loss": 2.732, "step": 484 }, { "epoch": 1.1, "learning_rate": 1.9674729197696204e-06, "loss": 3.1263, "step": 486 }, { "epoch": 1.1, "learning_rate": 1.967101111885097e-06, "loss": 2.9437, "step": 488 }, { "epoch": 1.11, "learning_rate": 1.966727226579301e-06, "loss": 2.3668, "step": 490 }, { "epoch": 1.11, "learning_rate": 1.966351264655371e-06, "loss": 2.9765, "step": 492 }, { "epoch": 1.12, "learning_rate": 1.9659732269209086e-06, "loss": 2.9363, "step": 494 }, { "epoch": 1.12, "learning_rate": 1.965593114187972e-06, "loss": 3.0973, "step": 496 }, { "epoch": 1.13, "learning_rate": 1.965210927273079e-06, "loss": 2.8855, "step": 498 }, { "epoch": 1.13, "learning_rate": 1.964826666997202e-06, "loss": 2.6448, "step": 500 }, { "epoch": 1.14, "learning_rate": 1.964440334185766e-06, "loss": 2.9767, "step": 502 }, { "epoch": 1.14, "learning_rate": 1.96405192966865e-06, "loss": 2.8073, "step": 504 }, { "epoch": 1.14, "learning_rate": 1.9636614542801816e-06, "loss": 2.8848, "step": 506 }, { "epoch": 1.15, "learning_rate": 1.9632689088591385e-06, "loss": 2.9247, "step": 508 }, { "epoch": 1.15, "learning_rate": 1.9628742942487427e-06, "loss": 2.8988, "step": 510 }, { "epoch": 1.16, "learning_rate": 1.9624776112966636e-06, "loss": 2.7298, "step": 512 }, { "epoch": 1.16, "learning_rate": 1.9620788608550118e-06, "loss": 2.3712, "step": 514 }, { "epoch": 1.17, "learning_rate": 1.961678043780339e-06, "loss": 2.802, "step": 516 }, { "epoch": 1.17, "learning_rate": 1.9612751609336377e-06, "loss": 2.6516, "step": 518 }, { "epoch": 1.18, "learning_rate": 1.9608702131803363e-06, "loss": 2.5732, "step": 520 }, { "epoch": 1.18, "learning_rate": 1.9604632013902996e-06, "loss": 2.5408, "step": 522 }, { "epoch": 1.19, "learning_rate": 1.9600541264378266e-06, "loss": 2.9325, "step": 524 }, { "epoch": 1.19, "learning_rate": 1.9596429892016466e-06, "loss": 2.7673, "step": 526 }, { "epoch": 1.19, "learning_rate": 1.9592297905649202e-06, "loss": 3.0604, "step": 528 }, { "epoch": 1.2, "learning_rate": 1.9588145314152364e-06, "loss": 2.6299, "step": 530 }, { "epoch": 1.2, "learning_rate": 1.958397212644609e-06, "loss": 2.8273, "step": 532 }, { "epoch": 1.21, "learning_rate": 1.9579778351494762e-06, "loss": 2.5243, "step": 534 }, { "epoch": 1.21, "learning_rate": 1.9575563998306996e-06, "loss": 2.51, "step": 536 }, { "epoch": 1.22, "learning_rate": 1.957132907593561e-06, "loss": 2.7396, "step": 538 }, { "epoch": 1.22, "learning_rate": 1.95670735934776e-06, "loss": 2.5031, "step": 540 }, { "epoch": 1.23, "learning_rate": 1.9562797560074133e-06, "loss": 2.6223, "step": 542 }, { "epoch": 1.23, "learning_rate": 1.955850098491051e-06, "loss": 2.6593, "step": 544 }, { "epoch": 1.23, "learning_rate": 1.9554183877216173e-06, "loss": 2.7128, "step": 546 }, { "epoch": 1.24, "learning_rate": 1.954984624626466e-06, "loss": 2.6417, "step": 548 }, { "epoch": 1.24, "learning_rate": 1.95454881013736e-06, "loss": 2.3611, "step": 550 }, { "epoch": 1.25, "learning_rate": 1.954110945190468e-06, "loss": 2.5021, "step": 552 }, { "epoch": 1.25, "learning_rate": 1.9536710307263644e-06, "loss": 2.7342, "step": 554 }, { "epoch": 1.26, "learning_rate": 1.9532290676900247e-06, "loss": 2.5659, "step": 556 }, { "epoch": 1.26, "learning_rate": 1.9527850570308266e-06, "loss": 2.546, "step": 558 }, { "epoch": 1.27, "learning_rate": 1.9523389997025453e-06, "loss": 2.7346, "step": 560 }, { "epoch": 1.27, "learning_rate": 1.951890896663352e-06, "loss": 2.8895, "step": 562 }, { "epoch": 1.28, "learning_rate": 1.9514407488758135e-06, "loss": 2.5559, "step": 564 }, { "epoch": 1.28, "learning_rate": 1.950988557306888e-06, "loss": 2.5673, "step": 566 }, { "epoch": 1.28, "learning_rate": 1.9505343229279237e-06, "loss": 2.4996, "step": 568 }, { "epoch": 1.29, "learning_rate": 1.950078046714658e-06, "loss": 3.0601, "step": 570 }, { "epoch": 1.29, "learning_rate": 1.9496197296472143e-06, "loss": 2.5457, "step": 572 }, { "epoch": 1.3, "learning_rate": 1.949159372710098e-06, "loss": 2.5823, "step": 574 }, { "epoch": 1.3, "learning_rate": 1.9486969768921986e-06, "loss": 2.6923, "step": 576 }, { "epoch": 1.31, "learning_rate": 1.9482325431867846e-06, "loss": 2.5386, "step": 578 }, { "epoch": 1.31, "learning_rate": 1.9477660725915013e-06, "loss": 2.483, "step": 580 }, { "epoch": 1.32, "learning_rate": 1.9472975661083705e-06, "loss": 2.4662, "step": 582 }, { "epoch": 1.32, "learning_rate": 1.9468270247437868e-06, "loss": 2.5708, "step": 584 }, { "epoch": 1.33, "learning_rate": 1.9463544495085153e-06, "loss": 2.5796, "step": 586 }, { "epoch": 1.33, "learning_rate": 1.9458798414176913e-06, "loss": 2.5566, "step": 588 }, { "epoch": 1.33, "learning_rate": 1.945403201490816e-06, "loss": 2.5595, "step": 590 }, { "epoch": 1.34, "learning_rate": 1.9449245307517553e-06, "loss": 2.4607, "step": 592 }, { "epoch": 1.34, "learning_rate": 1.9444438302287374e-06, "loss": 2.2646, "step": 594 }, { "epoch": 1.35, "learning_rate": 1.9439611009543515e-06, "loss": 2.5673, "step": 596 }, { "epoch": 1.35, "learning_rate": 1.943476343965543e-06, "loss": 2.371, "step": 598 }, { "epoch": 1.36, "learning_rate": 1.9429895603036153e-06, "loss": 2.5064, "step": 600 }, { "epoch": 1.36, "learning_rate": 1.9425007510142224e-06, "loss": 2.7005, "step": 602 }, { "epoch": 1.37, "learning_rate": 1.9420099171473732e-06, "loss": 2.5411, "step": 604 }, { "epoch": 1.37, "learning_rate": 1.9415170597574216e-06, "loss": 2.5514, "step": 606 }, { "epoch": 1.38, "learning_rate": 1.9410221799030716e-06, "loss": 2.5828, "step": 608 }, { "epoch": 1.38, "learning_rate": 1.9405252786473694e-06, "loss": 2.3784, "step": 610 }, { "epoch": 1.38, "learning_rate": 1.940026357057705e-06, "loss": 2.7243, "step": 612 }, { "epoch": 1.39, "learning_rate": 1.9395254162058062e-06, "loss": 2.5178, "step": 614 }, { "epoch": 1.39, "learning_rate": 1.9390224571677405e-06, "loss": 2.5705, "step": 616 }, { "epoch": 1.4, "learning_rate": 1.9385174810239096e-06, "loss": 2.9351, "step": 618 }, { "epoch": 1.4, "learning_rate": 1.9380104888590475e-06, "loss": 2.5937, "step": 620 }, { "epoch": 1.41, "learning_rate": 1.9375014817622207e-06, "loss": 2.7249, "step": 622 }, { "epoch": 1.41, "learning_rate": 1.936990460826822e-06, "loss": 2.7076, "step": 624 }, { "epoch": 1.42, "learning_rate": 1.936477427150571e-06, "loss": 2.7452, "step": 626 }, { "epoch": 1.42, "learning_rate": 1.935962381835511e-06, "loss": 2.8691, "step": 628 }, { "epoch": 1.42, "learning_rate": 1.9354453259880065e-06, "loss": 2.5158, "step": 630 }, { "epoch": 1.43, "learning_rate": 1.93492626071874e-06, "loss": 2.89, "step": 632 }, { "epoch": 1.43, "learning_rate": 1.934405187142711e-06, "loss": 2.2936, "step": 634 }, { "epoch": 1.44, "learning_rate": 1.933882106379233e-06, "loss": 2.8166, "step": 636 }, { "epoch": 1.44, "learning_rate": 1.9333570195519318e-06, "loss": 2.4871, "step": 638 }, { "epoch": 1.45, "learning_rate": 1.932829927788741e-06, "loss": 2.6609, "step": 640 }, { "epoch": 1.45, "learning_rate": 1.9323008322219025e-06, "loss": 2.5998, "step": 642 }, { "epoch": 1.46, "learning_rate": 1.9317697339879607e-06, "loss": 2.4645, "step": 644 }, { "epoch": 1.46, "learning_rate": 1.9312366342277637e-06, "loss": 2.7153, "step": 646 }, { "epoch": 1.47, "learning_rate": 1.9307015340864578e-06, "loss": 2.7173, "step": 648 }, { "epoch": 1.47, "learning_rate": 1.9301644347134873e-06, "loss": 2.4206, "step": 650 }, { "epoch": 1.47, "learning_rate": 1.929625337262591e-06, "loss": 2.6368, "step": 652 }, { "epoch": 1.48, "learning_rate": 1.9290842428917983e-06, "loss": 2.6202, "step": 654 }, { "epoch": 1.48, "learning_rate": 1.92854115276343e-06, "loss": 2.729, "step": 656 }, { "epoch": 1.49, "learning_rate": 1.9279960680440924e-06, "loss": 2.3452, "step": 658 }, { "epoch": 1.49, "learning_rate": 1.927448989904678e-06, "loss": 2.7506, "step": 660 }, { "epoch": 1.5, "learning_rate": 1.9268999195203594e-06, "loss": 2.5662, "step": 662 }, { "epoch": 1.5, "learning_rate": 1.926348858070591e-06, "loss": 2.4619, "step": 664 }, { "epoch": 1.51, "learning_rate": 1.9257958067391024e-06, "loss": 2.5436, "step": 666 }, { "epoch": 1.51, "learning_rate": 1.925240766713898e-06, "loss": 2.1889, "step": 668 }, { "epoch": 1.52, "learning_rate": 1.924683739187255e-06, "loss": 2.8072, "step": 670 }, { "epoch": 1.52, "learning_rate": 1.9241247253557184e-06, "loss": 2.6656, "step": 672 }, { "epoch": 1.52, "learning_rate": 1.9235637264201013e-06, "loss": 2.6298, "step": 674 }, { "epoch": 1.53, "learning_rate": 1.923000743585481e-06, "loss": 2.5295, "step": 676 }, { "epoch": 1.53, "learning_rate": 1.922435778061195e-06, "loss": 2.5569, "step": 678 }, { "epoch": 1.54, "learning_rate": 1.921868831060841e-06, "loss": 2.4206, "step": 680 }, { "epoch": 1.54, "learning_rate": 1.921299903802273e-06, "loss": 2.3958, "step": 682 }, { "epoch": 1.55, "learning_rate": 1.9207289975075986e-06, "loss": 2.4561, "step": 684 }, { "epoch": 1.55, "learning_rate": 1.920156113403176e-06, "loss": 2.7795, "step": 686 }, { "epoch": 1.56, "learning_rate": 1.9195812527196133e-06, "loss": 2.9221, "step": 688 }, { "epoch": 1.56, "learning_rate": 1.919004416691763e-06, "loss": 2.7184, "step": 690 }, { "epoch": 1.57, "learning_rate": 1.918425606558721e-06, "loss": 2.7425, "step": 692 }, { "epoch": 1.57, "learning_rate": 1.9178448235638255e-06, "loss": 2.7597, "step": 694 }, { "epoch": 1.57, "learning_rate": 1.91726206895465e-06, "loss": 2.512, "step": 696 }, { "epoch": 1.58, "learning_rate": 1.916677343983005e-06, "loss": 2.4675, "step": 698 }, { "epoch": 1.58, "learning_rate": 1.916090649904933e-06, "loss": 2.6547, "step": 700 }, { "epoch": 1.59, "learning_rate": 1.9155019879807064e-06, "loss": 2.7396, "step": 702 }, { "epoch": 1.59, "learning_rate": 1.9149113594748245e-06, "loss": 2.8, "step": 704 }, { "epoch": 1.6, "learning_rate": 1.914318765656011e-06, "loss": 2.4821, "step": 706 }, { "epoch": 1.6, "learning_rate": 1.913724207797212e-06, "loss": 2.5723, "step": 708 }, { "epoch": 1.61, "learning_rate": 1.9131276871755913e-06, "loss": 2.7257, "step": 710 }, { "epoch": 1.61, "learning_rate": 1.9125292050725296e-06, "loss": 2.6002, "step": 712 }, { "epoch": 1.61, "learning_rate": 1.9119287627736212e-06, "loss": 2.3223, "step": 714 }, { "epoch": 1.62, "learning_rate": 1.9113263615686704e-06, "loss": 2.6472, "step": 716 }, { "epoch": 1.62, "learning_rate": 1.9107220027516902e-06, "loss": 2.4824, "step": 718 }, { "epoch": 1.63, "learning_rate": 1.9101156876208984e-06, "loss": 2.9242, "step": 720 }, { "epoch": 1.63, "learning_rate": 1.9095074174787157e-06, "loss": 2.6182, "step": 722 }, { "epoch": 1.64, "learning_rate": 1.9088971936317603e-06, "loss": 2.6885, "step": 724 }, { "epoch": 1.64, "learning_rate": 1.90828501739085e-06, "loss": 2.382, "step": 726 }, { "epoch": 1.65, "learning_rate": 1.9076708900709945e-06, "loss": 2.6815, "step": 728 }, { "epoch": 1.65, "learning_rate": 1.907054812991395e-06, "loss": 2.739, "step": 730 }, { "epoch": 1.66, "learning_rate": 1.906436787475442e-06, "loss": 2.8119, "step": 732 }, { "epoch": 1.66, "learning_rate": 1.9058168148507097e-06, "loss": 2.492, "step": 734 }, { "epoch": 1.66, "learning_rate": 1.9051948964489562e-06, "loss": 2.9267, "step": 736 }, { "epoch": 1.67, "learning_rate": 1.9045710336061188e-06, "loss": 2.4951, "step": 738 }, { "epoch": 1.67, "learning_rate": 1.9039452276623115e-06, "loss": 2.6114, "step": 740 }, { "epoch": 1.68, "learning_rate": 1.903317479961823e-06, "loss": 2.7481, "step": 742 }, { "epoch": 1.68, "learning_rate": 1.9026877918531122e-06, "loss": 2.5025, "step": 744 }, { "epoch": 1.69, "learning_rate": 1.9020561646888065e-06, "loss": 2.5243, "step": 746 }, { "epoch": 1.69, "learning_rate": 1.9014225998256986e-06, "loss": 2.7307, "step": 748 }, { "epoch": 1.7, "learning_rate": 1.9007870986247436e-06, "loss": 2.4616, "step": 750 }, { "epoch": 1.7, "learning_rate": 1.900149662451056e-06, "loss": 2.3835, "step": 752 }, { "epoch": 1.71, "learning_rate": 1.8995102926739064e-06, "loss": 2.4583, "step": 754 }, { "epoch": 1.71, "learning_rate": 1.8988689906667196e-06, "loss": 2.4495, "step": 756 }, { "epoch": 1.71, "learning_rate": 1.8982257578070708e-06, "loss": 3.1206, "step": 758 }, { "epoch": 1.72, "learning_rate": 1.897580595476683e-06, "loss": 2.7375, "step": 760 }, { "epoch": 1.72, "learning_rate": 1.8969335050614231e-06, "loss": 2.6551, "step": 762 }, { "epoch": 1.73, "learning_rate": 1.8962844879513003e-06, "loss": 2.3353, "step": 764 }, { "epoch": 1.73, "learning_rate": 1.8956335455404626e-06, "loss": 2.7879, "step": 766 }, { "epoch": 1.74, "learning_rate": 1.894980679227194e-06, "loss": 2.5966, "step": 768 }, { "epoch": 1.74, "learning_rate": 1.89432589041391e-06, "loss": 2.5804, "step": 770 }, { "epoch": 1.75, "learning_rate": 1.8936691805071571e-06, "loss": 2.7343, "step": 772 }, { "epoch": 1.75, "learning_rate": 1.8930105509176082e-06, "loss": 2.5984, "step": 774 }, { "epoch": 1.76, "learning_rate": 1.8923500030600592e-06, "loss": 2.701, "step": 776 }, { "epoch": 1.76, "learning_rate": 1.8916875383534274e-06, "loss": 2.4911, "step": 778 }, { "epoch": 1.76, "learning_rate": 1.8910231582207473e-06, "loss": 2.3311, "step": 780 }, { "epoch": 1.77, "learning_rate": 1.8903568640891673e-06, "loss": 2.6318, "step": 782 }, { "epoch": 1.77, "learning_rate": 1.8896886573899487e-06, "loss": 2.2295, "step": 784 }, { "epoch": 1.78, "learning_rate": 1.88901853955846e-06, "loss": 2.7293, "step": 786 }, { "epoch": 1.78, "learning_rate": 1.8883465120341756e-06, "loss": 2.5001, "step": 788 }, { "epoch": 1.79, "learning_rate": 1.8876725762606714e-06, "loss": 2.2673, "step": 790 }, { "epoch": 1.79, "learning_rate": 1.886996733685623e-06, "loss": 2.9348, "step": 792 }, { "epoch": 1.8, "learning_rate": 1.886318985760802e-06, "loss": 2.6025, "step": 794 }, { "epoch": 1.8, "learning_rate": 1.8856393339420724e-06, "loss": 2.2533, "step": 796 }, { "epoch": 1.8, "learning_rate": 1.884957779689388e-06, "loss": 2.2767, "step": 798 }, { "epoch": 1.81, "learning_rate": 1.8842743244667903e-06, "loss": 2.6349, "step": 800 }, { "epoch": 1.81, "learning_rate": 1.8835889697424025e-06, "loss": 2.5554, "step": 802 }, { "epoch": 1.82, "learning_rate": 1.8829017169884293e-06, "loss": 2.4651, "step": 804 }, { "epoch": 1.82, "learning_rate": 1.8822125676811523e-06, "loss": 2.5952, "step": 806 }, { "epoch": 1.83, "learning_rate": 1.881521523300927e-06, "loss": 2.2587, "step": 808 }, { "epoch": 1.83, "learning_rate": 1.8808285853321793e-06, "loss": 2.3114, "step": 810 }, { "epoch": 1.84, "learning_rate": 1.880133755263403e-06, "loss": 2.7006, "step": 812 }, { "epoch": 1.84, "learning_rate": 1.8794370345871574e-06, "loss": 2.4779, "step": 814 }, { "epoch": 1.85, "learning_rate": 1.878738424800061e-06, "loss": 2.304, "step": 816 }, { "epoch": 1.85, "learning_rate": 1.8780379274027914e-06, "loss": 2.5801, "step": 818 }, { "epoch": 1.85, "learning_rate": 1.8773355439000808e-06, "loss": 2.5185, "step": 820 }, { "epoch": 1.86, "learning_rate": 1.876631275800713e-06, "loss": 2.5946, "step": 822 }, { "epoch": 1.86, "learning_rate": 1.8759251246175202e-06, "loss": 2.7588, "step": 824 }, { "epoch": 1.87, "learning_rate": 1.8752170918673794e-06, "loss": 2.7387, "step": 826 }, { "epoch": 1.87, "learning_rate": 1.8745071790712097e-06, "loss": 2.331, "step": 828 }, { "epoch": 1.88, "learning_rate": 1.8737953877539676e-06, "loss": 2.258, "step": 830 }, { "epoch": 1.88, "learning_rate": 1.8730817194446465e-06, "loss": 2.9485, "step": 832 }, { "epoch": 1.89, "learning_rate": 1.8723661756762703e-06, "loss": 2.3587, "step": 834 }, { "epoch": 1.89, "learning_rate": 1.871648757985893e-06, "loss": 2.6883, "step": 836 }, { "epoch": 1.9, "learning_rate": 1.870929467914592e-06, "loss": 2.8789, "step": 838 }, { "epoch": 1.9, "learning_rate": 1.8702083070074692e-06, "loss": 2.6571, "step": 840 }, { "epoch": 1.9, "learning_rate": 1.8694852768136431e-06, "loss": 2.6182, "step": 842 }, { "epoch": 1.91, "learning_rate": 1.8687603788862485e-06, "loss": 2.3781, "step": 844 }, { "epoch": 1.91, "learning_rate": 1.868033614782432e-06, "loss": 2.6386, "step": 846 }, { "epoch": 1.92, "learning_rate": 1.8673049860633496e-06, "loss": 2.7053, "step": 848 }, { "epoch": 1.92, "learning_rate": 1.866574494294162e-06, "loss": 2.7169, "step": 850 }, { "epoch": 1.93, "learning_rate": 1.8658421410440316e-06, "loss": 2.5135, "step": 852 }, { "epoch": 1.93, "learning_rate": 1.8651079278861203e-06, "loss": 2.5498, "step": 854 }, { "epoch": 1.94, "learning_rate": 1.864371856397585e-06, "loss": 2.6104, "step": 856 }, { "epoch": 1.94, "learning_rate": 1.8636339281595739e-06, "loss": 2.5974, "step": 858 }, { "epoch": 1.95, "learning_rate": 1.862894144757224e-06, "loss": 2.5574, "step": 860 }, { "epoch": 1.95, "learning_rate": 1.8621525077796582e-06, "loss": 2.4691, "step": 862 }, { "epoch": 1.95, "learning_rate": 1.8614090188199793e-06, "loss": 2.7892, "step": 864 }, { "epoch": 1.96, "learning_rate": 1.8606636794752699e-06, "loss": 2.4791, "step": 866 }, { "epoch": 1.96, "learning_rate": 1.8599164913465866e-06, "loss": 2.558, "step": 868 }, { "epoch": 1.97, "learning_rate": 1.8591674560389576e-06, "loss": 2.7124, "step": 870 }, { "epoch": 1.97, "learning_rate": 1.858416575161379e-06, "loss": 2.4838, "step": 872 }, { "epoch": 1.98, "learning_rate": 1.8576638503268111e-06, "loss": 2.6113, "step": 874 }, { "epoch": 1.98, "learning_rate": 1.8569092831521757e-06, "loss": 2.7273, "step": 876 }, { "epoch": 1.99, "learning_rate": 1.8561528752583518e-06, "loss": 2.6104, "step": 878 }, { "epoch": 1.99, "learning_rate": 1.8553946282701722e-06, "loss": 2.6673, "step": 880 }, { "epoch": 1.99, "learning_rate": 1.8546345438164206e-06, "loss": 2.4585, "step": 882 }, { "epoch": 2.0, "learning_rate": 1.8538726235298276e-06, "loss": 2.6186, "step": 884 }, { "epoch": 2.0, "learning_rate": 1.8531088690470677e-06, "loss": 2.627, "step": 886 }, { "epoch": 2.01, "learning_rate": 1.8523432820087548e-06, "loss": 2.4928, "step": 888 }, { "epoch": 2.01, "learning_rate": 1.8515758640594394e-06, "loss": 2.538, "step": 890 }, { "epoch": 2.02, "learning_rate": 1.8508066168476057e-06, "loss": 2.63, "step": 892 }, { "epoch": 2.02, "learning_rate": 1.8500355420256662e-06, "loss": 2.3831, "step": 894 }, { "epoch": 2.03, "learning_rate": 1.8492626412499602e-06, "loss": 2.5052, "step": 896 }, { "epoch": 2.03, "learning_rate": 1.848487916180749e-06, "loss": 2.3827, "step": 898 }, { "epoch": 2.04, "learning_rate": 1.847711368482212e-06, "loss": 2.4427, "step": 900 }, { "epoch": 2.04, "learning_rate": 1.8469329998224455e-06, "loss": 2.4639, "step": 902 }, { "epoch": 2.04, "learning_rate": 1.8461528118734551e-06, "loss": 2.6087, "step": 904 }, { "epoch": 2.05, "learning_rate": 1.8453708063111564e-06, "loss": 2.3654, "step": 906 }, { "epoch": 2.05, "learning_rate": 1.8445869848153684e-06, "loss": 2.6367, "step": 908 }, { "epoch": 2.06, "learning_rate": 1.8438013490698105e-06, "loss": 2.4576, "step": 910 }, { "epoch": 2.06, "learning_rate": 1.8430139007621007e-06, "loss": 2.8134, "step": 912 }, { "epoch": 2.07, "learning_rate": 1.842224641583749e-06, "loss": 2.623, "step": 914 }, { "epoch": 2.07, "learning_rate": 1.8414335732301564e-06, "loss": 2.3569, "step": 916 }, { "epoch": 2.08, "learning_rate": 1.8406406974006097e-06, "loss": 2.3732, "step": 918 }, { "epoch": 2.08, "learning_rate": 1.8398460157982783e-06, "loss": 2.3951, "step": 920 }, { "epoch": 2.09, "learning_rate": 1.839049530130211e-06, "loss": 2.8091, "step": 922 }, { "epoch": 2.09, "learning_rate": 1.838251242107331e-06, "loss": 2.6456, "step": 924 }, { "epoch": 2.09, "learning_rate": 1.8374511534444337e-06, "loss": 2.3516, "step": 926 }, { "epoch": 2.1, "learning_rate": 1.8366492658601831e-06, "loss": 2.6604, "step": 928 }, { "epoch": 2.1, "learning_rate": 1.8358455810771062e-06, "loss": 2.9681, "step": 930 }, { "epoch": 2.11, "learning_rate": 1.8350401008215912e-06, "loss": 2.3861, "step": 932 }, { "epoch": 2.11, "learning_rate": 1.8342328268238831e-06, "loss": 2.6136, "step": 934 }, { "epoch": 2.12, "learning_rate": 1.83342376081808e-06, "loss": 2.2425, "step": 936 }, { "epoch": 2.12, "learning_rate": 1.8326129045421295e-06, "loss": 2.3558, "step": 938 }, { "epoch": 2.13, "learning_rate": 1.8318002597378243e-06, "loss": 2.5504, "step": 940 }, { "epoch": 2.13, "learning_rate": 1.8309858281508e-06, "loss": 2.8984, "step": 942 }, { "epoch": 2.14, "learning_rate": 1.8301696115305294e-06, "loss": 2.607, "step": 944 }, { "epoch": 2.14, "learning_rate": 1.8293516116303205e-06, "loss": 2.6641, "step": 946 }, { "epoch": 2.14, "learning_rate": 1.8285318302073113e-06, "loss": 2.3655, "step": 948 }, { "epoch": 2.15, "learning_rate": 1.827710269022467e-06, "loss": 2.649, "step": 950 }, { "epoch": 2.15, "learning_rate": 1.8268869298405762e-06, "loss": 2.2951, "step": 952 }, { "epoch": 2.16, "learning_rate": 1.8260618144302459e-06, "loss": 2.8344, "step": 954 }, { "epoch": 2.16, "learning_rate": 1.8252349245638997e-06, "loss": 2.3341, "step": 956 }, { "epoch": 2.17, "learning_rate": 1.8244062620177721e-06, "loss": 2.3695, "step": 958 }, { "epoch": 2.17, "learning_rate": 1.8235758285719052e-06, "loss": 2.8024, "step": 960 }, { "epoch": 2.18, "learning_rate": 1.8227436260101468e-06, "loss": 2.5526, "step": 962 }, { "epoch": 2.18, "learning_rate": 1.821909656120143e-06, "loss": 2.47, "step": 964 }, { "epoch": 2.18, "learning_rate": 1.821073920693337e-06, "loss": 2.449, "step": 966 }, { "epoch": 2.19, "learning_rate": 1.820236421524965e-06, "loss": 2.4071, "step": 968 }, { "epoch": 2.19, "learning_rate": 1.8193971604140515e-06, "loss": 2.8168, "step": 970 }, { "epoch": 2.2, "learning_rate": 1.8185561391634056e-06, "loss": 2.561, "step": 972 }, { "epoch": 2.2, "learning_rate": 1.8177133595796177e-06, "loss": 2.2887, "step": 974 }, { "epoch": 2.21, "learning_rate": 1.816868823473055e-06, "loss": 2.2514, "step": 976 }, { "epoch": 2.21, "learning_rate": 1.816022532657858e-06, "loss": 2.6151, "step": 978 }, { "epoch": 2.22, "learning_rate": 1.8151744889519365e-06, "loss": 2.5533, "step": 980 }, { "epoch": 2.22, "learning_rate": 1.8143246941769657e-06, "loss": 2.4734, "step": 982 }, { "epoch": 2.23, "learning_rate": 1.8134731501583817e-06, "loss": 2.2393, "step": 984 }, { "epoch": 2.23, "learning_rate": 1.8126198587253794e-06, "loss": 2.5585, "step": 986 }, { "epoch": 2.23, "learning_rate": 1.811764821710906e-06, "loss": 2.4241, "step": 988 }, { "epoch": 2.24, "learning_rate": 1.810908040951659e-06, "loss": 2.7939, "step": 990 }, { "epoch": 2.24, "learning_rate": 1.8100495182880813e-06, "loss": 2.5703, "step": 992 }, { "epoch": 2.25, "learning_rate": 1.809189255564358e-06, "loss": 2.7578, "step": 994 }, { "epoch": 2.25, "learning_rate": 1.808327254628412e-06, "loss": 2.458, "step": 996 }, { "epoch": 2.26, "learning_rate": 1.8074635173318992e-06, "loss": 2.6176, "step": 998 }, { "epoch": 2.26, "learning_rate": 1.8065980455302066e-06, "loss": 2.365, "step": 1000 }, { "epoch": 2.27, "learning_rate": 1.8057308410824463e-06, "loss": 2.4624, "step": 1002 }, { "epoch": 2.27, "learning_rate": 1.8048619058514522e-06, "loss": 2.7063, "step": 1004 }, { "epoch": 2.28, "learning_rate": 1.8039912417037763e-06, "loss": 2.4129, "step": 1006 }, { "epoch": 2.28, "learning_rate": 1.803118850509685e-06, "loss": 2.6864, "step": 1008 }, { "epoch": 2.28, "learning_rate": 1.8022447341431538e-06, "loss": 3.0802, "step": 1010 }, { "epoch": 2.29, "learning_rate": 1.8013688944818638e-06, "loss": 2.3165, "step": 1012 }, { "epoch": 2.29, "learning_rate": 1.8004913334071992e-06, "loss": 2.4781, "step": 1014 }, { "epoch": 2.3, "learning_rate": 1.7996120528042402e-06, "loss": 2.4112, "step": 1016 }, { "epoch": 2.3, "learning_rate": 1.7987310545617622e-06, "loss": 2.3141, "step": 1018 }, { "epoch": 2.31, "learning_rate": 1.7978483405722296e-06, "loss": 2.2761, "step": 1020 }, { "epoch": 2.31, "learning_rate": 1.7969639127317925e-06, "loss": 2.5459, "step": 1022 }, { "epoch": 2.32, "learning_rate": 1.7960777729402823e-06, "loss": 2.5135, "step": 1024 }, { "epoch": 2.32, "learning_rate": 1.795189923101208e-06, "loss": 2.6812, "step": 1026 }, { "epoch": 2.33, "learning_rate": 1.7943003651217522e-06, "loss": 2.4027, "step": 1028 }, { "epoch": 2.33, "learning_rate": 1.7934091009127657e-06, "loss": 2.5999, "step": 1030 }, { "epoch": 2.33, "learning_rate": 1.7925161323887662e-06, "loss": 2.3978, "step": 1032 }, { "epoch": 2.34, "learning_rate": 1.7916214614679306e-06, "loss": 2.6241, "step": 1034 }, { "epoch": 2.34, "learning_rate": 1.790725090072094e-06, "loss": 2.7711, "step": 1036 }, { "epoch": 2.35, "learning_rate": 1.7898270201267436e-06, "loss": 2.2952, "step": 1038 }, { "epoch": 2.35, "learning_rate": 1.7889272535610158e-06, "loss": 2.5796, "step": 1040 }, { "epoch": 2.36, "learning_rate": 1.7880257923076907e-06, "loss": 2.148, "step": 1042 }, { "epoch": 2.36, "learning_rate": 1.7871226383031893e-06, "loss": 2.727, "step": 1044 }, { "epoch": 2.37, "learning_rate": 1.7862177934875687e-06, "loss": 2.3907, "step": 1046 }, { "epoch": 2.37, "learning_rate": 1.785311259804518e-06, "loss": 2.6242, "step": 1048 }, { "epoch": 2.37, "learning_rate": 1.7844030392013538e-06, "loss": 2.4537, "step": 1050 }, { "epoch": 2.38, "learning_rate": 1.7834931336290172e-06, "loss": 2.7705, "step": 1052 }, { "epoch": 2.38, "learning_rate": 1.782581545042068e-06, "loss": 2.393, "step": 1054 }, { "epoch": 2.39, "learning_rate": 1.7816682753986815e-06, "loss": 2.4858, "step": 1056 }, { "epoch": 2.39, "learning_rate": 1.780753326660644e-06, "loss": 2.3062, "step": 1058 }, { "epoch": 2.4, "learning_rate": 1.779836700793349e-06, "loss": 2.2885, "step": 1060 }, { "epoch": 2.4, "learning_rate": 1.7789183997657919e-06, "loss": 2.5484, "step": 1062 }, { "epoch": 2.41, "learning_rate": 1.7779984255505675e-06, "loss": 2.5681, "step": 1064 }, { "epoch": 2.41, "learning_rate": 1.777076780123864e-06, "loss": 2.3677, "step": 1066 }, { "epoch": 2.42, "learning_rate": 1.7761534654654593e-06, "loss": 2.3538, "step": 1068 }, { "epoch": 2.42, "learning_rate": 1.7752284835587182e-06, "loss": 2.6437, "step": 1070 }, { "epoch": 2.42, "learning_rate": 1.774301836390585e-06, "loss": 2.6875, "step": 1072 }, { "epoch": 2.43, "learning_rate": 1.773373525951583e-06, "loss": 2.413, "step": 1074 }, { "epoch": 2.43, "learning_rate": 1.7724435542358078e-06, "loss": 2.6612, "step": 1076 }, { "epoch": 2.44, "learning_rate": 1.7715119232409225e-06, "loss": 2.2541, "step": 1078 }, { "epoch": 2.44, "learning_rate": 1.7705786349681562e-06, "loss": 2.7833, "step": 1080 }, { "epoch": 2.45, "learning_rate": 1.7696436914222965e-06, "loss": 2.692, "step": 1082 }, { "epoch": 2.45, "learning_rate": 1.7687070946116874e-06, "loss": 2.5163, "step": 1084 }, { "epoch": 2.46, "learning_rate": 1.7677688465482244e-06, "loss": 2.4503, "step": 1086 }, { "epoch": 2.46, "learning_rate": 1.7668289492473493e-06, "loss": 2.2691, "step": 1088 }, { "epoch": 2.47, "learning_rate": 1.7658874047280476e-06, "loss": 2.6636, "step": 1090 }, { "epoch": 2.47, "learning_rate": 1.764944215012843e-06, "loss": 2.2402, "step": 1092 }, { "epoch": 2.47, "learning_rate": 1.7639993821277921e-06, "loss": 3.0009, "step": 1094 }, { "epoch": 2.48, "learning_rate": 1.763052908102482e-06, "loss": 2.3759, "step": 1096 }, { "epoch": 2.48, "learning_rate": 1.762104794970026e-06, "loss": 2.4261, "step": 1098 }, { "epoch": 2.49, "learning_rate": 1.7611550447670567e-06, "loss": 2.3872, "step": 1100 }, { "epoch": 2.49, "learning_rate": 1.7602036595337243e-06, "loss": 2.5853, "step": 1102 }, { "epoch": 2.5, "learning_rate": 1.7592506413136906e-06, "loss": 2.3782, "step": 1104 }, { "epoch": 2.5, "learning_rate": 1.7582959921541264e-06, "loss": 2.3908, "step": 1106 }, { "epoch": 2.51, "learning_rate": 1.7573397141057042e-06, "loss": 2.4113, "step": 1108 }, { "epoch": 2.51, "learning_rate": 1.7563818092225967e-06, "loss": 2.4058, "step": 1110 }, { "epoch": 2.52, "learning_rate": 1.7554222795624707e-06, "loss": 2.5073, "step": 1112 }, { "epoch": 2.52, "learning_rate": 1.754461127186483e-06, "loss": 2.4979, "step": 1114 }, { "epoch": 2.52, "learning_rate": 1.7534983541592773e-06, "loss": 2.491, "step": 1116 }, { "epoch": 2.53, "learning_rate": 1.7525339625489772e-06, "loss": 2.3299, "step": 1118 }, { "epoch": 2.53, "learning_rate": 1.7515679544271838e-06, "loss": 2.6209, "step": 1120 }, { "epoch": 2.54, "learning_rate": 1.7506003318689703e-06, "loss": 2.2329, "step": 1122 }, { "epoch": 2.54, "learning_rate": 1.7496310969528785e-06, "loss": 2.84, "step": 1124 }, { "epoch": 2.55, "learning_rate": 1.748660251760913e-06, "loss": 2.4634, "step": 1126 }, { "epoch": 2.55, "learning_rate": 1.7476877983785378e-06, "loss": 2.2628, "step": 1128 }, { "epoch": 2.56, "learning_rate": 1.7467137388946716e-06, "loss": 2.4435, "step": 1130 }, { "epoch": 2.56, "learning_rate": 1.7457380754016823e-06, "loss": 2.6127, "step": 1132 }, { "epoch": 2.56, "learning_rate": 1.7447608099953842e-06, "loss": 2.6242, "step": 1134 }, { "epoch": 2.57, "learning_rate": 1.7437819447750327e-06, "loss": 2.6045, "step": 1136 }, { "epoch": 2.57, "learning_rate": 1.742801481843319e-06, "loss": 2.4415, "step": 1138 }, { "epoch": 2.58, "learning_rate": 1.741819423306367e-06, "loss": 2.3979, "step": 1140 }, { "epoch": 2.58, "learning_rate": 1.740835771273728e-06, "loss": 2.6006, "step": 1142 }, { "epoch": 2.59, "learning_rate": 1.7398505278583758e-06, "loss": 2.6163, "step": 1144 }, { "epoch": 2.59, "learning_rate": 1.7388636951767035e-06, "loss": 2.396, "step": 1146 }, { "epoch": 2.6, "learning_rate": 1.7378752753485171e-06, "loss": 2.4038, "step": 1148 }, { "epoch": 2.6, "learning_rate": 1.7368852704970328e-06, "loss": 2.3577, "step": 1150 }, { "epoch": 2.61, "learning_rate": 1.735893682748871e-06, "loss": 2.8212, "step": 1152 }, { "epoch": 2.61, "learning_rate": 1.7349005142340522e-06, "loss": 2.6463, "step": 1154 }, { "epoch": 2.61, "learning_rate": 1.7339057670859936e-06, "loss": 2.5239, "step": 1156 }, { "epoch": 2.62, "learning_rate": 1.732909443441502e-06, "loss": 2.5437, "step": 1158 }, { "epoch": 2.62, "learning_rate": 1.7319115454407715e-06, "loss": 2.5785, "step": 1160 }, { "epoch": 2.63, "learning_rate": 1.730912075227378e-06, "loss": 2.2862, "step": 1162 }, { "epoch": 2.63, "learning_rate": 1.7299110349482747e-06, "loss": 2.1492, "step": 1164 }, { "epoch": 2.64, "learning_rate": 1.728908426753787e-06, "loss": 2.428, "step": 1166 }, { "epoch": 2.64, "learning_rate": 1.7279042527976085e-06, "loss": 2.4149, "step": 1168 }, { "epoch": 2.65, "learning_rate": 1.7268985152367967e-06, "loss": 2.5587, "step": 1170 }, { "epoch": 2.65, "learning_rate": 1.7258912162317675e-06, "loss": 2.4955, "step": 1172 }, { "epoch": 2.66, "learning_rate": 1.7248823579462904e-06, "loss": 2.4643, "step": 1174 }, { "epoch": 2.66, "learning_rate": 1.7238719425474852e-06, "loss": 2.4708, "step": 1176 }, { "epoch": 2.66, "learning_rate": 1.7228599722058164e-06, "loss": 2.3787, "step": 1178 }, { "epoch": 2.67, "learning_rate": 1.7218464490950884e-06, "loss": 2.6198, "step": 1180 }, { "epoch": 2.67, "learning_rate": 1.7208313753924408e-06, "loss": 2.3774, "step": 1182 }, { "epoch": 2.68, "learning_rate": 1.7198147532783448e-06, "loss": 2.5964, "step": 1184 }, { "epoch": 2.68, "learning_rate": 1.718796584936597e-06, "loss": 2.4021, "step": 1186 }, { "epoch": 2.69, "learning_rate": 1.7177768725543158e-06, "loss": 2.5438, "step": 1188 }, { "epoch": 2.69, "learning_rate": 1.7167556183219362e-06, "loss": 2.5724, "step": 1190 }, { "epoch": 2.7, "learning_rate": 1.7157328244332057e-06, "loss": 2.6797, "step": 1192 }, { "epoch": 2.7, "learning_rate": 1.7147084930851783e-06, "loss": 2.4916, "step": 1194 }, { "epoch": 2.71, "learning_rate": 1.7136826264782114e-06, "loss": 2.3222, "step": 1196 }, { "epoch": 2.71, "learning_rate": 1.7126552268159595e-06, "loss": 2.1401, "step": 1198 }, { "epoch": 2.71, "learning_rate": 1.7116262963053712e-06, "loss": 2.9593, "step": 1200 }, { "epoch": 2.72, "learning_rate": 1.7105958371566823e-06, "loss": 2.4008, "step": 1202 }, { "epoch": 2.72, "learning_rate": 1.709563851583413e-06, "loss": 2.6847, "step": 1204 }, { "epoch": 2.73, "learning_rate": 1.7085303418023626e-06, "loss": 2.208, "step": 1206 }, { "epoch": 2.73, "learning_rate": 1.7074953100336042e-06, "loss": 2.3636, "step": 1208 }, { "epoch": 2.74, "learning_rate": 1.7064587585004806e-06, "loss": 2.4801, "step": 1210 }, { "epoch": 2.74, "learning_rate": 1.705420689429598e-06, "loss": 2.4984, "step": 1212 }, { "epoch": 2.75, "learning_rate": 1.7043811050508244e-06, "loss": 2.5331, "step": 1214 }, { "epoch": 2.75, "learning_rate": 1.703340007597281e-06, "loss": 2.4208, "step": 1216 }, { "epoch": 2.75, "learning_rate": 1.70229739930534e-06, "loss": 2.3254, "step": 1218 }, { "epoch": 2.76, "learning_rate": 1.7012532824146195e-06, "loss": 2.4997, "step": 1220 }, { "epoch": 2.76, "learning_rate": 1.700207659167977e-06, "loss": 2.3298, "step": 1222 }, { "epoch": 2.77, "learning_rate": 1.699160531811507e-06, "loss": 2.4146, "step": 1224 }, { "epoch": 2.77, "learning_rate": 1.698111902594534e-06, "loss": 2.3754, "step": 1226 }, { "epoch": 2.78, "learning_rate": 1.6970617737696096e-06, "loss": 2.7424, "step": 1228 }, { "epoch": 2.78, "learning_rate": 1.696010147592506e-06, "loss": 2.6457, "step": 1230 }, { "epoch": 2.79, "learning_rate": 1.6949570263222116e-06, "loss": 2.5523, "step": 1232 }, { "epoch": 2.79, "learning_rate": 1.6939024122209268e-06, "loss": 2.424, "step": 1234 }, { "epoch": 2.8, "learning_rate": 1.6928463075540594e-06, "loss": 2.3549, "step": 1236 }, { "epoch": 2.8, "learning_rate": 1.6917887145902178e-06, "loss": 2.2031, "step": 1238 }, { "epoch": 2.8, "learning_rate": 1.6907296356012085e-06, "loss": 2.4517, "step": 1240 }, { "epoch": 2.81, "learning_rate": 1.6896690728620296e-06, "loss": 2.4635, "step": 1242 }, { "epoch": 2.81, "learning_rate": 1.6886070286508657e-06, "loss": 2.5939, "step": 1244 }, { "epoch": 2.82, "learning_rate": 1.6875435052490854e-06, "loss": 2.6403, "step": 1246 }, { "epoch": 2.82, "learning_rate": 1.6864785049412334e-06, "loss": 2.3347, "step": 1248 }, { "epoch": 2.83, "learning_rate": 1.6854120300150272e-06, "loss": 2.4685, "step": 1250 }, { "epoch": 2.83, "learning_rate": 1.6843440827613527e-06, "loss": 2.3795, "step": 1252 }, { "epoch": 2.84, "learning_rate": 1.6832746654742573e-06, "loss": 2.5659, "step": 1254 }, { "epoch": 2.84, "learning_rate": 1.6822037804509471e-06, "loss": 2.4002, "step": 1256 }, { "epoch": 2.85, "learning_rate": 1.6811314299917804e-06, "loss": 2.4262, "step": 1258 }, { "epoch": 2.85, "learning_rate": 1.6800576164002635e-06, "loss": 2.599, "step": 1260 }, { "epoch": 2.85, "learning_rate": 1.6789823419830463e-06, "loss": 2.4544, "step": 1262 }, { "epoch": 2.86, "learning_rate": 1.6779056090499158e-06, "loss": 2.3587, "step": 1264 }, { "epoch": 2.86, "learning_rate": 1.676827419913793e-06, "loss": 2.428, "step": 1266 }, { "epoch": 2.87, "learning_rate": 1.6757477768907254e-06, "loss": 2.4027, "step": 1268 }, { "epoch": 2.87, "learning_rate": 1.6746666822998851e-06, "loss": 2.675, "step": 1270 }, { "epoch": 2.88, "learning_rate": 1.6735841384635619e-06, "loss": 2.3356, "step": 1272 }, { "epoch": 2.88, "learning_rate": 1.6725001477071582e-06, "loss": 2.573, "step": 1274 }, { "epoch": 2.89, "learning_rate": 1.6714147123591854e-06, "loss": 2.2519, "step": 1276 }, { "epoch": 2.89, "learning_rate": 1.670327834751257e-06, "loss": 2.7857, "step": 1278 }, { "epoch": 2.9, "learning_rate": 1.6692395172180849e-06, "loss": 2.6319, "step": 1280 }, { "epoch": 2.9, "learning_rate": 1.668149762097475e-06, "loss": 2.6005, "step": 1282 }, { "epoch": 2.9, "learning_rate": 1.6670585717303201e-06, "loss": 2.4265, "step": 1284 }, { "epoch": 2.91, "learning_rate": 1.6659659484605966e-06, "loss": 2.3283, "step": 1286 }, { "epoch": 2.91, "learning_rate": 1.6648718946353584e-06, "loss": 2.684, "step": 1288 }, { "epoch": 2.92, "learning_rate": 1.6637764126047335e-06, "loss": 2.4864, "step": 1290 }, { "epoch": 2.92, "learning_rate": 1.6626795047219168e-06, "loss": 2.5547, "step": 1292 }, { "epoch": 2.93, "learning_rate": 1.661581173343166e-06, "loss": 2.6756, "step": 1294 }, { "epoch": 2.93, "learning_rate": 1.6604814208277972e-06, "loss": 2.5402, "step": 1296 }, { "epoch": 2.94, "learning_rate": 1.659380249538179e-06, "loss": 2.602, "step": 1298 }, { "epoch": 2.94, "learning_rate": 1.6582776618397277e-06, "loss": 2.4283, "step": 1300 }, { "epoch": 2.94, "learning_rate": 1.6571736601009021e-06, "loss": 2.4714, "step": 1302 }, { "epoch": 2.95, "learning_rate": 1.6560682466931981e-06, "loss": 2.4282, "step": 1304 }, { "epoch": 2.95, "learning_rate": 1.654961423991145e-06, "loss": 2.4965, "step": 1306 }, { "epoch": 2.96, "learning_rate": 1.653853194372298e-06, "loss": 2.4844, "step": 1308 }, { "epoch": 2.96, "learning_rate": 1.6527435602172361e-06, "loss": 2.5985, "step": 1310 }, { "epoch": 2.97, "learning_rate": 1.651632523909554e-06, "loss": 2.2583, "step": 1312 }, { "epoch": 2.97, "learning_rate": 1.650520087835859e-06, "loss": 2.6012, "step": 1314 }, { "epoch": 2.98, "learning_rate": 1.6494062543857651e-06, "loss": 2.5839, "step": 1316 }, { "epoch": 2.98, "learning_rate": 1.6482910259518881e-06, "loss": 2.719, "step": 1318 }, { "epoch": 2.99, "learning_rate": 1.64717440492984e-06, "loss": 2.7217, "step": 1320 }, { "epoch": 2.99, "learning_rate": 1.6460563937182248e-06, "loss": 2.4131, "step": 1322 }, { "epoch": 2.99, "learning_rate": 1.6449369947186321e-06, "loss": 2.4515, "step": 1324 }, { "epoch": 3.0, "learning_rate": 1.6438162103356332e-06, "loss": 2.412, "step": 1326 }, { "epoch": 3.0, "learning_rate": 1.642694042976775e-06, "loss": 2.2864, "step": 1328 }, { "epoch": 3.01, "learning_rate": 1.641570495052575e-06, "loss": 2.315, "step": 1330 }, { "epoch": 3.01, "learning_rate": 1.6404455689765165e-06, "loss": 2.4863, "step": 1332 }, { "epoch": 3.02, "learning_rate": 1.6393192671650433e-06, "loss": 2.7534, "step": 1334 }, { "epoch": 3.02, "learning_rate": 1.6381915920375539e-06, "loss": 2.4737, "step": 1336 }, { "epoch": 3.03, "learning_rate": 1.637062546016398e-06, "loss": 2.3454, "step": 1338 }, { "epoch": 3.03, "learning_rate": 1.6359321315268683e-06, "loss": 2.5099, "step": 1340 }, { "epoch": 3.04, "learning_rate": 1.634800350997199e-06, "loss": 2.3486, "step": 1342 }, { "epoch": 3.04, "learning_rate": 1.633667206858557e-06, "loss": 2.2076, "step": 1344 }, { "epoch": 3.04, "learning_rate": 1.6325327015450396e-06, "loss": 2.2752, "step": 1346 }, { "epoch": 3.05, "learning_rate": 1.6313968374936673e-06, "loss": 2.3892, "step": 1348 }, { "epoch": 3.05, "learning_rate": 1.6302596171443795e-06, "loss": 2.3764, "step": 1350 }, { "epoch": 3.06, "learning_rate": 1.6291210429400296e-06, "loss": 2.2785, "step": 1352 }, { "epoch": 3.06, "learning_rate": 1.627981117326378e-06, "loss": 2.5743, "step": 1354 }, { "epoch": 3.07, "learning_rate": 1.6268398427520894e-06, "loss": 2.7498, "step": 1356 }, { "epoch": 3.07, "learning_rate": 1.6256972216687248e-06, "loss": 2.1805, "step": 1358 }, { "epoch": 3.08, "learning_rate": 1.624553256530739e-06, "loss": 2.5828, "step": 1360 }, { "epoch": 3.08, "learning_rate": 1.6234079497954731e-06, "loss": 2.6088, "step": 1362 }, { "epoch": 3.09, "learning_rate": 1.6222613039231501e-06, "loss": 2.4188, "step": 1364 }, { "epoch": 3.09, "learning_rate": 1.62111332137687e-06, "loss": 2.2557, "step": 1366 }, { "epoch": 3.09, "learning_rate": 1.6199640046226035e-06, "loss": 2.6659, "step": 1368 }, { "epoch": 3.1, "learning_rate": 1.6188133561291884e-06, "loss": 2.0596, "step": 1370 }, { "epoch": 3.1, "learning_rate": 1.6176613783683218e-06, "loss": 2.5042, "step": 1372 }, { "epoch": 3.11, "learning_rate": 1.616508073814557e-06, "loss": 2.3132, "step": 1374 }, { "epoch": 3.11, "learning_rate": 1.6153534449452972e-06, "loss": 2.413, "step": 1376 }, { "epoch": 3.12, "learning_rate": 1.6141974942407907e-06, "loss": 2.596, "step": 1378 }, { "epoch": 3.12, "learning_rate": 1.6130402241841247e-06, "loss": 2.3572, "step": 1380 }, { "epoch": 3.13, "learning_rate": 1.6118816372612207e-06, "loss": 2.3489, "step": 1382 }, { "epoch": 3.13, "learning_rate": 1.6107217359608287e-06, "loss": 2.5228, "step": 1384 }, { "epoch": 3.13, "learning_rate": 1.6095605227745229e-06, "loss": 2.3761, "step": 1386 }, { "epoch": 3.14, "learning_rate": 1.608398000196694e-06, "loss": 2.5952, "step": 1388 }, { "epoch": 3.14, "learning_rate": 1.6072341707245474e-06, "loss": 2.1189, "step": 1390 }, { "epoch": 3.15, "learning_rate": 1.6060690368580944e-06, "loss": 2.2626, "step": 1392 }, { "epoch": 3.15, "learning_rate": 1.6049026011001488e-06, "loss": 2.5117, "step": 1394 }, { "epoch": 3.16, "learning_rate": 1.6037348659563205e-06, "loss": 2.3999, "step": 1396 }, { "epoch": 3.16, "learning_rate": 1.6025658339350112e-06, "loss": 2.2231, "step": 1398 }, { "epoch": 3.17, "learning_rate": 1.6013955075474083e-06, "loss": 2.3908, "step": 1400 }, { "epoch": 3.17, "learning_rate": 1.6002238893074794e-06, "loss": 2.1988, "step": 1402 }, { "epoch": 3.18, "learning_rate": 1.599050981731967e-06, "loss": 2.3146, "step": 1404 }, { "epoch": 3.18, "learning_rate": 1.597876787340383e-06, "loss": 2.6744, "step": 1406 }, { "epoch": 3.18, "learning_rate": 1.596701308655005e-06, "loss": 2.405, "step": 1408 }, { "epoch": 3.19, "learning_rate": 1.5955245482008674e-06, "loss": 2.3743, "step": 1410 }, { "epoch": 3.19, "learning_rate": 1.5943465085057594e-06, "loss": 2.6347, "step": 1412 }, { "epoch": 3.2, "learning_rate": 1.5931671921002172e-06, "loss": 2.4252, "step": 1414 }, { "epoch": 3.2, "learning_rate": 1.5919866015175197e-06, "loss": 2.6307, "step": 1416 }, { "epoch": 3.21, "learning_rate": 1.5908047392936835e-06, "loss": 2.1272, "step": 1418 }, { "epoch": 3.21, "learning_rate": 1.589621607967456e-06, "loss": 2.6618, "step": 1420 }, { "epoch": 3.22, "learning_rate": 1.5884372100803112e-06, "loss": 2.6405, "step": 1422 }, { "epoch": 3.22, "learning_rate": 1.5872515481764436e-06, "loss": 2.3396, "step": 1424 }, { "epoch": 3.23, "learning_rate": 1.5860646248027623e-06, "loss": 2.7622, "step": 1426 }, { "epoch": 3.23, "learning_rate": 1.5848764425088878e-06, "loss": 2.3926, "step": 1428 }, { "epoch": 3.23, "learning_rate": 1.5836870038471436e-06, "loss": 2.4872, "step": 1430 }, { "epoch": 3.24, "learning_rate": 1.582496311372552e-06, "loss": 2.3206, "step": 1432 }, { "epoch": 3.24, "learning_rate": 1.5813043676428293e-06, "loss": 2.3901, "step": 1434 }, { "epoch": 3.25, "learning_rate": 1.5801111752183788e-06, "loss": 2.4461, "step": 1436 }, { "epoch": 3.25, "learning_rate": 1.5789167366622866e-06, "loss": 2.5871, "step": 1438 }, { "epoch": 3.26, "learning_rate": 1.5777210545403154e-06, "loss": 2.6168, "step": 1440 }, { "epoch": 3.26, "learning_rate": 1.5765241314208998e-06, "loss": 2.4094, "step": 1442 }, { "epoch": 3.27, "learning_rate": 1.575325969875139e-06, "loss": 2.3476, "step": 1444 }, { "epoch": 3.27, "learning_rate": 1.5741265724767941e-06, "loss": 2.2706, "step": 1446 }, { "epoch": 3.28, "learning_rate": 1.5729259418022789e-06, "loss": 2.2549, "step": 1448 }, { "epoch": 3.28, "learning_rate": 1.5717240804306578e-06, "loss": 2.3836, "step": 1450 }, { "epoch": 3.28, "learning_rate": 1.5705209909436387e-06, "loss": 2.273, "step": 1452 }, { "epoch": 3.29, "learning_rate": 1.569316675925567e-06, "loss": 2.2781, "step": 1454 }, { "epoch": 3.29, "learning_rate": 1.568111137963422e-06, "loss": 2.4964, "step": 1456 }, { "epoch": 3.3, "learning_rate": 1.5669043796468078e-06, "loss": 2.452, "step": 1458 }, { "epoch": 3.3, "learning_rate": 1.5656964035679518e-06, "loss": 2.2103, "step": 1460 }, { "epoch": 3.31, "learning_rate": 1.5644872123216968e-06, "loss": 2.3263, "step": 1462 }, { "epoch": 3.31, "learning_rate": 1.5632768085054956e-06, "loss": 2.3993, "step": 1464 }, { "epoch": 3.32, "learning_rate": 1.5620651947194054e-06, "loss": 2.3741, "step": 1466 }, { "epoch": 3.32, "learning_rate": 1.5608523735660834e-06, "loss": 2.7935, "step": 1468 }, { "epoch": 3.32, "learning_rate": 1.55963834765078e-06, "loss": 2.5123, "step": 1470 }, { "epoch": 3.33, "learning_rate": 1.5584231195813332e-06, "loss": 2.5983, "step": 1472 }, { "epoch": 3.33, "learning_rate": 1.5572066919681634e-06, "loss": 2.4809, "step": 1474 }, { "epoch": 3.34, "learning_rate": 1.5559890674242687e-06, "loss": 2.3429, "step": 1476 }, { "epoch": 3.34, "learning_rate": 1.5547702485652164e-06, "loss": 2.5719, "step": 1478 }, { "epoch": 3.35, "learning_rate": 1.5535502380091411e-06, "loss": 2.4216, "step": 1480 }, { "epoch": 3.35, "learning_rate": 1.5523290383767366e-06, "loss": 2.6257, "step": 1482 }, { "epoch": 3.36, "learning_rate": 1.551106652291251e-06, "loss": 2.5949, "step": 1484 }, { "epoch": 3.36, "learning_rate": 1.5498830823784808e-06, "loss": 2.5236, "step": 1486 }, { "epoch": 3.37, "learning_rate": 1.5486583312667652e-06, "loss": 2.2841, "step": 1488 }, { "epoch": 3.37, "learning_rate": 1.5474324015869819e-06, "loss": 2.6608, "step": 1490 }, { "epoch": 3.37, "learning_rate": 1.5462052959725388e-06, "loss": 2.6327, "step": 1492 }, { "epoch": 3.38, "learning_rate": 1.5449770170593715e-06, "loss": 2.3065, "step": 1494 }, { "epoch": 3.38, "learning_rate": 1.5437475674859335e-06, "loss": 2.4162, "step": 1496 }, { "epoch": 3.39, "learning_rate": 1.542516949893196e-06, "loss": 2.5158, "step": 1498 }, { "epoch": 3.39, "learning_rate": 1.5412851669246368e-06, "loss": 2.4369, "step": 1500 }, { "epoch": 3.4, "learning_rate": 1.540052221226238e-06, "loss": 2.6495, "step": 1502 }, { "epoch": 3.4, "learning_rate": 1.5388181154464795e-06, "loss": 2.3576, "step": 1504 }, { "epoch": 3.41, "learning_rate": 1.5375828522363325e-06, "loss": 2.6325, "step": 1506 }, { "epoch": 3.41, "learning_rate": 1.5363464342492552e-06, "loss": 2.4328, "step": 1508 }, { "epoch": 3.42, "learning_rate": 1.5351088641411863e-06, "loss": 2.5354, "step": 1510 }, { "epoch": 3.42, "learning_rate": 1.5338701445705385e-06, "loss": 2.7207, "step": 1512 }, { "epoch": 3.42, "learning_rate": 1.5326302781981948e-06, "loss": 2.3158, "step": 1514 }, { "epoch": 3.43, "learning_rate": 1.5313892676875008e-06, "loss": 2.6291, "step": 1516 }, { "epoch": 3.43, "learning_rate": 1.5301471157042603e-06, "loss": 2.4181, "step": 1518 }, { "epoch": 3.44, "learning_rate": 1.5289038249167285e-06, "loss": 2.4438, "step": 1520 }, { "epoch": 3.44, "learning_rate": 1.5276593979956078e-06, "loss": 2.4456, "step": 1522 }, { "epoch": 3.45, "learning_rate": 1.5264138376140405e-06, "loss": 2.3465, "step": 1524 }, { "epoch": 3.45, "learning_rate": 1.5251671464476034e-06, "loss": 2.6112, "step": 1526 }, { "epoch": 3.46, "learning_rate": 1.5239193271743025e-06, "loss": 2.7099, "step": 1528 }, { "epoch": 3.46, "learning_rate": 1.5226703824745682e-06, "loss": 2.5169, "step": 1530 }, { "epoch": 3.47, "learning_rate": 1.5214203150312463e-06, "loss": 2.6179, "step": 1532 }, { "epoch": 3.47, "learning_rate": 1.5201691275295963e-06, "loss": 2.6206, "step": 1534 }, { "epoch": 3.47, "learning_rate": 1.5189168226572825e-06, "loss": 2.4832, "step": 1536 }, { "epoch": 3.48, "learning_rate": 1.5176634031043706e-06, "loss": 2.4303, "step": 1538 }, { "epoch": 3.48, "learning_rate": 1.5164088715633185e-06, "loss": 2.3131, "step": 1540 }, { "epoch": 3.49, "learning_rate": 1.515153230728976e-06, "loss": 2.4736, "step": 1542 }, { "epoch": 3.49, "learning_rate": 1.5138964832985724e-06, "loss": 2.3829, "step": 1544 }, { "epoch": 3.5, "learning_rate": 1.5126386319717166e-06, "loss": 2.3293, "step": 1546 }, { "epoch": 3.5, "learning_rate": 1.5113796794503875e-06, "loss": 2.5245, "step": 1548 }, { "epoch": 3.51, "learning_rate": 1.5101196284389297e-06, "loss": 2.6711, "step": 1550 }, { "epoch": 3.51, "learning_rate": 1.5088584816440482e-06, "loss": 2.2194, "step": 1552 }, { "epoch": 3.51, "learning_rate": 1.5075962417748e-06, "loss": 2.6407, "step": 1554 }, { "epoch": 3.52, "learning_rate": 1.5063329115425923e-06, "loss": 2.5811, "step": 1556 }, { "epoch": 3.52, "learning_rate": 1.505068493661173e-06, "loss": 2.533, "step": 1558 }, { "epoch": 3.53, "learning_rate": 1.503802990846627e-06, "loss": 2.3742, "step": 1560 }, { "epoch": 3.53, "learning_rate": 1.50253640581737e-06, "loss": 2.4339, "step": 1562 }, { "epoch": 3.54, "learning_rate": 1.5012687412941412e-06, "loss": 2.1867, "step": 1564 }, { "epoch": 3.54, "learning_rate": 1.5e-06, "loss": 2.3181, "step": 1566 }, { "epoch": 3.55, "learning_rate": 1.4987301846603183e-06, "loss": 2.5327, "step": 1568 }, { "epoch": 3.55, "learning_rate": 1.4974592980027749e-06, "loss": 2.4644, "step": 1570 }, { "epoch": 3.56, "learning_rate": 1.49618734275735e-06, "loss": 2.3521, "step": 1572 }, { "epoch": 3.56, "learning_rate": 1.4949143216563195e-06, "loss": 2.323, "step": 1574 }, { "epoch": 3.56, "learning_rate": 1.493640237434249e-06, "loss": 2.4037, "step": 1576 }, { "epoch": 3.57, "learning_rate": 1.4923650928279867e-06, "loss": 2.2227, "step": 1578 }, { "epoch": 3.57, "learning_rate": 1.4910888905766602e-06, "loss": 2.5474, "step": 1580 }, { "epoch": 3.58, "learning_rate": 1.4898116334216673e-06, "loss": 2.165, "step": 1582 }, { "epoch": 3.58, "learning_rate": 1.4885333241066734e-06, "loss": 2.5061, "step": 1584 }, { "epoch": 3.59, "learning_rate": 1.4872539653776028e-06, "loss": 2.3724, "step": 1586 }, { "epoch": 3.59, "learning_rate": 1.4859735599826352e-06, "loss": 2.6099, "step": 1588 }, { "epoch": 3.6, "learning_rate": 1.4846921106721974e-06, "loss": 2.5928, "step": 1590 }, { "epoch": 3.6, "learning_rate": 1.4834096201989596e-06, "loss": 2.2369, "step": 1592 }, { "epoch": 3.61, "learning_rate": 1.4821260913178282e-06, "loss": 2.3638, "step": 1594 }, { "epoch": 3.61, "learning_rate": 1.4808415267859398e-06, "loss": 2.3177, "step": 1596 }, { "epoch": 3.61, "learning_rate": 1.4795559293626562e-06, "loss": 2.2792, "step": 1598 }, { "epoch": 3.62, "learning_rate": 1.4782693018095577e-06, "loss": 2.3882, "step": 1600 }, { "epoch": 3.62, "learning_rate": 1.4769816468904378e-06, "loss": 2.3006, "step": 1602 }, { "epoch": 3.63, "learning_rate": 1.4756929673712962e-06, "loss": 2.2557, "step": 1604 }, { "epoch": 3.63, "learning_rate": 1.4744032660203339e-06, "loss": 2.5488, "step": 1606 }, { "epoch": 3.64, "learning_rate": 1.4731125456079467e-06, "loss": 2.3487, "step": 1608 }, { "epoch": 3.64, "learning_rate": 1.47182080890672e-06, "loss": 2.641, "step": 1610 }, { "epoch": 3.65, "learning_rate": 1.4705280586914218e-06, "loss": 2.1686, "step": 1612 }, { "epoch": 3.65, "learning_rate": 1.4692342977389976e-06, "loss": 2.3053, "step": 1614 }, { "epoch": 3.66, "learning_rate": 1.4679395288285627e-06, "loss": 2.247, "step": 1616 }, { "epoch": 3.66, "learning_rate": 1.4666437547413999e-06, "loss": 2.6372, "step": 1618 }, { "epoch": 3.66, "learning_rate": 1.465346978260949e-06, "loss": 2.6255, "step": 1620 }, { "epoch": 3.67, "learning_rate": 1.4640492021728043e-06, "loss": 2.1904, "step": 1622 }, { "epoch": 3.67, "learning_rate": 1.4627504292647074e-06, "loss": 2.5925, "step": 1624 }, { "epoch": 3.68, "learning_rate": 1.4614506623265406e-06, "loss": 2.6503, "step": 1626 }, { "epoch": 3.68, "learning_rate": 1.4601499041503216e-06, "loss": 2.3066, "step": 1628 }, { "epoch": 3.69, "learning_rate": 1.458848157530197e-06, "loss": 2.4649, "step": 1630 }, { "epoch": 3.69, "learning_rate": 1.4575454252624383e-06, "loss": 2.2839, "step": 1632 }, { "epoch": 3.7, "learning_rate": 1.4562417101454316e-06, "loss": 2.5123, "step": 1634 }, { "epoch": 3.7, "learning_rate": 1.4549370149796769e-06, "loss": 2.3119, "step": 1636 }, { "epoch": 3.7, "learning_rate": 1.4536313425677775e-06, "loss": 2.4356, "step": 1638 }, { "epoch": 3.71, "learning_rate": 1.452324695714437e-06, "loss": 2.5462, "step": 1640 }, { "epoch": 3.71, "learning_rate": 1.451017077226452e-06, "loss": 2.4099, "step": 1642 }, { "epoch": 3.72, "learning_rate": 1.4497084899127056e-06, "loss": 2.4389, "step": 1644 }, { "epoch": 3.72, "learning_rate": 1.4483989365841632e-06, "loss": 2.7143, "step": 1646 }, { "epoch": 3.73, "learning_rate": 1.447088420053864e-06, "loss": 2.5341, "step": 1648 }, { "epoch": 3.73, "learning_rate": 1.4457769431369173e-06, "loss": 2.6033, "step": 1650 }, { "epoch": 3.74, "learning_rate": 1.4444645086504947e-06, "loss": 2.2592, "step": 1652 }, { "epoch": 3.74, "learning_rate": 1.443151119413825e-06, "loss": 2.3131, "step": 1654 }, { "epoch": 3.75, "learning_rate": 1.4418367782481882e-06, "loss": 2.5731, "step": 1656 }, { "epoch": 3.75, "learning_rate": 1.4405214879769084e-06, "loss": 2.6759, "step": 1658 }, { "epoch": 3.75, "learning_rate": 1.4392052514253497e-06, "loss": 2.5662, "step": 1660 }, { "epoch": 3.76, "learning_rate": 1.437888071420907e-06, "loss": 2.365, "step": 1662 }, { "epoch": 3.76, "learning_rate": 1.4365699507930039e-06, "loss": 2.3041, "step": 1664 }, { "epoch": 3.77, "learning_rate": 1.4352508923730829e-06, "loss": 2.2466, "step": 1666 }, { "epoch": 3.77, "learning_rate": 1.433930898994602e-06, "loss": 2.6212, "step": 1668 }, { "epoch": 3.78, "learning_rate": 1.432609973493027e-06, "loss": 2.3997, "step": 1670 }, { "epoch": 3.78, "learning_rate": 1.431288118705826e-06, "loss": 2.4061, "step": 1672 }, { "epoch": 3.79, "learning_rate": 1.4299653374724642e-06, "loss": 2.6446, "step": 1674 }, { "epoch": 3.79, "learning_rate": 1.4286416326343957e-06, "loss": 2.3368, "step": 1676 }, { "epoch": 3.8, "learning_rate": 1.4273170070350587e-06, "loss": 2.2844, "step": 1678 }, { "epoch": 3.8, "learning_rate": 1.4259914635198701e-06, "loss": 2.3913, "step": 1680 }, { "epoch": 3.8, "learning_rate": 1.4246650049362185e-06, "loss": 2.271, "step": 1682 }, { "epoch": 3.81, "learning_rate": 1.4233376341334567e-06, "loss": 2.2982, "step": 1684 }, { "epoch": 3.81, "learning_rate": 1.422009353962899e-06, "loss": 2.1357, "step": 1686 }, { "epoch": 3.82, "learning_rate": 1.4206801672778117e-06, "loss": 2.1893, "step": 1688 }, { "epoch": 3.82, "learning_rate": 1.4193500769334088e-06, "loss": 2.5499, "step": 1690 }, { "epoch": 3.83, "learning_rate": 1.4180190857868459e-06, "loss": 2.4208, "step": 1692 }, { "epoch": 3.83, "learning_rate": 1.4166871966972132e-06, "loss": 2.5682, "step": 1694 }, { "epoch": 3.84, "learning_rate": 1.4153544125255291e-06, "loss": 2.5289, "step": 1696 }, { "epoch": 3.84, "learning_rate": 1.4140207361347364e-06, "loss": 2.622, "step": 1698 }, { "epoch": 3.85, "learning_rate": 1.4126861703896926e-06, "loss": 2.3856, "step": 1700 }, { "epoch": 3.85, "learning_rate": 1.4113507181571669e-06, "loss": 2.3768, "step": 1702 }, { "epoch": 3.85, "learning_rate": 1.4100143823058324e-06, "loss": 2.3536, "step": 1704 }, { "epoch": 3.86, "learning_rate": 1.4086771657062597e-06, "loss": 2.4242, "step": 1706 }, { "epoch": 3.86, "learning_rate": 1.4073390712309126e-06, "loss": 2.5667, "step": 1708 }, { "epoch": 3.87, "learning_rate": 1.4060001017541395e-06, "loss": 2.2116, "step": 1710 }, { "epoch": 3.87, "learning_rate": 1.4046602601521688e-06, "loss": 2.3444, "step": 1712 }, { "epoch": 3.88, "learning_rate": 1.4033195493031021e-06, "loss": 2.4935, "step": 1714 }, { "epoch": 3.88, "learning_rate": 1.4019779720869088e-06, "loss": 2.4833, "step": 1716 }, { "epoch": 3.89, "learning_rate": 1.4006355313854189e-06, "loss": 2.7645, "step": 1718 }, { "epoch": 3.89, "learning_rate": 1.3992922300823166e-06, "loss": 2.5373, "step": 1720 }, { "epoch": 3.89, "learning_rate": 1.3979480710631366e-06, "loss": 2.3545, "step": 1722 }, { "epoch": 3.9, "learning_rate": 1.3966030572152542e-06, "loss": 2.5239, "step": 1724 }, { "epoch": 3.9, "learning_rate": 1.395257191427882e-06, "loss": 2.5872, "step": 1726 }, { "epoch": 3.91, "learning_rate": 1.3939104765920616e-06, "loss": 2.3978, "step": 1728 }, { "epoch": 3.91, "learning_rate": 1.39256291560066e-06, "loss": 2.5142, "step": 1730 }, { "epoch": 3.92, "learning_rate": 1.3912145113483606e-06, "loss": 2.7417, "step": 1732 }, { "epoch": 3.92, "learning_rate": 1.3898652667316582e-06, "loss": 2.3602, "step": 1734 }, { "epoch": 3.93, "learning_rate": 1.3885151846488536e-06, "loss": 2.3813, "step": 1736 }, { "epoch": 3.93, "learning_rate": 1.387164268000046e-06, "loss": 2.5534, "step": 1738 }, { "epoch": 3.94, "learning_rate": 1.385812519687127e-06, "loss": 2.5258, "step": 1740 }, { "epoch": 3.94, "learning_rate": 1.3844599426137755e-06, "loss": 2.6035, "step": 1742 }, { "epoch": 3.94, "learning_rate": 1.3831065396854507e-06, "loss": 2.5751, "step": 1744 }, { "epoch": 3.95, "learning_rate": 1.3817523138093842e-06, "loss": 2.5558, "step": 1746 }, { "epoch": 3.95, "learning_rate": 1.3803972678945774e-06, "loss": 2.0873, "step": 1748 }, { "epoch": 3.96, "learning_rate": 1.379041404851792e-06, "loss": 2.1898, "step": 1750 }, { "epoch": 3.96, "learning_rate": 1.3776847275935456e-06, "loss": 2.5692, "step": 1752 }, { "epoch": 3.97, "learning_rate": 1.3763272390341042e-06, "loss": 2.4294, "step": 1754 }, { "epoch": 3.97, "learning_rate": 1.374968942089477e-06, "loss": 2.429, "step": 1756 }, { "epoch": 3.98, "learning_rate": 1.3736098396774094e-06, "loss": 2.4356, "step": 1758 }, { "epoch": 3.98, "learning_rate": 1.3722499347173772e-06, "loss": 2.2805, "step": 1760 }, { "epoch": 3.99, "learning_rate": 1.3708892301305807e-06, "loss": 2.4228, "step": 1762 }, { "epoch": 3.99, "learning_rate": 1.3695277288399365e-06, "loss": 2.2363, "step": 1764 }, { "epoch": 3.99, "learning_rate": 1.3681654337700736e-06, "loss": 2.426, "step": 1766 }, { "epoch": 4.0, "learning_rate": 1.3668023478473264e-06, "loss": 2.7234, "step": 1768 }, { "epoch": 4.0, "learning_rate": 1.365438473999727e-06, "loss": 2.5752, "step": 1770 }, { "epoch": 4.01, "learning_rate": 1.3640738151570005e-06, "loss": 2.4224, "step": 1772 }, { "epoch": 4.01, "learning_rate": 1.362708374250559e-06, "loss": 2.4265, "step": 1774 }, { "epoch": 4.02, "learning_rate": 1.3613421542134936e-06, "loss": 2.4323, "step": 1776 }, { "epoch": 4.02, "learning_rate": 1.3599751579805697e-06, "loss": 2.2774, "step": 1778 }, { "epoch": 4.03, "learning_rate": 1.358607388488219e-06, "loss": 2.5681, "step": 1780 }, { "epoch": 4.03, "learning_rate": 1.357238848674536e-06, "loss": 2.2788, "step": 1782 }, { "epoch": 4.04, "learning_rate": 1.3558695414792677e-06, "loss": 2.5053, "step": 1784 }, { "epoch": 4.04, "learning_rate": 1.3544994698438115e-06, "loss": 2.4669, "step": 1786 }, { "epoch": 4.04, "learning_rate": 1.3531286367112058e-06, "loss": 2.2683, "step": 1788 }, { "epoch": 4.05, "learning_rate": 1.351757045026125e-06, "loss": 2.2351, "step": 1790 }, { "epoch": 4.05, "learning_rate": 1.350384697734873e-06, "loss": 2.3426, "step": 1792 }, { "epoch": 4.06, "learning_rate": 1.3490115977853772e-06, "loss": 2.3011, "step": 1794 }, { "epoch": 4.06, "learning_rate": 1.34763774812718e-06, "loss": 2.3215, "step": 1796 }, { "epoch": 4.07, "learning_rate": 1.3462631517114374e-06, "loss": 2.4746, "step": 1798 }, { "epoch": 4.07, "learning_rate": 1.3448878114909064e-06, "loss": 2.4723, "step": 1800 }, { "epoch": 4.08, "learning_rate": 1.3435117304199434e-06, "loss": 2.5015, "step": 1802 }, { "epoch": 4.08, "learning_rate": 1.3421349114544962e-06, "loss": 2.3998, "step": 1804 }, { "epoch": 4.08, "learning_rate": 1.3407573575520973e-06, "loss": 2.2199, "step": 1806 }, { "epoch": 4.09, "learning_rate": 1.3393790716718577e-06, "loss": 2.1974, "step": 1808 }, { "epoch": 4.09, "learning_rate": 1.3380000567744608e-06, "loss": 2.4168, "step": 1810 }, { "epoch": 4.1, "learning_rate": 1.3366203158221566e-06, "loss": 2.4152, "step": 1812 }, { "epoch": 4.1, "learning_rate": 1.335239851778754e-06, "loss": 2.4792, "step": 1814 }, { "epoch": 4.11, "learning_rate": 1.3338586676096157e-06, "loss": 2.267, "step": 1816 }, { "epoch": 4.11, "learning_rate": 1.3324767662816513e-06, "loss": 2.2245, "step": 1818 }, { "epoch": 4.12, "learning_rate": 1.3310941507633107e-06, "loss": 2.1719, "step": 1820 }, { "epoch": 4.12, "learning_rate": 1.3297108240245775e-06, "loss": 2.5786, "step": 1822 }, { "epoch": 4.13, "learning_rate": 1.3283267890369638e-06, "loss": 2.4891, "step": 1824 }, { "epoch": 4.13, "learning_rate": 1.326942048773503e-06, "loss": 2.232, "step": 1826 }, { "epoch": 4.13, "learning_rate": 1.3255566062087428e-06, "loss": 2.2262, "step": 1828 }, { "epoch": 4.14, "learning_rate": 1.3241704643187408e-06, "loss": 2.1527, "step": 1830 }, { "epoch": 4.14, "learning_rate": 1.3227836260810555e-06, "loss": 2.5635, "step": 1832 }, { "epoch": 4.15, "learning_rate": 1.3213960944747416e-06, "loss": 2.6371, "step": 1834 }, { "epoch": 4.15, "learning_rate": 1.3200078724803437e-06, "loss": 2.1861, "step": 1836 }, { "epoch": 4.16, "learning_rate": 1.3186189630798892e-06, "loss": 2.2795, "step": 1838 }, { "epoch": 4.16, "learning_rate": 1.3172293692568816e-06, "loss": 2.363, "step": 1840 }, { "epoch": 4.17, "learning_rate": 1.3158390939962952e-06, "loss": 2.8042, "step": 1842 }, { "epoch": 4.17, "learning_rate": 1.3144481402845677e-06, "loss": 2.4579, "step": 1844 }, { "epoch": 4.18, "learning_rate": 1.3130565111095947e-06, "loss": 2.486, "step": 1846 }, { "epoch": 4.18, "learning_rate": 1.3116642094607221e-06, "loss": 2.3256, "step": 1848 }, { "epoch": 4.18, "learning_rate": 1.3102712383287411e-06, "loss": 2.5683, "step": 1850 }, { "epoch": 4.19, "learning_rate": 1.30887760070588e-06, "loss": 2.3407, "step": 1852 }, { "epoch": 4.19, "learning_rate": 1.3074832995857996e-06, "loss": 2.2962, "step": 1854 }, { "epoch": 4.2, "learning_rate": 1.3060883379635855e-06, "loss": 2.7058, "step": 1856 }, { "epoch": 4.2, "learning_rate": 1.304692718835743e-06, "loss": 2.3485, "step": 1858 }, { "epoch": 4.21, "learning_rate": 1.3032964452001886e-06, "loss": 2.6635, "step": 1860 }, { "epoch": 4.21, "learning_rate": 1.301899520056245e-06, "loss": 2.1123, "step": 1862 }, { "epoch": 4.22, "learning_rate": 1.3005019464046352e-06, "loss": 2.4141, "step": 1864 }, { "epoch": 4.22, "learning_rate": 1.2991037272474743e-06, "loss": 2.5197, "step": 1866 }, { "epoch": 4.23, "learning_rate": 1.2977048655882644e-06, "loss": 2.2944, "step": 1868 }, { "epoch": 4.23, "learning_rate": 1.296305364431888e-06, "loss": 2.6415, "step": 1870 }, { "epoch": 4.23, "learning_rate": 1.294905226784601e-06, "loss": 2.3808, "step": 1872 }, { "epoch": 4.24, "learning_rate": 1.2935044556540265e-06, "loss": 2.4877, "step": 1874 }, { "epoch": 4.24, "learning_rate": 1.2921030540491482e-06, "loss": 2.6193, "step": 1876 }, { "epoch": 4.25, "learning_rate": 1.2907010249803052e-06, "loss": 2.3594, "step": 1878 }, { "epoch": 4.25, "learning_rate": 1.2892983714591825e-06, "loss": 2.17, "step": 1880 }, { "epoch": 4.26, "learning_rate": 1.2878950964988086e-06, "loss": 2.4788, "step": 1882 }, { "epoch": 4.26, "learning_rate": 1.2864912031135455e-06, "loss": 2.4027, "step": 1884 }, { "epoch": 4.27, "learning_rate": 1.2850866943190843e-06, "loss": 2.4353, "step": 1886 }, { "epoch": 4.27, "learning_rate": 1.2836815731324375e-06, "loss": 2.6185, "step": 1888 }, { "epoch": 4.27, "learning_rate": 1.2822758425719336e-06, "loss": 2.2183, "step": 1890 }, { "epoch": 4.28, "learning_rate": 1.2808695056572098e-06, "loss": 2.3426, "step": 1892 }, { "epoch": 4.28, "learning_rate": 1.2794625654092057e-06, "loss": 2.2975, "step": 1894 }, { "epoch": 4.29, "learning_rate": 1.2780550248501577e-06, "loss": 2.5333, "step": 1896 }, { "epoch": 4.29, "learning_rate": 1.2766468870035905e-06, "loss": 2.3948, "step": 1898 }, { "epoch": 4.3, "learning_rate": 1.2752381548943122e-06, "loss": 2.161, "step": 1900 }, { "epoch": 4.3, "learning_rate": 1.2738288315484088e-06, "loss": 2.7284, "step": 1902 }, { "epoch": 4.31, "learning_rate": 1.272418919993234e-06, "loss": 2.2664, "step": 1904 }, { "epoch": 4.31, "learning_rate": 1.2710084232574073e-06, "loss": 2.4391, "step": 1906 }, { "epoch": 4.32, "learning_rate": 1.2695973443708035e-06, "loss": 2.5834, "step": 1908 }, { "epoch": 4.32, "learning_rate": 1.2681856863645485e-06, "loss": 2.4076, "step": 1910 }, { "epoch": 4.32, "learning_rate": 1.2667734522710128e-06, "loss": 2.4514, "step": 1912 }, { "epoch": 4.33, "learning_rate": 1.2653606451238035e-06, "loss": 2.2832, "step": 1914 }, { "epoch": 4.33, "learning_rate": 1.263947267957759e-06, "loss": 2.4629, "step": 1916 }, { "epoch": 4.34, "learning_rate": 1.2625333238089421e-06, "loss": 2.5803, "step": 1918 }, { "epoch": 4.34, "learning_rate": 1.2611188157146341e-06, "loss": 2.2946, "step": 1920 }, { "epoch": 4.35, "learning_rate": 1.2597037467133267e-06, "loss": 2.4157, "step": 1922 }, { "epoch": 4.35, "learning_rate": 1.2582881198447172e-06, "loss": 2.4456, "step": 1924 }, { "epoch": 4.36, "learning_rate": 1.2568719381497013e-06, "loss": 1.9894, "step": 1926 }, { "epoch": 4.36, "learning_rate": 1.2554552046703658e-06, "loss": 2.2723, "step": 1928 }, { "epoch": 4.37, "learning_rate": 1.2540379224499838e-06, "loss": 2.5817, "step": 1930 }, { "epoch": 4.37, "learning_rate": 1.2526200945330056e-06, "loss": 2.2494, "step": 1932 }, { "epoch": 4.37, "learning_rate": 1.251201723965056e-06, "loss": 2.4479, "step": 1934 }, { "epoch": 4.38, "learning_rate": 1.2497828137929233e-06, "loss": 2.1995, "step": 1936 }, { "epoch": 4.38, "learning_rate": 1.2483633670645562e-06, "loss": 2.2064, "step": 1938 }, { "epoch": 4.39, "learning_rate": 1.2469433868290552e-06, "loss": 2.1307, "step": 1940 }, { "epoch": 4.39, "learning_rate": 1.2455228761366674e-06, "loss": 2.3635, "step": 1942 }, { "epoch": 4.4, "learning_rate": 1.244101838038779e-06, "loss": 2.2071, "step": 1944 }, { "epoch": 4.4, "learning_rate": 1.2426802755879096e-06, "loss": 2.4216, "step": 1946 }, { "epoch": 4.41, "learning_rate": 1.2412581918377045e-06, "loss": 2.4252, "step": 1948 }, { "epoch": 4.41, "learning_rate": 1.2398355898429295e-06, "loss": 2.7158, "step": 1950 }, { "epoch": 4.42, "learning_rate": 1.2384124726594626e-06, "loss": 2.1449, "step": 1952 }, { "epoch": 4.42, "learning_rate": 1.23698884334429e-06, "loss": 2.2603, "step": 1954 }, { "epoch": 4.42, "learning_rate": 1.235564704955496e-06, "loss": 2.5274, "step": 1956 }, { "epoch": 4.43, "learning_rate": 1.2341400605522604e-06, "loss": 2.4358, "step": 1958 }, { "epoch": 4.43, "learning_rate": 1.2327149131948492e-06, "loss": 2.3367, "step": 1960 }, { "epoch": 4.44, "learning_rate": 1.2312892659446083e-06, "loss": 2.2516, "step": 1962 }, { "epoch": 4.44, "learning_rate": 1.2298631218639584e-06, "loss": 2.5467, "step": 1964 }, { "epoch": 4.45, "learning_rate": 1.228436484016386e-06, "loss": 2.6495, "step": 1966 }, { "epoch": 4.45, "learning_rate": 1.22700935546644e-06, "loss": 2.3837, "step": 1968 }, { "epoch": 4.46, "learning_rate": 1.225581739279722e-06, "loss": 2.4825, "step": 1970 }, { "epoch": 4.46, "learning_rate": 1.224153638522882e-06, "loss": 2.3524, "step": 1972 }, { "epoch": 4.46, "learning_rate": 1.2227250562636098e-06, "loss": 2.6791, "step": 1974 }, { "epoch": 4.47, "learning_rate": 1.2212959955706309e-06, "loss": 2.3547, "step": 1976 }, { "epoch": 4.47, "learning_rate": 1.2198664595136977e-06, "loss": 2.4261, "step": 1978 }, { "epoch": 4.48, "learning_rate": 1.2184364511635835e-06, "loss": 2.2508, "step": 1980 }, { "epoch": 4.48, "learning_rate": 1.2170059735920768e-06, "loss": 2.3813, "step": 1982 }, { "epoch": 4.49, "learning_rate": 1.2155750298719736e-06, "loss": 2.4343, "step": 1984 }, { "epoch": 4.49, "learning_rate": 1.2141436230770716e-06, "loss": 2.2746, "step": 1986 }, { "epoch": 4.5, "learning_rate": 1.2127117562821627e-06, "loss": 2.343, "step": 1988 }, { "epoch": 4.5, "learning_rate": 1.2112794325630273e-06, "loss": 2.436, "step": 1990 }, { "epoch": 4.51, "learning_rate": 1.2098466549964268e-06, "loss": 2.3389, "step": 1992 }, { "epoch": 4.51, "learning_rate": 1.2084134266600986e-06, "loss": 2.7032, "step": 1994 }, { "epoch": 4.51, "learning_rate": 1.2069797506327474e-06, "loss": 2.7591, "step": 1996 }, { "epoch": 4.52, "learning_rate": 1.2055456299940397e-06, "loss": 2.3661, "step": 1998 }, { "epoch": 4.52, "learning_rate": 1.2041110678245974e-06, "loss": 2.2633, "step": 2000 }, { "epoch": 4.53, "learning_rate": 1.2026760672059908e-06, "loss": 2.5338, "step": 2002 }, { "epoch": 4.53, "learning_rate": 1.2012406312207316e-06, "loss": 2.5867, "step": 2004 }, { "epoch": 4.54, "learning_rate": 1.199804762952268e-06, "loss": 2.4737, "step": 2006 }, { "epoch": 4.54, "learning_rate": 1.1983684654849741e-06, "loss": 2.3638, "step": 2008 }, { "epoch": 4.55, "learning_rate": 1.1969317419041494e-06, "loss": 2.2326, "step": 2010 }, { "epoch": 4.55, "learning_rate": 1.1954945952960058e-06, "loss": 2.3512, "step": 2012 }, { "epoch": 4.56, "learning_rate": 1.1940570287476661e-06, "loss": 2.2958, "step": 2014 }, { "epoch": 4.56, "learning_rate": 1.1926190453471537e-06, "loss": 2.1707, "step": 2016 }, { "epoch": 4.56, "learning_rate": 1.1911806481833875e-06, "loss": 2.2141, "step": 2018 }, { "epoch": 4.57, "learning_rate": 1.1897418403461762e-06, "loss": 2.3797, "step": 2020 }, { "epoch": 4.57, "learning_rate": 1.1883026249262099e-06, "loss": 2.4088, "step": 2022 }, { "epoch": 4.58, "learning_rate": 1.1868630050150543e-06, "loss": 2.3545, "step": 2024 }, { "epoch": 4.58, "learning_rate": 1.1854229837051443e-06, "loss": 2.4329, "step": 2026 }, { "epoch": 4.59, "learning_rate": 1.1839825640897767e-06, "loss": 2.3957, "step": 2028 }, { "epoch": 4.59, "learning_rate": 1.1825417492631041e-06, "loss": 2.2269, "step": 2030 }, { "epoch": 4.6, "learning_rate": 1.1811005423201281e-06, "loss": 2.3754, "step": 2032 }, { "epoch": 4.6, "learning_rate": 1.1796589463566922e-06, "loss": 2.5927, "step": 2034 }, { "epoch": 4.61, "learning_rate": 1.1782169644694758e-06, "loss": 2.5426, "step": 2036 }, { "epoch": 4.61, "learning_rate": 1.1767745997559877e-06, "loss": 2.5174, "step": 2038 }, { "epoch": 4.61, "learning_rate": 1.1753318553145586e-06, "loss": 2.3862, "step": 2040 }, { "epoch": 4.62, "learning_rate": 1.1738887342443344e-06, "loss": 2.2668, "step": 2042 }, { "epoch": 4.62, "learning_rate": 1.1724452396452713e-06, "loss": 2.1973, "step": 2044 }, { "epoch": 4.63, "learning_rate": 1.171001374618127e-06, "loss": 2.6375, "step": 2046 }, { "epoch": 4.63, "learning_rate": 1.1695571422644549e-06, "loss": 2.5391, "step": 2048 }, { "epoch": 4.64, "learning_rate": 1.1681125456865975e-06, "loss": 2.7426, "step": 2050 }, { "epoch": 4.64, "learning_rate": 1.1666675879876805e-06, "loss": 2.4048, "step": 2052 }, { "epoch": 4.65, "learning_rate": 1.1652222722716039e-06, "loss": 2.3801, "step": 2054 }, { "epoch": 4.65, "learning_rate": 1.1637766016430379e-06, "loss": 2.3919, "step": 2056 }, { "epoch": 4.65, "learning_rate": 1.1623305792074146e-06, "loss": 2.5378, "step": 2058 }, { "epoch": 4.66, "learning_rate": 1.1608842080709217e-06, "loss": 2.3905, "step": 2060 }, { "epoch": 4.66, "learning_rate": 1.1594374913404965e-06, "loss": 2.2424, "step": 2062 }, { "epoch": 4.67, "learning_rate": 1.1579904321238182e-06, "loss": 2.4612, "step": 2064 }, { "epoch": 4.67, "learning_rate": 1.1565430335293016e-06, "loss": 2.238, "step": 2066 }, { "epoch": 4.68, "learning_rate": 1.1550952986660909e-06, "loss": 2.8252, "step": 2068 }, { "epoch": 4.68, "learning_rate": 1.1536472306440526e-06, "loss": 2.5775, "step": 2070 }, { "epoch": 4.69, "learning_rate": 1.1521988325737684e-06, "loss": 2.4405, "step": 2072 }, { "epoch": 4.69, "learning_rate": 1.150750107566529e-06, "loss": 2.5779, "step": 2074 }, { "epoch": 4.7, "learning_rate": 1.1493010587343284e-06, "loss": 2.303, "step": 2076 }, { "epoch": 4.7, "learning_rate": 1.1478516891898548e-06, "loss": 2.382, "step": 2078 }, { "epoch": 4.7, "learning_rate": 1.1464020020464864e-06, "loss": 2.4109, "step": 2080 }, { "epoch": 4.71, "learning_rate": 1.1449520004182833e-06, "loss": 2.4196, "step": 2082 }, { "epoch": 4.71, "learning_rate": 1.1435016874199806e-06, "loss": 2.3244, "step": 2084 }, { "epoch": 4.72, "learning_rate": 1.1420510661669834e-06, "loss": 2.5645, "step": 2086 }, { "epoch": 4.72, "learning_rate": 1.1406001397753576e-06, "loss": 2.1936, "step": 2088 }, { "epoch": 4.73, "learning_rate": 1.1391489113618255e-06, "loss": 2.4046, "step": 2090 }, { "epoch": 4.73, "learning_rate": 1.1376973840437581e-06, "loss": 2.3622, "step": 2092 }, { "epoch": 4.74, "learning_rate": 1.1362455609391678e-06, "loss": 2.2028, "step": 2094 }, { "epoch": 4.74, "learning_rate": 1.1347934451667037e-06, "loss": 2.1606, "step": 2096 }, { "epoch": 4.75, "learning_rate": 1.1333410398456418e-06, "loss": 2.3497, "step": 2098 }, { "epoch": 4.75, "learning_rate": 1.1318883480958816e-06, "loss": 2.3943, "step": 2100 }, { "epoch": 4.75, "learning_rate": 1.130435373037937e-06, "loss": 2.3429, "step": 2102 }, { "epoch": 4.76, "learning_rate": 1.128982117792931e-06, "loss": 2.1936, "step": 2104 }, { "epoch": 4.76, "learning_rate": 1.1275285854825882e-06, "loss": 2.2376, "step": 2106 }, { "epoch": 4.77, "learning_rate": 1.1260747792292283e-06, "loss": 2.5502, "step": 2108 }, { "epoch": 4.77, "learning_rate": 1.1246207021557595e-06, "loss": 2.3205, "step": 2110 }, { "epoch": 4.78, "learning_rate": 1.1231663573856716e-06, "loss": 2.3361, "step": 2112 }, { "epoch": 4.78, "learning_rate": 1.1217117480430302e-06, "loss": 2.3491, "step": 2114 }, { "epoch": 4.79, "learning_rate": 1.1202568772524684e-06, "loss": 2.3551, "step": 2116 }, { "epoch": 4.79, "learning_rate": 1.118801748139181e-06, "loss": 2.4922, "step": 2118 }, { "epoch": 4.8, "learning_rate": 1.117346363828918e-06, "loss": 2.3158, "step": 2120 }, { "epoch": 4.8, "learning_rate": 1.1158907274479772e-06, "loss": 2.4769, "step": 2122 }, { "epoch": 4.8, "learning_rate": 1.1144348421231987e-06, "loss": 2.4177, "step": 2124 }, { "epoch": 4.81, "learning_rate": 1.1129787109819562e-06, "loss": 2.2472, "step": 2126 }, { "epoch": 4.81, "learning_rate": 1.1115223371521524e-06, "loss": 2.3932, "step": 2128 }, { "epoch": 4.82, "learning_rate": 1.1100657237622109e-06, "loss": 2.4636, "step": 2130 }, { "epoch": 4.82, "learning_rate": 1.1086088739410697e-06, "loss": 2.1997, "step": 2132 }, { "epoch": 4.83, "learning_rate": 1.1071517908181752e-06, "loss": 2.6186, "step": 2134 }, { "epoch": 4.83, "learning_rate": 1.105694477523474e-06, "loss": 2.3176, "step": 2136 }, { "epoch": 4.84, "learning_rate": 1.1042369371874088e-06, "loss": 2.4185, "step": 2138 }, { "epoch": 4.84, "learning_rate": 1.1027791729409084e-06, "loss": 2.6309, "step": 2140 }, { "epoch": 4.84, "learning_rate": 1.1013211879153832e-06, "loss": 2.4281, "step": 2142 }, { "epoch": 4.85, "learning_rate": 1.0998629852427179e-06, "loss": 2.0809, "step": 2144 }, { "epoch": 4.85, "learning_rate": 1.0984045680552647e-06, "loss": 2.3214, "step": 2146 }, { "epoch": 4.86, "learning_rate": 1.0969459394858364e-06, "loss": 2.3554, "step": 2148 }, { "epoch": 4.86, "learning_rate": 1.0954871026677001e-06, "loss": 2.3334, "step": 2150 }, { "epoch": 4.87, "learning_rate": 1.0940280607345704e-06, "loss": 2.6128, "step": 2152 }, { "epoch": 4.87, "learning_rate": 1.0925688168206018e-06, "loss": 2.3866, "step": 2154 }, { "epoch": 4.88, "learning_rate": 1.0911093740603836e-06, "loss": 2.2807, "step": 2156 }, { "epoch": 4.88, "learning_rate": 1.0896497355889316e-06, "loss": 2.2577, "step": 2158 }, { "epoch": 4.89, "learning_rate": 1.0881899045416818e-06, "loss": 2.3046, "step": 2160 }, { "epoch": 4.89, "learning_rate": 1.0867298840544849e-06, "loss": 2.5557, "step": 2162 }, { "epoch": 4.89, "learning_rate": 1.085269677263597e-06, "loss": 2.4749, "step": 2164 }, { "epoch": 4.9, "learning_rate": 1.0838092873056768e-06, "loss": 2.4613, "step": 2166 }, { "epoch": 4.9, "learning_rate": 1.0823487173177735e-06, "loss": 2.4206, "step": 2168 }, { "epoch": 4.91, "learning_rate": 1.0808879704373249e-06, "loss": 2.5469, "step": 2170 }, { "epoch": 4.91, "learning_rate": 1.0794270498021486e-06, "loss": 2.7428, "step": 2172 }, { "epoch": 4.92, "learning_rate": 1.077965958550435e-06, "loss": 2.1805, "step": 2174 }, { "epoch": 4.92, "learning_rate": 1.076504699820741e-06, "loss": 2.4371, "step": 2176 }, { "epoch": 4.93, "learning_rate": 1.0750432767519837e-06, "loss": 2.5389, "step": 2178 }, { "epoch": 4.93, "learning_rate": 1.073581692483433e-06, "loss": 2.3166, "step": 2180 }, { "epoch": 4.94, "learning_rate": 1.072119950154705e-06, "loss": 2.4784, "step": 2182 }, { "epoch": 4.94, "learning_rate": 1.0706580529057553e-06, "loss": 2.2944, "step": 2184 }, { "epoch": 4.94, "learning_rate": 1.069196003876872e-06, "loss": 2.4812, "step": 2186 }, { "epoch": 4.95, "learning_rate": 1.06773380620867e-06, "loss": 2.2998, "step": 2188 }, { "epoch": 4.95, "learning_rate": 1.0662714630420833e-06, "loss": 2.1607, "step": 2190 }, { "epoch": 4.96, "learning_rate": 1.0648089775183575e-06, "loss": 2.5073, "step": 2192 }, { "epoch": 4.96, "learning_rate": 1.0633463527790457e-06, "loss": 2.7015, "step": 2194 }, { "epoch": 4.97, "learning_rate": 1.0618835919659981e-06, "loss": 2.2588, "step": 2196 }, { "epoch": 4.97, "learning_rate": 1.0604206982213585e-06, "loss": 2.5762, "step": 2198 }, { "epoch": 4.98, "learning_rate": 1.0589576746875556e-06, "loss": 2.3897, "step": 2200 }, { "epoch": 4.98, "learning_rate": 1.0574945245072979e-06, "loss": 2.4374, "step": 2202 }, { "epoch": 4.99, "learning_rate": 1.0560312508235648e-06, "loss": 2.1639, "step": 2204 }, { "epoch": 4.99, "learning_rate": 1.0545678567796016e-06, "loss": 2.4232, "step": 2206 }, { "epoch": 4.99, "learning_rate": 1.053104345518912e-06, "loss": 2.1925, "step": 2208 }, { "epoch": 5.0, "learning_rate": 1.0516407201852513e-06, "loss": 2.355, "step": 2210 }, { "epoch": 5.0, "learning_rate": 1.0501769839226202e-06, "loss": 2.6249, "step": 2212 }, { "epoch": 5.01, "learning_rate": 1.0487131398752573e-06, "loss": 2.3273, "step": 2214 }, { "epoch": 5.01, "learning_rate": 1.0472491911876332e-06, "loss": 2.2022, "step": 2216 }, { "epoch": 5.02, "learning_rate": 1.045785141004443e-06, "loss": 2.5215, "step": 2218 }, { "epoch": 5.02, "learning_rate": 1.0443209924705999e-06, "loss": 2.3913, "step": 2220 }, { "epoch": 5.03, "learning_rate": 1.0428567487312279e-06, "loss": 2.0402, "step": 2222 }, { "epoch": 5.03, "learning_rate": 1.041392412931656e-06, "loss": 2.2759, "step": 2224 }, { "epoch": 5.03, "learning_rate": 1.0399279882174114e-06, "loss": 2.3752, "step": 2226 }, { "epoch": 5.04, "learning_rate": 1.0384634777342114e-06, "loss": 2.4845, "step": 2228 }, { "epoch": 5.04, "learning_rate": 1.0369988846279576e-06, "loss": 2.2978, "step": 2230 }, { "epoch": 5.05, "learning_rate": 1.03553421204473e-06, "loss": 2.4405, "step": 2232 }, { "epoch": 5.05, "learning_rate": 1.0340694631307786e-06, "loss": 2.2374, "step": 2234 }, { "epoch": 5.06, "learning_rate": 1.0326046410325166e-06, "loss": 2.3413, "step": 2236 }, { "epoch": 5.06, "learning_rate": 1.0311397488965166e-06, "loss": 2.4933, "step": 2238 }, { "epoch": 5.07, "learning_rate": 1.0296747898694994e-06, "loss": 2.2753, "step": 2240 }, { "epoch": 5.07, "learning_rate": 1.028209767098331e-06, "loss": 2.5901, "step": 2242 }, { "epoch": 5.08, "learning_rate": 1.0267446837300133e-06, "loss": 2.5863, "step": 2244 }, { "epoch": 5.08, "learning_rate": 1.0252795429116792e-06, "loss": 2.432, "step": 2246 }, { "epoch": 5.08, "learning_rate": 1.0238143477905842e-06, "loss": 2.4135, "step": 2248 }, { "epoch": 5.09, "learning_rate": 1.0223491015141012e-06, "loss": 2.2242, "step": 2250 }, { "epoch": 5.09, "learning_rate": 1.0208838072297129e-06, "loss": 2.3453, "step": 2252 }, { "epoch": 5.1, "learning_rate": 1.019418468085004e-06, "loss": 2.3431, "step": 2254 }, { "epoch": 5.1, "learning_rate": 1.0179530872276573e-06, "loss": 2.1643, "step": 2256 }, { "epoch": 5.11, "learning_rate": 1.016487667805444e-06, "loss": 2.2139, "step": 2258 }, { "epoch": 5.11, "learning_rate": 1.0150222129662182e-06, "loss": 2.2468, "step": 2260 }, { "epoch": 5.12, "learning_rate": 1.0135567258579111e-06, "loss": 2.4914, "step": 2262 }, { "epoch": 5.12, "learning_rate": 1.0120912096285222e-06, "loss": 2.3727, "step": 2264 }, { "epoch": 5.13, "learning_rate": 1.0106256674261136e-06, "loss": 2.1403, "step": 2266 }, { "epoch": 5.13, "learning_rate": 1.0091601023988031e-06, "loss": 2.3905, "step": 2268 }, { "epoch": 5.13, "learning_rate": 1.007694517694759e-06, "loss": 2.1131, "step": 2270 }, { "epoch": 5.14, "learning_rate": 1.00622891646219e-06, "loss": 2.3395, "step": 2272 }, { "epoch": 5.14, "learning_rate": 1.0047633018493408e-06, "loss": 2.5324, "step": 2274 }, { "epoch": 5.15, "learning_rate": 1.0032976770044854e-06, "loss": 2.2844, "step": 2276 }, { "epoch": 5.15, "learning_rate": 1.0018320450759195e-06, "loss": 2.4777, "step": 2278 }, { "epoch": 5.16, "learning_rate": 1.000366409211954e-06, "loss": 2.4501, "step": 2280 }, { "epoch": 5.16, "learning_rate": 9.989007725609083e-07, "loss": 2.5747, "step": 2282 }, { "epoch": 5.17, "learning_rate": 9.974351382711028e-07, "loss": 2.5167, "step": 2284 }, { "epoch": 5.17, "learning_rate": 9.95969509490854e-07, "loss": 2.4251, "step": 2286 }, { "epoch": 5.18, "learning_rate": 9.945038893684663e-07, "loss": 2.4821, "step": 2288 }, { "epoch": 5.18, "learning_rate": 9.930382810522246e-07, "loss": 2.2526, "step": 2290 }, { "epoch": 5.18, "learning_rate": 9.915726876903891e-07, "loss": 2.3031, "step": 2292 }, { "epoch": 5.19, "learning_rate": 9.901071124311883e-07, "loss": 2.1421, "step": 2294 }, { "epoch": 5.19, "learning_rate": 9.88641558422811e-07, "loss": 2.6568, "step": 2296 }, { "epoch": 5.2, "learning_rate": 9.871760288134006e-07, "loss": 2.3728, "step": 2298 }, { "epoch": 5.2, "learning_rate": 9.857105267510484e-07, "loss": 2.4358, "step": 2300 }, { "epoch": 5.21, "learning_rate": 9.842450553837865e-07, "loss": 2.4469, "step": 2302 }, { "epoch": 5.21, "learning_rate": 9.827796178595805e-07, "loss": 2.2206, "step": 2304 }, { "epoch": 5.22, "learning_rate": 9.813142173263233e-07, "loss": 2.2388, "step": 2306 }, { "epoch": 5.22, "learning_rate": 9.798488569318295e-07, "loss": 2.5086, "step": 2308 }, { "epoch": 5.22, "learning_rate": 9.78383539823827e-07, "loss": 2.3137, "step": 2310 }, { "epoch": 5.23, "learning_rate": 9.76918269149949e-07, "loss": 2.0895, "step": 2312 }, { "epoch": 5.23, "learning_rate": 9.754530480577314e-07, "loss": 2.4254, "step": 2314 }, { "epoch": 5.24, "learning_rate": 9.739878796946027e-07, "loss": 2.4146, "step": 2316 }, { "epoch": 5.24, "learning_rate": 9.725227672078772e-07, "loss": 2.5433, "step": 2318 }, { "epoch": 5.25, "learning_rate": 9.710577137447505e-07, "loss": 2.4301, "step": 2320 }, { "epoch": 5.25, "learning_rate": 9.695927224522907e-07, "loss": 2.4225, "step": 2322 }, { "epoch": 5.26, "learning_rate": 9.68127796477433e-07, "loss": 2.3673, "step": 2324 }, { "epoch": 5.26, "learning_rate": 9.66662938966971e-07, "loss": 2.2955, "step": 2326 }, { "epoch": 5.27, "learning_rate": 9.651981530675524e-07, "loss": 2.2772, "step": 2328 }, { "epoch": 5.27, "learning_rate": 9.637334419256713e-07, "loss": 2.4733, "step": 2330 }, { "epoch": 5.27, "learning_rate": 9.6226880868766e-07, "loss": 2.8809, "step": 2332 }, { "epoch": 5.28, "learning_rate": 9.60804256499684e-07, "loss": 2.3127, "step": 2334 }, { "epoch": 5.28, "learning_rate": 9.593397885077353e-07, "loss": 2.6099, "step": 2336 }, { "epoch": 5.29, "learning_rate": 9.57875407857624e-07, "loss": 2.3925, "step": 2338 }, { "epoch": 5.29, "learning_rate": 9.564111176949734e-07, "loss": 2.4485, "step": 2340 }, { "epoch": 5.3, "learning_rate": 9.549469211652123e-07, "loss": 2.2315, "step": 2342 }, { "epoch": 5.3, "learning_rate": 9.534828214135682e-07, "loss": 2.2182, "step": 2344 }, { "epoch": 5.31, "learning_rate": 9.520188215850602e-07, "loss": 2.4335, "step": 2346 }, { "epoch": 5.31, "learning_rate": 9.505549248244936e-07, "loss": 2.5005, "step": 2348 }, { "epoch": 5.32, "learning_rate": 9.490911342764525e-07, "loss": 2.2757, "step": 2350 }, { "epoch": 5.32, "learning_rate": 9.476274530852913e-07, "loss": 2.3001, "step": 2352 }, { "epoch": 5.32, "learning_rate": 9.46163884395131e-07, "loss": 2.173, "step": 2354 }, { "epoch": 5.33, "learning_rate": 9.447004313498507e-07, "loss": 2.363, "step": 2356 }, { "epoch": 5.33, "learning_rate": 9.432370970930809e-07, "loss": 2.2835, "step": 2358 }, { "epoch": 5.34, "learning_rate": 9.41773884768196e-07, "loss": 2.307, "step": 2360 }, { "epoch": 5.34, "learning_rate": 9.403107975183102e-07, "loss": 2.2666, "step": 2362 }, { "epoch": 5.35, "learning_rate": 9.388478384862682e-07, "loss": 2.3614, "step": 2364 }, { "epoch": 5.35, "learning_rate": 9.373850108146386e-07, "loss": 2.0857, "step": 2366 }, { "epoch": 5.36, "learning_rate": 9.359223176457087e-07, "loss": 2.5566, "step": 2368 }, { "epoch": 5.36, "learning_rate": 9.344597621214771e-07, "loss": 2.3728, "step": 2370 }, { "epoch": 5.36, "learning_rate": 9.329973473836458e-07, "loss": 2.3273, "step": 2372 }, { "epoch": 5.37, "learning_rate": 9.315350765736148e-07, "loss": 2.1808, "step": 2374 }, { "epoch": 5.37, "learning_rate": 9.300729528324756e-07, "loss": 2.3019, "step": 2376 }, { "epoch": 5.38, "learning_rate": 9.286109793010026e-07, "loss": 2.2624, "step": 2378 }, { "epoch": 5.38, "learning_rate": 9.271491591196482e-07, "loss": 2.4553, "step": 2380 }, { "epoch": 5.39, "learning_rate": 9.256874954285354e-07, "loss": 2.2152, "step": 2382 }, { "epoch": 5.39, "learning_rate": 9.242259913674509e-07, "loss": 2.715, "step": 2384 }, { "epoch": 5.4, "learning_rate": 9.227646500758387e-07, "loss": 2.1977, "step": 2386 }, { "epoch": 5.4, "learning_rate": 9.213034746927926e-07, "loss": 2.5103, "step": 2388 }, { "epoch": 5.41, "learning_rate": 9.198424683570505e-07, "loss": 2.3052, "step": 2390 }, { "epoch": 5.41, "learning_rate": 9.183816342069878e-07, "loss": 2.1901, "step": 2392 }, { "epoch": 5.41, "learning_rate": 9.169209753806082e-07, "loss": 2.2311, "step": 2394 }, { "epoch": 5.42, "learning_rate": 9.154604950155404e-07, "loss": 2.2232, "step": 2396 }, { "epoch": 5.42, "learning_rate": 9.140001962490293e-07, "loss": 2.3053, "step": 2398 }, { "epoch": 5.43, "learning_rate": 9.125400822179301e-07, "loss": 2.4779, "step": 2400 }, { "epoch": 5.43, "learning_rate": 9.110801560586998e-07, "loss": 2.441, "step": 2402 }, { "epoch": 5.44, "learning_rate": 9.096204209073933e-07, "loss": 2.5225, "step": 2404 }, { "epoch": 5.44, "learning_rate": 9.081608798996549e-07, "loss": 2.3408, "step": 2406 }, { "epoch": 5.45, "learning_rate": 9.067015361707109e-07, "loss": 2.3998, "step": 2408 }, { "epoch": 5.45, "learning_rate": 9.05242392855365e-07, "loss": 2.308, "step": 2410 }, { "epoch": 5.46, "learning_rate": 9.037834530879897e-07, "loss": 2.3218, "step": 2412 }, { "epoch": 5.46, "learning_rate": 9.023247200025213e-07, "loss": 2.5219, "step": 2414 }, { "epoch": 5.46, "learning_rate": 9.008661967324503e-07, "loss": 2.346, "step": 2416 }, { "epoch": 5.47, "learning_rate": 8.994078864108179e-07, "loss": 2.0946, "step": 2418 }, { "epoch": 5.47, "learning_rate": 8.979497921702081e-07, "loss": 2.3819, "step": 2420 }, { "epoch": 5.48, "learning_rate": 8.964919171427389e-07, "loss": 2.1452, "step": 2422 }, { "epoch": 5.48, "learning_rate": 8.950342644600595e-07, "loss": 2.0626, "step": 2424 }, { "epoch": 5.49, "learning_rate": 8.935768372533408e-07, "loss": 2.3418, "step": 2426 }, { "epoch": 5.49, "learning_rate": 8.921196386532685e-07, "loss": 2.4828, "step": 2428 }, { "epoch": 5.5, "learning_rate": 8.906626717900383e-07, "loss": 2.2321, "step": 2430 }, { "epoch": 5.5, "learning_rate": 8.892059397933476e-07, "loss": 2.2295, "step": 2432 }, { "epoch": 5.51, "learning_rate": 8.877494457923899e-07, "loss": 2.2563, "step": 2434 }, { "epoch": 5.51, "learning_rate": 8.862931929158462e-07, "loss": 2.4069, "step": 2436 }, { "epoch": 5.51, "learning_rate": 8.848371842918804e-07, "loss": 2.4871, "step": 2438 }, { "epoch": 5.52, "learning_rate": 8.833814230481327e-07, "loss": 2.1938, "step": 2440 }, { "epoch": 5.52, "learning_rate": 8.819259123117096e-07, "loss": 2.5121, "step": 2442 }, { "epoch": 5.53, "learning_rate": 8.804706552091811e-07, "loss": 2.3073, "step": 2444 }, { "epoch": 5.53, "learning_rate": 8.790156548665723e-07, "loss": 2.3042, "step": 2446 }, { "epoch": 5.54, "learning_rate": 8.775609144093562e-07, "loss": 2.0554, "step": 2448 }, { "epoch": 5.54, "learning_rate": 8.761064369624477e-07, "loss": 2.2464, "step": 2450 }, { "epoch": 5.55, "learning_rate": 8.746522256501973e-07, "loss": 2.5404, "step": 2452 }, { "epoch": 5.55, "learning_rate": 8.73198283596383e-07, "loss": 2.7003, "step": 2454 }, { "epoch": 5.55, "learning_rate": 8.717446139242044e-07, "loss": 2.2159, "step": 2456 }, { "epoch": 5.56, "learning_rate": 8.702912197562771e-07, "loss": 2.2748, "step": 2458 }, { "epoch": 5.56, "learning_rate": 8.688381042146236e-07, "loss": 2.3034, "step": 2460 }, { "epoch": 5.57, "learning_rate": 8.673852704206689e-07, "loss": 2.2095, "step": 2462 }, { "epoch": 5.57, "learning_rate": 8.65932721495232e-07, "loss": 2.2086, "step": 2464 }, { "epoch": 5.58, "learning_rate": 8.644804605585201e-07, "loss": 2.2465, "step": 2466 }, { "epoch": 5.58, "learning_rate": 8.630284907301225e-07, "loss": 2.3245, "step": 2468 }, { "epoch": 5.59, "learning_rate": 8.615768151290024e-07, "loss": 2.294, "step": 2470 }, { "epoch": 5.59, "learning_rate": 8.601254368734908e-07, "loss": 2.3998, "step": 2472 }, { "epoch": 5.6, "learning_rate": 8.586743590812808e-07, "loss": 2.5294, "step": 2474 }, { "epoch": 5.6, "learning_rate": 8.572235848694202e-07, "loss": 2.3761, "step": 2476 }, { "epoch": 5.6, "learning_rate": 8.557731173543029e-07, "loss": 2.1873, "step": 2478 }, { "epoch": 5.61, "learning_rate": 8.54322959651666e-07, "loss": 2.6505, "step": 2480 }, { "epoch": 5.61, "learning_rate": 8.528731148765806e-07, "loss": 2.1418, "step": 2482 }, { "epoch": 5.62, "learning_rate": 8.521483108101453e-07, "loss": 2.2971, "step": 2484 }, { "epoch": 5.62, "learning_rate": 8.506989412656718e-07, "loss": 2.178, "step": 2486 }, { "epoch": 5.63, "learning_rate": 8.492498924334711e-07, "loss": 2.1965, "step": 2488 }, { "epoch": 5.63, "learning_rate": 8.47801167426232e-07, "loss": 2.2415, "step": 2490 }, { "epoch": 5.64, "learning_rate": 8.463527693559475e-07, "loss": 2.2727, "step": 2492 }, { "epoch": 5.64, "learning_rate": 8.44904701333909e-07, "loss": 2.4316, "step": 2494 }, { "epoch": 5.65, "learning_rate": 8.434569664706982e-07, "loss": 2.2054, "step": 2496 }, { "epoch": 5.65, "learning_rate": 8.420095678761817e-07, "loss": 2.3326, "step": 2498 }, { "epoch": 5.65, "learning_rate": 8.405625086595034e-07, "loss": 2.4215, "step": 2500 }, { "epoch": 5.66, "learning_rate": 8.391157919290781e-07, "loss": 2.3286, "step": 2502 }, { "epoch": 5.66, "learning_rate": 8.376694207925854e-07, "loss": 2.34, "step": 2504 }, { "epoch": 5.67, "learning_rate": 8.362233983569622e-07, "loss": 2.8653, "step": 2506 }, { "epoch": 5.67, "learning_rate": 8.347777277283961e-07, "loss": 2.6285, "step": 2508 }, { "epoch": 5.68, "learning_rate": 8.333324120123194e-07, "loss": 2.5004, "step": 2510 }, { "epoch": 5.68, "learning_rate": 8.318874543134025e-07, "loss": 2.4015, "step": 2512 }, { "epoch": 5.69, "learning_rate": 8.304428577355452e-07, "loss": 2.2038, "step": 2514 }, { "epoch": 5.69, "learning_rate": 8.297206958367709e-07, "loss": 2.3619, "step": 2516 }, { "epoch": 5.7, "learning_rate": 8.282766467586193e-07, "loss": 2.398, "step": 2518 }, { "epoch": 5.7, "learning_rate": 8.268329665578708e-07, "loss": 2.1842, "step": 2520 }, { "epoch": 5.7, "learning_rate": 8.253896583356824e-07, "loss": 2.4201, "step": 2522 }, { "epoch": 5.71, "learning_rate": 8.239467251924118e-07, "loss": 2.3461, "step": 2524 }, { "epoch": 5.71, "learning_rate": 8.225041702276107e-07, "loss": 2.5341, "step": 2526 }, { "epoch": 5.72, "learning_rate": 8.210619965400183e-07, "loss": 2.5787, "step": 2528 }, { "epoch": 5.72, "learning_rate": 8.196202072275554e-07, "loss": 2.222, "step": 2530 }, { "epoch": 5.73, "learning_rate": 8.181788053873167e-07, "loss": 2.4516, "step": 2532 }, { "epoch": 5.73, "learning_rate": 8.167377941155642e-07, "loss": 2.5411, "step": 2534 }, { "epoch": 5.74, "learning_rate": 8.152971765077219e-07, "loss": 2.5724, "step": 2536 }, { "epoch": 5.74, "learning_rate": 8.138569556583672e-07, "loss": 2.4833, "step": 2538 }, { "epoch": 5.74, "learning_rate": 8.124171346612263e-07, "loss": 2.5704, "step": 2540 }, { "epoch": 5.75, "learning_rate": 8.109777166091652e-07, "loss": 2.5729, "step": 2542 }, { "epoch": 5.75, "learning_rate": 8.095387045941852e-07, "loss": 2.1751, "step": 2544 }, { "epoch": 5.76, "learning_rate": 8.081001017074152e-07, "loss": 2.3624, "step": 2546 }, { "epoch": 5.76, "learning_rate": 8.066619110391054e-07, "loss": 2.4528, "step": 2548 }, { "epoch": 5.77, "learning_rate": 8.0522413567862e-07, "loss": 2.4156, "step": 2550 }, { "epoch": 5.77, "learning_rate": 8.037867787144314e-07, "loss": 2.4374, "step": 2552 }, { "epoch": 5.78, "learning_rate": 8.02349843234114e-07, "loss": 2.2263, "step": 2554 }, { "epoch": 5.78, "learning_rate": 8.009133323243351e-07, "loss": 2.5188, "step": 2556 }, { "epoch": 5.79, "learning_rate": 7.994772490708514e-07, "loss": 2.1436, "step": 2558 }, { "epoch": 5.79, "learning_rate": 7.980415965585011e-07, "loss": 2.4208, "step": 2560 }, { "epoch": 5.79, "learning_rate": 7.966063778711956e-07, "loss": 2.4573, "step": 2562 }, { "epoch": 5.8, "learning_rate": 7.951715960919157e-07, "loss": 2.4805, "step": 2564 }, { "epoch": 5.8, "learning_rate": 7.937372543027035e-07, "loss": 2.3773, "step": 2566 }, { "epoch": 5.81, "learning_rate": 7.92303355584656e-07, "loss": 2.327, "step": 2568 }, { "epoch": 5.81, "learning_rate": 7.908699030179177e-07, "loss": 2.354, "step": 2570 }, { "epoch": 5.82, "learning_rate": 7.894368996816755e-07, "loss": 2.3706, "step": 2572 }, { "epoch": 5.82, "learning_rate": 7.880043486541517e-07, "loss": 2.43, "step": 2574 }, { "epoch": 5.83, "learning_rate": 7.865722530125952e-07, "loss": 2.613, "step": 2576 }, { "epoch": 5.83, "learning_rate": 7.851406158332786e-07, "loss": 2.1504, "step": 2578 }, { "epoch": 5.84, "learning_rate": 7.837094401914889e-07, "loss": 2.5713, "step": 2580 }, { "epoch": 5.84, "learning_rate": 7.82278729161522e-07, "loss": 2.2792, "step": 2582 }, { "epoch": 5.84, "learning_rate": 7.808484858166749e-07, "loss": 2.4921, "step": 2584 }, { "epoch": 5.85, "learning_rate": 7.794187132292408e-07, "loss": 2.4913, "step": 2586 }, { "epoch": 5.85, "learning_rate": 7.77989414470502e-07, "loss": 2.2147, "step": 2588 }, { "epoch": 5.86, "learning_rate": 7.765605926107213e-07, "loss": 2.7114, "step": 2590 }, { "epoch": 5.86, "learning_rate": 7.75132250719139e-07, "loss": 2.3006, "step": 2592 }, { "epoch": 5.87, "learning_rate": 7.737043918639634e-07, "loss": 2.4085, "step": 2594 }, { "epoch": 5.87, "learning_rate": 7.722770191123658e-07, "loss": 2.2064, "step": 2596 }, { "epoch": 5.88, "learning_rate": 7.708501355304719e-07, "loss": 2.5217, "step": 2598 }, { "epoch": 5.88, "learning_rate": 7.694237441833584e-07, "loss": 2.4016, "step": 2600 }, { "epoch": 5.89, "learning_rate": 7.679978481350437e-07, "loss": 2.2748, "step": 2602 }, { "epoch": 5.89, "learning_rate": 7.665724504484819e-07, "loss": 2.7744, "step": 2604 }, { "epoch": 5.89, "learning_rate": 7.651475541855577e-07, "loss": 2.5243, "step": 2606 }, { "epoch": 5.9, "learning_rate": 7.637231624070776e-07, "loss": 2.5171, "step": 2608 }, { "epoch": 5.9, "learning_rate": 7.622992781727652e-07, "loss": 2.6107, "step": 2610 }, { "epoch": 5.91, "learning_rate": 7.608759045412534e-07, "loss": 2.3215, "step": 2612 }, { "epoch": 5.91, "learning_rate": 7.594530445700781e-07, "loss": 2.2903, "step": 2614 }, { "epoch": 5.92, "learning_rate": 7.580307013156726e-07, "loss": 2.4102, "step": 2616 }, { "epoch": 5.92, "learning_rate": 7.566088778333597e-07, "loss": 2.3011, "step": 2618 }, { "epoch": 5.93, "learning_rate": 7.551875771773451e-07, "loss": 2.4777, "step": 2620 }, { "epoch": 5.93, "learning_rate": 7.53766802400713e-07, "loss": 2.2883, "step": 2622 }, { "epoch": 5.93, "learning_rate": 7.523465565554165e-07, "loss": 2.603, "step": 2624 }, { "epoch": 5.94, "learning_rate": 7.509268426922729e-07, "loss": 2.3403, "step": 2626 }, { "epoch": 5.94, "learning_rate": 7.495076638609573e-07, "loss": 2.2766, "step": 2628 }, { "epoch": 5.95, "learning_rate": 7.48089023109995e-07, "loss": 2.5495, "step": 2630 }, { "epoch": 5.95, "learning_rate": 7.466709234867551e-07, "loss": 2.2699, "step": 2632 }, { "epoch": 5.96, "learning_rate": 7.452533680374451e-07, "loss": 2.232, "step": 2634 }, { "epoch": 5.96, "learning_rate": 7.438363598071037e-07, "loss": 2.4567, "step": 2636 }, { "epoch": 5.97, "learning_rate": 7.424199018395927e-07, "loss": 2.3238, "step": 2638 }, { "epoch": 5.97, "learning_rate": 7.410039971775932e-07, "loss": 2.2977, "step": 2640 }, { "epoch": 5.98, "learning_rate": 7.395886488625975e-07, "loss": 2.513, "step": 2642 }, { "epoch": 5.98, "learning_rate": 7.38173859934903e-07, "loss": 2.4299, "step": 2644 }, { "epoch": 5.98, "learning_rate": 7.367596334336046e-07, "loss": 2.2186, "step": 2646 }, { "epoch": 5.99, "learning_rate": 7.353459723965896e-07, "loss": 2.6373, "step": 2648 }, { "epoch": 5.99, "learning_rate": 7.339328798605315e-07, "loss": 2.1771, "step": 2650 }, { "epoch": 6.0, "learning_rate": 7.325203588608808e-07, "loss": 2.3566, "step": 2652 }, { "epoch": 6.0, "learning_rate": 7.311084124318617e-07, "loss": 2.3604, "step": 2654 }, { "epoch": 6.01, "learning_rate": 7.296970436064634e-07, "loss": 2.6705, "step": 2656 }, { "epoch": 6.01, "learning_rate": 7.282862554164355e-07, "loss": 2.306, "step": 2658 }, { "epoch": 6.02, "learning_rate": 7.268760508922786e-07, "loss": 2.3193, "step": 2660 }, { "epoch": 6.02, "learning_rate": 7.25466433063241e-07, "loss": 2.4695, "step": 2662 }, { "epoch": 6.03, "learning_rate": 7.240574049573105e-07, "loss": 2.3212, "step": 2664 }, { "epoch": 6.03, "learning_rate": 7.226489696012068e-07, "loss": 2.3396, "step": 2666 }, { "epoch": 6.03, "learning_rate": 7.212411300203778e-07, "loss": 2.7672, "step": 2668 }, { "epoch": 6.04, "learning_rate": 7.198338892389915e-07, "loss": 2.3035, "step": 2670 }, { "epoch": 6.04, "learning_rate": 7.184272502799291e-07, "loss": 2.2063, "step": 2672 }, { "epoch": 6.05, "learning_rate": 7.170212161647789e-07, "loss": 2.3545, "step": 2674 }, { "epoch": 6.05, "learning_rate": 7.156157899138305e-07, "loss": 2.1973, "step": 2676 }, { "epoch": 6.06, "learning_rate": 7.142109745460676e-07, "loss": 2.3296, "step": 2678 }, { "epoch": 6.06, "learning_rate": 7.128067730791611e-07, "loss": 2.4783, "step": 2680 }, { "epoch": 6.07, "learning_rate": 7.114031885294642e-07, "loss": 2.6186, "step": 2682 }, { "epoch": 6.07, "learning_rate": 7.10000223912004e-07, "loss": 2.1012, "step": 2684 }, { "epoch": 6.08, "learning_rate": 7.085978822404766e-07, "loss": 2.3392, "step": 2686 }, { "epoch": 6.08, "learning_rate": 7.071961665272392e-07, "loss": 2.3101, "step": 2688 }, { "epoch": 6.08, "learning_rate": 7.05795079783305e-07, "loss": 2.2984, "step": 2690 }, { "epoch": 6.09, "learning_rate": 7.043946250183361e-07, "loss": 2.3071, "step": 2692 }, { "epoch": 6.09, "learning_rate": 7.02994805240637e-07, "loss": 2.5653, "step": 2694 }, { "epoch": 6.1, "learning_rate": 7.015956234571474e-07, "loss": 2.4521, "step": 2696 }, { "epoch": 6.1, "learning_rate": 7.001970826734378e-07, "loss": 2.5733, "step": 2698 }, { "epoch": 6.11, "learning_rate": 6.987991858937015e-07, "loss": 2.0661, "step": 2700 }, { "epoch": 6.11, "learning_rate": 6.97401936120747e-07, "loss": 2.5841, "step": 2702 }, { "epoch": 6.12, "learning_rate": 6.96005336355995e-07, "loss": 2.2641, "step": 2704 }, { "epoch": 6.12, "learning_rate": 6.94609389599469e-07, "loss": 2.2637, "step": 2706 }, { "epoch": 6.12, "learning_rate": 6.932140988497892e-07, "loss": 2.2499, "step": 2708 }, { "epoch": 6.13, "learning_rate": 6.918194671041674e-07, "loss": 2.509, "step": 2710 }, { "epoch": 6.13, "learning_rate": 6.904254973584002e-07, "loss": 2.2785, "step": 2712 }, { "epoch": 6.14, "learning_rate": 6.890321926068608e-07, "loss": 2.4755, "step": 2714 }, { "epoch": 6.14, "learning_rate": 6.876395558424949e-07, "loss": 2.4773, "step": 2716 }, { "epoch": 6.15, "learning_rate": 6.862475900568132e-07, "loss": 2.333, "step": 2718 }, { "epoch": 6.15, "learning_rate": 6.848562982398855e-07, "loss": 2.4779, "step": 2720 }, { "epoch": 6.16, "learning_rate": 6.834656833803326e-07, "loss": 2.4282, "step": 2722 }, { "epoch": 6.16, "learning_rate": 6.82075748465322e-07, "loss": 2.104, "step": 2724 }, { "epoch": 6.17, "learning_rate": 6.806864964805612e-07, "loss": 2.483, "step": 2726 }, { "epoch": 6.17, "learning_rate": 6.792979304102888e-07, "loss": 2.2719, "step": 2728 }, { "epoch": 6.17, "learning_rate": 6.77910053237272e-07, "loss": 2.5435, "step": 2730 }, { "epoch": 6.18, "learning_rate": 6.765228679427973e-07, "loss": 2.5445, "step": 2732 }, { "epoch": 6.18, "learning_rate": 6.751363775066652e-07, "loss": 2.3369, "step": 2734 }, { "epoch": 6.19, "learning_rate": 6.737505849071828e-07, "loss": 2.3555, "step": 2736 }, { "epoch": 6.19, "learning_rate": 6.723654931211593e-07, "loss": 2.1379, "step": 2738 }, { "epoch": 6.2, "learning_rate": 6.709811051238982e-07, "loss": 2.1932, "step": 2740 }, { "epoch": 6.2, "learning_rate": 6.695974238891906e-07, "loss": 2.5776, "step": 2742 }, { "epoch": 6.21, "learning_rate": 6.682144523893098e-07, "loss": 2.1765, "step": 2744 }, { "epoch": 6.21, "learning_rate": 6.66832193595005e-07, "loss": 2.1806, "step": 2746 }, { "epoch": 6.22, "learning_rate": 6.654506504754937e-07, "loss": 2.3733, "step": 2748 }, { "epoch": 6.22, "learning_rate": 6.640698259984563e-07, "loss": 2.3373, "step": 2750 }, { "epoch": 6.22, "learning_rate": 6.626897231300294e-07, "loss": 2.2696, "step": 2752 }, { "epoch": 6.23, "learning_rate": 6.613103448348004e-07, "loss": 2.2713, "step": 2754 }, { "epoch": 6.23, "learning_rate": 6.599316940757986e-07, "loss": 2.2902, "step": 2756 }, { "epoch": 6.24, "learning_rate": 6.585537738144916e-07, "loss": 2.3606, "step": 2758 }, { "epoch": 6.24, "learning_rate": 6.571765870107782e-07, "loss": 2.3262, "step": 2760 }, { "epoch": 6.25, "learning_rate": 6.558001366229807e-07, "loss": 2.2879, "step": 2762 }, { "epoch": 6.25, "learning_rate": 6.544244256078395e-07, "loss": 2.2362, "step": 2764 }, { "epoch": 6.26, "learning_rate": 6.530494569205078e-07, "loss": 2.5146, "step": 2766 }, { "epoch": 6.26, "learning_rate": 6.516752335145435e-07, "loss": 2.6295, "step": 2768 }, { "epoch": 6.27, "learning_rate": 6.503017583419033e-07, "loss": 2.291, "step": 2770 }, { "epoch": 6.27, "learning_rate": 6.48929034352937e-07, "loss": 2.3465, "step": 2772 }, { "epoch": 6.27, "learning_rate": 6.475570644963811e-07, "loss": 2.2647, "step": 2774 }, { "epoch": 6.28, "learning_rate": 6.461858517193517e-07, "loss": 2.5567, "step": 2776 }, { "epoch": 6.28, "learning_rate": 6.448153989673384e-07, "loss": 2.3326, "step": 2778 }, { "epoch": 6.29, "learning_rate": 6.434457091841988e-07, "loss": 2.4799, "step": 2780 }, { "epoch": 6.29, "learning_rate": 6.420767853121518e-07, "loss": 2.2515, "step": 2782 }, { "epoch": 6.3, "learning_rate": 6.407086302917694e-07, "loss": 2.0771, "step": 2784 }, { "epoch": 6.3, "learning_rate": 6.393412470619741e-07, "loss": 2.4317, "step": 2786 }, { "epoch": 6.31, "learning_rate": 6.379746385600294e-07, "loss": 2.4658, "step": 2788 }, { "epoch": 6.31, "learning_rate": 6.366088077215349e-07, "loss": 2.333, "step": 2790 }, { "epoch": 6.31, "learning_rate": 6.352437574804194e-07, "loss": 2.3936, "step": 2792 }, { "epoch": 6.32, "learning_rate": 6.338794907689351e-07, "loss": 2.3265, "step": 2794 }, { "epoch": 6.32, "learning_rate": 6.325160105176519e-07, "loss": 2.1475, "step": 2796 }, { "epoch": 6.33, "learning_rate": 6.311533196554486e-07, "loss": 2.2806, "step": 2798 }, { "epoch": 6.33, "learning_rate": 6.297914211095097e-07, "loss": 2.4101, "step": 2800 }, { "epoch": 6.34, "learning_rate": 6.284303178053179e-07, "loss": 2.2414, "step": 2802 }, { "epoch": 6.34, "learning_rate": 6.270700126666457e-07, "loss": 2.3114, "step": 2804 }, { "epoch": 6.35, "learning_rate": 6.257105086155537e-07, "loss": 2.0424, "step": 2806 }, { "epoch": 6.35, "learning_rate": 6.243518085723795e-07, "loss": 2.4947, "step": 2808 }, { "epoch": 6.36, "learning_rate": 6.229939154557358e-07, "loss": 2.1634, "step": 2810 }, { "epoch": 6.36, "learning_rate": 6.216368321824992e-07, "loss": 2.2303, "step": 2812 }, { "epoch": 6.36, "learning_rate": 6.20280561667809e-07, "loss": 2.4121, "step": 2814 }, { "epoch": 6.37, "learning_rate": 6.189251068250582e-07, "loss": 2.3059, "step": 2816 }, { "epoch": 6.37, "learning_rate": 6.175704705658861e-07, "loss": 2.1436, "step": 2818 }, { "epoch": 6.38, "learning_rate": 6.162166558001756e-07, "loss": 2.4704, "step": 2820 }, { "epoch": 6.38, "learning_rate": 6.148636654360437e-07, "loss": 2.3432, "step": 2822 }, { "epoch": 6.39, "learning_rate": 6.135115023798378e-07, "loss": 2.4694, "step": 2824 }, { "epoch": 6.39, "learning_rate": 6.12160169536126e-07, "loss": 2.1774, "step": 2826 }, { "epoch": 6.4, "learning_rate": 6.108096698076952e-07, "loss": 2.3985, "step": 2828 }, { "epoch": 6.4, "learning_rate": 6.09460006095542e-07, "loss": 2.1717, "step": 2830 }, { "epoch": 6.41, "learning_rate": 6.08111181298866e-07, "loss": 2.3996, "step": 2832 }, { "epoch": 6.41, "learning_rate": 6.067631983150665e-07, "loss": 2.3045, "step": 2834 }, { "epoch": 6.41, "learning_rate": 6.054160600397336e-07, "loss": 2.4791, "step": 2836 }, { "epoch": 6.42, "learning_rate": 6.04069769366643e-07, "loss": 2.2783, "step": 2838 }, { "epoch": 6.42, "learning_rate": 6.027243291877495e-07, "loss": 2.3579, "step": 2840 }, { "epoch": 6.43, "learning_rate": 6.013797423931813e-07, "loss": 2.3617, "step": 2842 }, { "epoch": 6.43, "learning_rate": 6.000360118712336e-07, "loss": 2.3048, "step": 2844 }, { "epoch": 6.44, "learning_rate": 5.986931405083611e-07, "loss": 2.4872, "step": 2846 }, { "epoch": 6.44, "learning_rate": 5.973511311891747e-07, "loss": 2.2881, "step": 2848 }, { "epoch": 6.45, "learning_rate": 5.960099867964319e-07, "loss": 2.4967, "step": 2850 }, { "epoch": 6.45, "learning_rate": 5.94669710211034e-07, "loss": 2.6076, "step": 2852 }, { "epoch": 6.46, "learning_rate": 5.933303043120162e-07, "loss": 2.1849, "step": 2854 }, { "epoch": 6.46, "learning_rate": 5.919917719765446e-07, "loss": 2.2264, "step": 2856 }, { "epoch": 6.46, "learning_rate": 5.906541160799092e-07, "loss": 2.6351, "step": 2858 }, { "epoch": 6.47, "learning_rate": 5.893173394955158e-07, "loss": 2.2375, "step": 2860 }, { "epoch": 6.47, "learning_rate": 5.879814450948826e-07, "loss": 2.337, "step": 2862 }, { "epoch": 6.48, "learning_rate": 5.866464357476324e-07, "loss": 2.5032, "step": 2864 }, { "epoch": 6.48, "learning_rate": 5.853123143214876e-07, "loss": 2.6742, "step": 2866 }, { "epoch": 6.49, "learning_rate": 5.839790836822614e-07, "loss": 2.2436, "step": 2868 }, { "epoch": 6.49, "learning_rate": 5.826467466938553e-07, "loss": 2.2489, "step": 2870 }, { "epoch": 6.5, "learning_rate": 5.813153062182504e-07, "loss": 2.4944, "step": 2872 }, { "epoch": 6.5, "learning_rate": 5.799847651155022e-07, "loss": 2.242, "step": 2874 }, { "epoch": 6.5, "learning_rate": 5.786551262437341e-07, "loss": 2.2145, "step": 2876 }, { "epoch": 6.51, "learning_rate": 5.773263924591309e-07, "loss": 2.3368, "step": 2878 }, { "epoch": 6.51, "learning_rate": 5.759985666159348e-07, "loss": 2.3857, "step": 2880 }, { "epoch": 6.52, "learning_rate": 5.746716515664363e-07, "loss": 2.5574, "step": 2882 }, { "epoch": 6.52, "learning_rate": 5.733456501609697e-07, "loss": 2.6827, "step": 2884 }, { "epoch": 6.53, "learning_rate": 5.720205652479067e-07, "loss": 2.3012, "step": 2886 }, { "epoch": 6.53, "learning_rate": 5.706963996736504e-07, "loss": 2.4365, "step": 2888 }, { "epoch": 6.54, "learning_rate": 5.693731562826288e-07, "loss": 2.0728, "step": 2890 }, { "epoch": 6.54, "learning_rate": 5.6805083791729e-07, "loss": 2.3692, "step": 2892 }, { "epoch": 6.55, "learning_rate": 5.667294474180937e-07, "loss": 2.4155, "step": 2894 }, { "epoch": 6.55, "learning_rate": 5.654089876235073e-07, "loss": 2.1968, "step": 2896 }, { "epoch": 6.55, "learning_rate": 5.640894613699982e-07, "loss": 2.0421, "step": 2898 }, { "epoch": 6.56, "learning_rate": 5.6277087149203e-07, "loss": 2.4892, "step": 2900 }, { "epoch": 6.56, "learning_rate": 5.614532208220528e-07, "loss": 2.2188, "step": 2902 }, { "epoch": 6.57, "learning_rate": 5.601365121905007e-07, "loss": 2.3704, "step": 2904 }, { "epoch": 6.57, "learning_rate": 5.588207484257839e-07, "loss": 2.3284, "step": 2906 }, { "epoch": 6.58, "learning_rate": 5.575059323542825e-07, "loss": 2.3581, "step": 2908 }, { "epoch": 6.58, "learning_rate": 5.561920668003413e-07, "loss": 2.2069, "step": 2910 }, { "epoch": 6.59, "learning_rate": 5.548791545862632e-07, "loss": 2.377, "step": 2912 }, { "epoch": 6.59, "learning_rate": 5.535671985323034e-07, "loss": 2.552, "step": 2914 }, { "epoch": 6.6, "learning_rate": 5.522562014566628e-07, "loss": 2.4803, "step": 2916 }, { "epoch": 6.6, "learning_rate": 5.509461661754822e-07, "loss": 2.2848, "step": 2918 }, { "epoch": 6.6, "learning_rate": 5.496370955028379e-07, "loss": 2.3721, "step": 2920 }, { "epoch": 6.61, "learning_rate": 5.483289922507312e-07, "loss": 2.2689, "step": 2922 }, { "epoch": 6.61, "learning_rate": 5.470218592290883e-07, "loss": 2.4126, "step": 2924 }, { "epoch": 6.62, "learning_rate": 5.457156992457493e-07, "loss": 2.4723, "step": 2926 }, { "epoch": 6.62, "learning_rate": 5.444105151064657e-07, "loss": 2.5204, "step": 2928 }, { "epoch": 6.63, "learning_rate": 5.431063096148908e-07, "loss": 2.2726, "step": 2930 }, { "epoch": 6.63, "learning_rate": 5.418030855725776e-07, "loss": 2.3093, "step": 2932 }, { "epoch": 6.64, "learning_rate": 5.405008457789697e-07, "loss": 2.5789, "step": 2934 }, { "epoch": 6.64, "learning_rate": 5.391995930313969e-07, "loss": 2.496, "step": 2936 }, { "epoch": 6.65, "learning_rate": 5.378993301250682e-07, "loss": 2.2452, "step": 2938 }, { "epoch": 6.65, "learning_rate": 5.366000598530674e-07, "loss": 2.3148, "step": 2940 }, { "epoch": 6.65, "learning_rate": 5.353017850063452e-07, "loss": 2.7162, "step": 2942 }, { "epoch": 6.66, "learning_rate": 5.340045083737142e-07, "loss": 2.4087, "step": 2944 }, { "epoch": 6.66, "learning_rate": 5.327082327418419e-07, "loss": 2.2211, "step": 2946 }, { "epoch": 6.67, "learning_rate": 5.314129608952484e-07, "loss": 2.0549, "step": 2948 }, { "epoch": 6.67, "learning_rate": 5.301186956162935e-07, "loss": 2.5625, "step": 2950 }, { "epoch": 6.68, "learning_rate": 5.288254396851782e-07, "loss": 2.2932, "step": 2952 }, { "epoch": 6.68, "learning_rate": 5.275331958799335e-07, "loss": 2.3089, "step": 2954 }, { "epoch": 6.69, "learning_rate": 5.262419669764171e-07, "loss": 2.4793, "step": 2956 }, { "epoch": 6.69, "learning_rate": 5.24951755748306e-07, "loss": 2.2383, "step": 2958 }, { "epoch": 6.69, "learning_rate": 5.236625649670913e-07, "loss": 2.4058, "step": 2960 }, { "epoch": 6.7, "learning_rate": 5.223743974020727e-07, "loss": 2.4452, "step": 2962 }, { "epoch": 6.7, "learning_rate": 5.210872558203514e-07, "loss": 2.232, "step": 2964 }, { "epoch": 6.71, "learning_rate": 5.198011429868245e-07, "loss": 2.6852, "step": 2966 }, { "epoch": 6.71, "learning_rate": 5.185160616641793e-07, "loss": 2.0647, "step": 2968 }, { "epoch": 6.72, "learning_rate": 5.172320146128884e-07, "loss": 2.5407, "step": 2970 }, { "epoch": 6.72, "learning_rate": 5.159490045912006e-07, "loss": 2.313, "step": 2972 }, { "epoch": 6.73, "learning_rate": 5.146670343551394e-07, "loss": 2.291, "step": 2974 }, { "epoch": 6.73, "learning_rate": 5.133861066584929e-07, "loss": 2.2558, "step": 2976 }, { "epoch": 6.74, "learning_rate": 5.121062242528109e-07, "loss": 2.6145, "step": 2978 }, { "epoch": 6.74, "learning_rate": 5.108273898873967e-07, "loss": 2.4894, "step": 2980 }, { "epoch": 6.74, "learning_rate": 5.095496063093044e-07, "loss": 2.5229, "step": 2982 }, { "epoch": 6.75, "learning_rate": 5.082728762633278e-07, "loss": 2.1288, "step": 2984 }, { "epoch": 6.75, "learning_rate": 5.069972024920003e-07, "loss": 2.467, "step": 2986 }, { "epoch": 6.76, "learning_rate": 5.057225877355851e-07, "loss": 2.5339, "step": 2988 }, { "epoch": 6.76, "learning_rate": 5.044490347320715e-07, "loss": 2.123, "step": 2990 }, { "epoch": 6.77, "learning_rate": 5.031765462171659e-07, "loss": 2.5362, "step": 2992 }, { "epoch": 6.77, "learning_rate": 5.019051249242907e-07, "loss": 2.3589, "step": 2994 }, { "epoch": 6.78, "learning_rate": 5.006347735845744e-07, "loss": 2.612, "step": 2996 }, { "epoch": 6.78, "learning_rate": 4.993654949268472e-07, "loss": 2.2357, "step": 2998 }, { "epoch": 6.79, "learning_rate": 4.980972916776349e-07, "loss": 2.0088, "step": 3000 }, { "epoch": 6.79, "learning_rate": 4.968301665611545e-07, "loss": 2.4529, "step": 3002 }, { "epoch": 6.79, "learning_rate": 4.955641222993057e-07, "loss": 2.352, "step": 3004 }, { "epoch": 6.8, "learning_rate": 4.942991616116667e-07, "loss": 2.2461, "step": 3006 }, { "epoch": 6.8, "learning_rate": 4.930352872154881e-07, "loss": 2.4761, "step": 3008 }, { "epoch": 6.81, "learning_rate": 4.917725018256885e-07, "loss": 2.3513, "step": 3010 }, { "epoch": 6.81, "learning_rate": 4.905108081548441e-07, "loss": 2.0589, "step": 3012 }, { "epoch": 6.82, "learning_rate": 4.892502089131891e-07, "loss": 2.3859, "step": 3014 }, { "epoch": 6.82, "learning_rate": 4.879907068086047e-07, "loss": 2.4326, "step": 3016 }, { "epoch": 6.83, "learning_rate": 4.867323045466173e-07, "loss": 2.2585, "step": 3018 }, { "epoch": 6.83, "learning_rate": 4.854750048303881e-07, "loss": 2.2026, "step": 3020 }, { "epoch": 6.84, "learning_rate": 4.842188103607126e-07, "loss": 2.4944, "step": 3022 }, { "epoch": 6.84, "learning_rate": 4.829637238360103e-07, "loss": 2.3826, "step": 3024 }, { "epoch": 6.84, "learning_rate": 4.817097479523214e-07, "loss": 2.4484, "step": 3026 }, { "epoch": 6.85, "learning_rate": 4.804568854033003e-07, "loss": 2.1429, "step": 3028 }, { "epoch": 6.85, "learning_rate": 4.792051388802103e-07, "loss": 2.5137, "step": 3030 }, { "epoch": 6.86, "learning_rate": 4.779545110719167e-07, "loss": 2.6513, "step": 3032 }, { "epoch": 6.86, "learning_rate": 4.767050046648818e-07, "loss": 2.4917, "step": 3034 }, { "epoch": 6.87, "learning_rate": 4.7545662234315933e-07, "loss": 2.3492, "step": 3036 }, { "epoch": 6.87, "learning_rate": 4.742093667883881e-07, "loss": 2.1966, "step": 3038 }, { "epoch": 6.88, "learning_rate": 4.729632406797868e-07, "loss": 2.5651, "step": 3040 }, { "epoch": 6.88, "learning_rate": 4.717182466941472e-07, "loss": 2.2645, "step": 3042 }, { "epoch": 6.88, "learning_rate": 4.704743875058307e-07, "loss": 2.2814, "step": 3044 }, { "epoch": 6.89, "learning_rate": 4.6923166578675976e-07, "loss": 2.247, "step": 3046 }, { "epoch": 6.89, "learning_rate": 4.679900842064137e-07, "loss": 2.3914, "step": 3048 }, { "epoch": 6.9, "learning_rate": 4.667496454318225e-07, "loss": 2.5838, "step": 3050 }, { "epoch": 6.9, "learning_rate": 4.6551035212756284e-07, "loss": 2.348, "step": 3052 }, { "epoch": 6.91, "learning_rate": 4.642722069557481e-07, "loss": 2.4168, "step": 3054 }, { "epoch": 6.91, "learning_rate": 4.63035212576028e-07, "loss": 2.3571, "step": 3056 }, { "epoch": 6.92, "learning_rate": 4.6179937164557824e-07, "loss": 2.1961, "step": 3058 }, { "epoch": 6.92, "learning_rate": 4.60564686819099e-07, "loss": 2.2318, "step": 3060 }, { "epoch": 6.93, "learning_rate": 4.593311607488042e-07, "loss": 2.3089, "step": 3062 }, { "epoch": 6.93, "learning_rate": 4.5809879608442127e-07, "loss": 2.32, "step": 3064 }, { "epoch": 6.93, "learning_rate": 4.5686759547318145e-07, "loss": 2.3229, "step": 3066 }, { "epoch": 6.94, "learning_rate": 4.556375615598157e-07, "loss": 2.1972, "step": 3068 }, { "epoch": 6.94, "learning_rate": 4.5440869698654847e-07, "loss": 2.0506, "step": 3070 }, { "epoch": 6.95, "learning_rate": 4.5318100439309383e-07, "loss": 2.4581, "step": 3072 }, { "epoch": 6.95, "learning_rate": 4.519544864166467e-07, "loss": 2.428, "step": 3074 }, { "epoch": 6.96, "learning_rate": 4.5072914569187936e-07, "loss": 2.3864, "step": 3076 }, { "epoch": 6.96, "learning_rate": 4.4950498485093504e-07, "loss": 2.2819, "step": 3078 }, { "epoch": 6.97, "learning_rate": 4.4828200652342376e-07, "loss": 2.3175, "step": 3080 }, { "epoch": 6.97, "learning_rate": 4.4706021333641286e-07, "loss": 2.4947, "step": 3082 }, { "epoch": 6.98, "learning_rate": 4.458396079144267e-07, "loss": 2.6006, "step": 3084 }, { "epoch": 6.98, "learning_rate": 4.4462019287943633e-07, "loss": 2.1521, "step": 3086 }, { "epoch": 6.98, "learning_rate": 4.434019708508563e-07, "loss": 2.1405, "step": 3088 }, { "epoch": 6.99, "learning_rate": 4.421849444455382e-07, "loss": 2.3048, "step": 3090 }, { "epoch": 6.99, "learning_rate": 4.4096911627776633e-07, "loss": 2.2149, "step": 3092 }, { "epoch": 7.0, "learning_rate": 4.397544889592499e-07, "loss": 2.2301, "step": 3094 }, { "epoch": 7.0, "learning_rate": 4.38541065099119e-07, "loss": 2.2929, "step": 3096 }, { "epoch": 7.01, "learning_rate": 4.373288473039185e-07, "loss": 2.5705, "step": 3098 }, { "epoch": 7.01, "learning_rate": 4.361178381776034e-07, "loss": 2.1664, "step": 3100 }, { "epoch": 7.02, "learning_rate": 4.3490804032153027e-07, "loss": 2.2688, "step": 3102 }, { "epoch": 7.02, "learning_rate": 4.3369945633445626e-07, "loss": 2.391, "step": 3104 }, { "epoch": 7.03, "learning_rate": 4.324920888125294e-07, "loss": 2.2777, "step": 3106 }, { "epoch": 7.03, "learning_rate": 4.3128594034928514e-07, "loss": 2.0721, "step": 3108 }, { "epoch": 7.03, "learning_rate": 4.300810135356397e-07, "loss": 2.3379, "step": 3110 }, { "epoch": 7.04, "learning_rate": 4.2887731095988664e-07, "loss": 2.2214, "step": 3112 }, { "epoch": 7.04, "learning_rate": 4.2767483520768833e-07, "loss": 2.6695, "step": 3114 }, { "epoch": 7.05, "learning_rate": 4.26473588862072e-07, "loss": 2.3539, "step": 3116 }, { "epoch": 7.05, "learning_rate": 4.252735745034243e-07, "loss": 2.3112, "step": 3118 }, { "epoch": 7.06, "learning_rate": 4.2407479470948495e-07, "loss": 2.1182, "step": 3120 }, { "epoch": 7.06, "learning_rate": 4.22877252055343e-07, "loss": 2.1283, "step": 3122 }, { "epoch": 7.07, "learning_rate": 4.2168094911342867e-07, "loss": 2.2434, "step": 3124 }, { "epoch": 7.07, "learning_rate": 4.2048588845350974e-07, "loss": 2.3956, "step": 3126 }, { "epoch": 7.07, "learning_rate": 4.192920726426853e-07, "loss": 2.2923, "step": 3128 }, { "epoch": 7.08, "learning_rate": 4.1809950424538043e-07, "loss": 2.4565, "step": 3130 }, { "epoch": 7.08, "learning_rate": 4.169081858233404e-07, "loss": 2.3932, "step": 3132 }, { "epoch": 7.09, "learning_rate": 4.157181199356262e-07, "loss": 2.1525, "step": 3134 }, { "epoch": 7.09, "learning_rate": 4.145293091386076e-07, "loss": 2.1397, "step": 3136 }, { "epoch": 7.1, "learning_rate": 4.1334175598595845e-07, "loss": 2.2545, "step": 3138 }, { "epoch": 7.1, "learning_rate": 4.121554630286506e-07, "loss": 2.7604, "step": 3140 }, { "epoch": 7.11, "learning_rate": 4.109704328149505e-07, "loss": 2.1315, "step": 3142 }, { "epoch": 7.11, "learning_rate": 4.097866678904096e-07, "loss": 2.3746, "step": 3144 }, { "epoch": 7.12, "learning_rate": 4.086041707978638e-07, "loss": 2.262, "step": 3146 }, { "epoch": 7.12, "learning_rate": 4.0742294407742386e-07, "loss": 2.5298, "step": 3148 }, { "epoch": 7.12, "learning_rate": 4.0624299026647335e-07, "loss": 2.035, "step": 3150 }, { "epoch": 7.13, "learning_rate": 4.050643118996593e-07, "loss": 2.2675, "step": 3152 }, { "epoch": 7.13, "learning_rate": 4.038869115088911e-07, "loss": 2.2519, "step": 3154 }, { "epoch": 7.14, "learning_rate": 4.0271079162333175e-07, "loss": 2.474, "step": 3156 }, { "epoch": 7.14, "learning_rate": 4.015359547693938e-07, "loss": 2.5323, "step": 3158 }, { "epoch": 7.15, "learning_rate": 4.003624034707336e-07, "loss": 2.2379, "step": 3160 }, { "epoch": 7.15, "learning_rate": 3.991901402482468e-07, "loss": 2.2635, "step": 3162 }, { "epoch": 7.16, "learning_rate": 3.9801916762006137e-07, "loss": 2.6183, "step": 3164 }, { "epoch": 7.16, "learning_rate": 3.9684948810153305e-07, "loss": 2.5431, "step": 3166 }, { "epoch": 7.17, "learning_rate": 3.9568110420523957e-07, "loss": 2.3616, "step": 3168 }, { "epoch": 7.17, "learning_rate": 3.945140184409772e-07, "loss": 2.2793, "step": 3170 }, { "epoch": 7.17, "learning_rate": 3.9334823331575086e-07, "loss": 2.224, "step": 3172 }, { "epoch": 7.18, "learning_rate": 3.9218375133377445e-07, "loss": 2.3404, "step": 3174 }, { "epoch": 7.18, "learning_rate": 3.910205749964608e-07, "loss": 2.4168, "step": 3176 }, { "epoch": 7.19, "learning_rate": 3.898587068024186e-07, "loss": 2.3829, "step": 3178 }, { "epoch": 7.19, "learning_rate": 3.8869814924744637e-07, "loss": 2.4255, "step": 3180 }, { "epoch": 7.2, "learning_rate": 3.875389048245281e-07, "loss": 2.2857, "step": 3182 }, { "epoch": 7.2, "learning_rate": 3.86380976023826e-07, "loss": 2.5262, "step": 3184 }, { "epoch": 7.21, "learning_rate": 3.8522436533267654e-07, "loss": 2.2654, "step": 3186 }, { "epoch": 7.21, "learning_rate": 3.8406907523558496e-07, "loss": 2.2859, "step": 3188 }, { "epoch": 7.22, "learning_rate": 3.829151082142196e-07, "loss": 2.5575, "step": 3190 }, { "epoch": 7.22, "learning_rate": 3.817624667474062e-07, "loss": 2.1387, "step": 3192 }, { "epoch": 7.22, "learning_rate": 3.806111533111245e-07, "loss": 2.1788, "step": 3194 }, { "epoch": 7.23, "learning_rate": 3.794611703785002e-07, "loss": 2.3718, "step": 3196 }, { "epoch": 7.23, "learning_rate": 3.7831252041980143e-07, "loss": 2.4759, "step": 3198 }, { "epoch": 7.24, "learning_rate": 3.771652059024328e-07, "loss": 2.2483, "step": 3200 }, { "epoch": 7.24, "learning_rate": 3.760192292909303e-07, "loss": 2.1328, "step": 3202 }, { "epoch": 7.25, "learning_rate": 3.7487459304695655e-07, "loss": 2.521, "step": 3204 }, { "epoch": 7.25, "learning_rate": 3.737312996292941e-07, "loss": 2.1955, "step": 3206 }, { "epoch": 7.26, "learning_rate": 3.725893514938414e-07, "loss": 2.2915, "step": 3208 }, { "epoch": 7.26, "learning_rate": 3.7144875109360675e-07, "loss": 2.4738, "step": 3210 }, { "epoch": 7.26, "learning_rate": 3.703095008787047e-07, "loss": 2.2094, "step": 3212 }, { "epoch": 7.27, "learning_rate": 3.6917160329634714e-07, "loss": 2.3182, "step": 3214 }, { "epoch": 7.27, "learning_rate": 3.6803506079084256e-07, "loss": 2.1551, "step": 3216 }, { "epoch": 7.28, "learning_rate": 3.6689987580358773e-07, "loss": 2.1328, "step": 3218 }, { "epoch": 7.28, "learning_rate": 3.657660507730631e-07, "loss": 2.5026, "step": 3220 }, { "epoch": 7.29, "learning_rate": 3.6463358813482805e-07, "loss": 2.1657, "step": 3222 }, { "epoch": 7.29, "learning_rate": 3.6350249032151594e-07, "loss": 2.2007, "step": 3224 }, { "epoch": 7.3, "learning_rate": 3.6237275976282753e-07, "loss": 2.4264, "step": 3226 }, { "epoch": 7.3, "learning_rate": 3.6124439888552725e-07, "loss": 2.4743, "step": 3228 }, { "epoch": 7.31, "learning_rate": 3.6011741011343645e-07, "loss": 2.1036, "step": 3230 }, { "epoch": 7.31, "learning_rate": 3.589917958674307e-07, "loss": 2.572, "step": 3232 }, { "epoch": 7.31, "learning_rate": 3.578675585654305e-07, "loss": 2.0887, "step": 3234 }, { "epoch": 7.32, "learning_rate": 3.5674470062240125e-07, "loss": 2.4678, "step": 3236 }, { "epoch": 7.32, "learning_rate": 3.556232244503432e-07, "loss": 2.4499, "step": 3238 }, { "epoch": 7.33, "learning_rate": 3.5450313245829034e-07, "loss": 2.3848, "step": 3240 }, { "epoch": 7.33, "learning_rate": 3.5338442705230097e-07, "loss": 2.1436, "step": 3242 }, { "epoch": 7.34, "learning_rate": 3.522671106354572e-07, "loss": 2.5811, "step": 3244 }, { "epoch": 7.34, "learning_rate": 3.5115118560785607e-07, "loss": 2.5358, "step": 3246 }, { "epoch": 7.35, "learning_rate": 3.5003665436660636e-07, "loss": 2.4689, "step": 3248 }, { "epoch": 7.35, "learning_rate": 3.489235193058222e-07, "loss": 2.2282, "step": 3250 }, { "epoch": 7.36, "learning_rate": 3.478117828166197e-07, "loss": 2.2035, "step": 3252 }, { "epoch": 7.36, "learning_rate": 3.467014472871099e-07, "loss": 2.1824, "step": 3254 }, { "epoch": 7.36, "learning_rate": 3.455925151023946e-07, "loss": 2.3074, "step": 3256 }, { "epoch": 7.37, "learning_rate": 3.444849886445612e-07, "loss": 2.1646, "step": 3258 }, { "epoch": 7.37, "learning_rate": 3.433788702926772e-07, "loss": 2.2208, "step": 3260 }, { "epoch": 7.38, "learning_rate": 3.422741624227855e-07, "loss": 2.397, "step": 3262 }, { "epoch": 7.38, "learning_rate": 3.4117086740789967e-07, "loss": 2.2592, "step": 3264 }, { "epoch": 7.39, "learning_rate": 3.4006898761799776e-07, "loss": 2.4963, "step": 3266 }, { "epoch": 7.39, "learning_rate": 3.3896852542001777e-07, "loss": 2.5038, "step": 3268 }, { "epoch": 7.4, "learning_rate": 3.378694831778527e-07, "loss": 2.4851, "step": 3270 }, { "epoch": 7.4, "learning_rate": 3.367718632523452e-07, "loss": 2.43, "step": 3272 }, { "epoch": 7.41, "learning_rate": 3.356756680012833e-07, "loss": 2.5678, "step": 3274 }, { "epoch": 7.41, "learning_rate": 3.345808997793942e-07, "loss": 2.1539, "step": 3276 }, { "epoch": 7.41, "learning_rate": 3.3348756093833965e-07, "loss": 2.1828, "step": 3278 }, { "epoch": 7.42, "learning_rate": 3.3239565382671087e-07, "loss": 2.4646, "step": 3280 }, { "epoch": 7.42, "learning_rate": 3.313051807900239e-07, "loss": 2.1353, "step": 3282 }, { "epoch": 7.43, "learning_rate": 3.30216144170714e-07, "loss": 2.4583, "step": 3284 }, { "epoch": 7.43, "learning_rate": 3.2912854630813137e-07, "loss": 2.2403, "step": 3286 }, { "epoch": 7.44, "learning_rate": 3.2804238953853524e-07, "loss": 2.5755, "step": 3288 }, { "epoch": 7.44, "learning_rate": 3.2695767619508905e-07, "loss": 2.4918, "step": 3290 }, { "epoch": 7.45, "learning_rate": 3.2587440860785567e-07, "loss": 2.4181, "step": 3292 }, { "epoch": 7.45, "learning_rate": 3.2479258910379347e-07, "loss": 2.1952, "step": 3294 }, { "epoch": 7.45, "learning_rate": 3.237122200067479e-07, "loss": 2.3317, "step": 3296 }, { "epoch": 7.46, "learning_rate": 3.2263330363745136e-07, "loss": 2.0153, "step": 3298 }, { "epoch": 7.46, "learning_rate": 3.215558423135136e-07, "loss": 2.2844, "step": 3300 }, { "epoch": 7.47, "learning_rate": 3.2047983834942085e-07, "loss": 2.4493, "step": 3302 }, { "epoch": 7.47, "learning_rate": 3.1940529405652627e-07, "loss": 2.4057, "step": 3304 }, { "epoch": 7.48, "learning_rate": 3.183322117430497e-07, "loss": 2.1862, "step": 3306 }, { "epoch": 7.48, "learning_rate": 3.1726059371406953e-07, "loss": 2.6167, "step": 3308 }, { "epoch": 7.49, "learning_rate": 3.1619044227151894e-07, "loss": 2.4327, "step": 3310 }, { "epoch": 7.49, "learning_rate": 3.151217597141802e-07, "loss": 2.264, "step": 3312 }, { "epoch": 7.5, "learning_rate": 3.140545483376815e-07, "loss": 2.1945, "step": 3314 }, { "epoch": 7.5, "learning_rate": 3.1298881043448976e-07, "loss": 2.5421, "step": 3316 }, { "epoch": 7.5, "learning_rate": 3.11924548293907e-07, "loss": 2.1653, "step": 3318 }, { "epoch": 7.51, "learning_rate": 3.108617642020651e-07, "loss": 2.3909, "step": 3320 }, { "epoch": 7.51, "learning_rate": 3.0980046044192187e-07, "loss": 2.5704, "step": 3322 }, { "epoch": 7.52, "learning_rate": 3.087406392932532e-07, "loss": 2.3593, "step": 3324 }, { "epoch": 7.52, "learning_rate": 3.0768230303265254e-07, "loss": 2.1842, "step": 3326 }, { "epoch": 7.53, "learning_rate": 3.066254539335218e-07, "loss": 2.2524, "step": 3328 }, { "epoch": 7.53, "learning_rate": 3.0557009426607007e-07, "loss": 2.4896, "step": 3330 }, { "epoch": 7.54, "learning_rate": 3.045162262973048e-07, "loss": 2.3859, "step": 3332 }, { "epoch": 7.54, "learning_rate": 3.0346385229103125e-07, "loss": 2.2594, "step": 3334 }, { "epoch": 7.55, "learning_rate": 3.0241297450784443e-07, "loss": 2.3181, "step": 3336 }, { "epoch": 7.55, "learning_rate": 3.0136359520512545e-07, "loss": 2.2603, "step": 3338 }, { "epoch": 7.55, "learning_rate": 3.003157166370367e-07, "loss": 2.2832, "step": 3340 }, { "epoch": 7.56, "learning_rate": 2.992693410545165e-07, "loss": 2.1501, "step": 3342 }, { "epoch": 7.56, "learning_rate": 2.9822447070527547e-07, "loss": 2.5098, "step": 3344 }, { "epoch": 7.57, "learning_rate": 2.971811078337899e-07, "loss": 2.3524, "step": 3346 }, { "epoch": 7.57, "learning_rate": 2.961392546812983e-07, "loss": 2.5895, "step": 3348 }, { "epoch": 7.58, "learning_rate": 2.950989134857963e-07, "loss": 2.3269, "step": 3350 }, { "epoch": 7.58, "learning_rate": 2.940600864820314e-07, "loss": 2.4572, "step": 3352 }, { "epoch": 7.59, "learning_rate": 2.9302277590149835e-07, "loss": 2.3999, "step": 3354 }, { "epoch": 7.59, "learning_rate": 2.9198698397243535e-07, "loss": 2.4635, "step": 3356 }, { "epoch": 7.6, "learning_rate": 2.9095271291981747e-07, "loss": 2.3415, "step": 3358 }, { "epoch": 7.6, "learning_rate": 2.899199649653532e-07, "loss": 2.3258, "step": 3360 }, { "epoch": 7.6, "learning_rate": 2.8888874232747896e-07, "loss": 2.227, "step": 3362 }, { "epoch": 7.61, "learning_rate": 2.8785904722135567e-07, "loss": 2.3294, "step": 3364 }, { "epoch": 7.61, "learning_rate": 2.868308818588611e-07, "loss": 2.5475, "step": 3366 }, { "epoch": 7.62, "learning_rate": 2.8580424844858884e-07, "loss": 2.5204, "step": 3368 }, { "epoch": 7.62, "learning_rate": 2.8477914919584076e-07, "loss": 2.2726, "step": 3370 }, { "epoch": 7.63, "learning_rate": 2.837555863026236e-07, "loss": 2.2483, "step": 3372 }, { "epoch": 7.63, "learning_rate": 2.8273356196764307e-07, "loss": 2.3288, "step": 3374 }, { "epoch": 7.64, "learning_rate": 2.8171307838630144e-07, "loss": 2.4139, "step": 3376 }, { "epoch": 7.64, "learning_rate": 2.8069413775068986e-07, "loss": 2.4272, "step": 3378 }, { "epoch": 7.64, "learning_rate": 2.796767422495856e-07, "loss": 2.1728, "step": 3380 }, { "epoch": 7.65, "learning_rate": 2.786608940684467e-07, "loss": 2.1175, "step": 3382 }, { "epoch": 7.65, "learning_rate": 2.776465953894085e-07, "loss": 2.3987, "step": 3384 }, { "epoch": 7.66, "learning_rate": 2.766338483912757e-07, "loss": 2.4435, "step": 3386 }, { "epoch": 7.66, "learning_rate": 2.756226552495219e-07, "loss": 2.2848, "step": 3388 }, { "epoch": 7.67, "learning_rate": 2.7461301813628155e-07, "loss": 2.3723, "step": 3390 }, { "epoch": 7.67, "learning_rate": 2.73604939220348e-07, "loss": 2.3791, "step": 3392 }, { "epoch": 7.68, "learning_rate": 2.7259842066716543e-07, "loss": 2.4471, "step": 3394 }, { "epoch": 7.68, "learning_rate": 2.7159346463882826e-07, "loss": 2.2133, "step": 3396 }, { "epoch": 7.69, "learning_rate": 2.7059007329407326e-07, "loss": 2.3595, "step": 3398 }, { "epoch": 7.69, "learning_rate": 2.6958824878827635e-07, "loss": 2.3127, "step": 3400 }, { "epoch": 7.69, "learning_rate": 2.685879932734476e-07, "loss": 2.3173, "step": 3402 }, { "epoch": 7.7, "learning_rate": 2.675893088982275e-07, "loss": 2.1802, "step": 3404 }, { "epoch": 7.7, "learning_rate": 2.665921978078809e-07, "loss": 2.3848, "step": 3406 }, { "epoch": 7.71, "learning_rate": 2.6559666214429297e-07, "loss": 2.3093, "step": 3408 }, { "epoch": 7.71, "learning_rate": 2.646027040459651e-07, "loss": 2.1086, "step": 3410 }, { "epoch": 7.72, "learning_rate": 2.636103256480102e-07, "loss": 2.4374, "step": 3412 }, { "epoch": 7.72, "learning_rate": 2.626195290821467e-07, "loss": 2.2971, "step": 3414 }, { "epoch": 7.73, "learning_rate": 2.616303164766965e-07, "loss": 2.42, "step": 3416 }, { "epoch": 7.73, "learning_rate": 2.6064268995657823e-07, "loss": 2.3013, "step": 3418 }, { "epoch": 7.74, "learning_rate": 2.5965665164330354e-07, "loss": 2.3163, "step": 3420 }, { "epoch": 7.74, "learning_rate": 2.5867220365497254e-07, "loss": 2.376, "step": 3422 }, { "epoch": 7.74, "learning_rate": 2.5768934810626885e-07, "loss": 2.3108, "step": 3424 }, { "epoch": 7.75, "learning_rate": 2.5670808710845624e-07, "loss": 2.4786, "step": 3426 }, { "epoch": 7.75, "learning_rate": 2.557284227693727e-07, "loss": 2.1118, "step": 3428 }, { "epoch": 7.76, "learning_rate": 2.5475035719342606e-07, "loss": 2.444, "step": 3430 }, { "epoch": 7.76, "learning_rate": 2.5377389248159033e-07, "loss": 2.1952, "step": 3432 }, { "epoch": 7.77, "learning_rate": 2.527990307314013e-07, "loss": 2.283, "step": 3434 }, { "epoch": 7.77, "learning_rate": 2.518257740369497e-07, "loss": 2.3963, "step": 3436 }, { "epoch": 7.78, "learning_rate": 2.5085412448888045e-07, "loss": 2.1796, "step": 3438 }, { "epoch": 7.78, "learning_rate": 2.49884084174385e-07, "loss": 2.3689, "step": 3440 }, { "epoch": 7.79, "learning_rate": 2.489156551771981e-07, "loss": 2.4479, "step": 3442 }, { "epoch": 7.79, "learning_rate": 2.479488395775935e-07, "loss": 2.2417, "step": 3444 }, { "epoch": 7.79, "learning_rate": 2.469836394523794e-07, "loss": 2.4555, "step": 3446 }, { "epoch": 7.8, "learning_rate": 2.460200568748936e-07, "loss": 2.7688, "step": 3448 }, { "epoch": 7.8, "learning_rate": 2.45058093914999e-07, "loss": 2.7413, "step": 3450 }, { "epoch": 7.81, "learning_rate": 2.440977526390795e-07, "loss": 2.3979, "step": 3452 }, { "epoch": 7.81, "learning_rate": 2.431390351100364e-07, "loss": 2.3415, "step": 3454 }, { "epoch": 7.82, "learning_rate": 2.421819433872813e-07, "loss": 2.6412, "step": 3456 }, { "epoch": 7.82, "learning_rate": 2.4122647952673504e-07, "loss": 2.4298, "step": 3458 }, { "epoch": 7.83, "learning_rate": 2.4027264558082085e-07, "loss": 2.186, "step": 3460 }, { "epoch": 7.83, "learning_rate": 2.393204435984608e-07, "loss": 2.3957, "step": 3462 }, { "epoch": 7.83, "learning_rate": 2.3836987562507138e-07, "loss": 2.5346, "step": 3464 }, { "epoch": 7.84, "learning_rate": 2.3742094370255972e-07, "loss": 2.325, "step": 3466 }, { "epoch": 7.84, "learning_rate": 2.364736498693175e-07, "loss": 2.4938, "step": 3468 }, { "epoch": 7.85, "learning_rate": 2.3552799616021845e-07, "loss": 2.4997, "step": 3470 }, { "epoch": 7.85, "learning_rate": 2.3458398460661233e-07, "loss": 2.3212, "step": 3472 }, { "epoch": 7.86, "learning_rate": 2.3364161723632302e-07, "loss": 2.3675, "step": 3474 }, { "epoch": 7.86, "learning_rate": 2.3270089607364018e-07, "loss": 2.1652, "step": 3476 }, { "epoch": 7.87, "learning_rate": 2.3176182313931948e-07, "loss": 2.4536, "step": 3478 }, { "epoch": 7.87, "learning_rate": 2.308244004505745e-07, "loss": 2.2274, "step": 3480 }, { "epoch": 7.88, "learning_rate": 2.2988863002107538e-07, "loss": 2.4275, "step": 3482 }, { "epoch": 7.88, "learning_rate": 2.2895451386094124e-07, "loss": 2.1462, "step": 3484 }, { "epoch": 7.88, "learning_rate": 2.2802205397673935e-07, "loss": 2.4634, "step": 3486 }, { "epoch": 7.89, "learning_rate": 2.270912523714782e-07, "loss": 2.435, "step": 3488 }, { "epoch": 7.89, "learning_rate": 2.2616211104460459e-07, "loss": 2.4373, "step": 3490 }, { "epoch": 7.9, "learning_rate": 2.2523463199199822e-07, "loss": 2.3186, "step": 3492 }, { "epoch": 7.9, "learning_rate": 2.2430881720596927e-07, "loss": 2.44, "step": 3494 }, { "epoch": 7.91, "learning_rate": 2.2338466867525196e-07, "loss": 2.4308, "step": 3496 }, { "epoch": 7.91, "learning_rate": 2.2246218838500143e-07, "loss": 2.2673, "step": 3498 }, { "epoch": 7.92, "learning_rate": 2.2154137831678932e-07, "loss": 2.2352, "step": 3500 }, { "epoch": 7.92, "learning_rate": 2.2062224044859966e-07, "loss": 2.202, "step": 3502 }, { "epoch": 7.93, "learning_rate": 2.1970477675482423e-07, "loss": 2.2827, "step": 3504 }, { "epoch": 7.93, "learning_rate": 2.1878898920625832e-07, "loss": 2.1762, "step": 3506 }, { "epoch": 7.93, "learning_rate": 2.1787487977009765e-07, "loss": 2.474, "step": 3508 }, { "epoch": 7.94, "learning_rate": 2.169624504099321e-07, "loss": 2.2976, "step": 3510 }, { "epoch": 7.94, "learning_rate": 2.1605170308574318e-07, "loss": 2.1453, "step": 3512 }, { "epoch": 7.95, "learning_rate": 2.1514263975389879e-07, "loss": 2.1924, "step": 3514 }, { "epoch": 7.95, "learning_rate": 2.1423526236715052e-07, "loss": 2.2609, "step": 3516 }, { "epoch": 7.96, "learning_rate": 2.1378220651243138e-07, "loss": 2.4962, "step": 3518 }, { "epoch": 7.96, "learning_rate": 2.128773616968108e-07, "loss": 2.186, "step": 3520 }, { "epoch": 7.97, "learning_rate": 2.1197420769230912e-07, "loss": 2.2364, "step": 3522 }, { "epoch": 7.97, "learning_rate": 2.110727464389843e-07, "loss": 2.5511, "step": 3524 }, { "epoch": 7.98, "learning_rate": 2.101729798732561e-07, "loss": 2.1022, "step": 3526 }, { "epoch": 7.98, "learning_rate": 2.0927490992790587e-07, "loss": 2.501, "step": 3528 }, { "epoch": 7.98, "learning_rate": 2.0837853853206934e-07, "loss": 2.3158, "step": 3530 }, { "epoch": 7.99, "learning_rate": 2.0748386761123404e-07, "loss": 2.3638, "step": 3532 }, { "epoch": 7.99, "learning_rate": 2.0659089908723415e-07, "loss": 2.2822, "step": 3534 }, { "epoch": 8.0, "learning_rate": 2.0614505382362125e-07, "loss": 2.4747, "step": 3536 }, { "epoch": 8.0, "learning_rate": 2.0525464249031454e-07, "loss": 2.1788, "step": 3538 }, { "epoch": 8.01, "learning_rate": 2.043659383424222e-07, "loss": 2.0549, "step": 3540 }, { "epoch": 8.01, "learning_rate": 2.0347894328896177e-07, "loss": 2.1212, "step": 3542 }, { "epoch": 8.02, "learning_rate": 2.0259365923527894e-07, "loss": 2.4062, "step": 3544 }, { "epoch": 8.02, "learning_rate": 2.0171008808304547e-07, "loss": 2.3898, "step": 3546 }, { "epoch": 8.02, "learning_rate": 2.0082823173025142e-07, "loss": 2.2393, "step": 3548 }, { "epoch": 8.03, "learning_rate": 1.9994809207120556e-07, "loss": 2.1458, "step": 3550 }, { "epoch": 8.03, "learning_rate": 1.9906967099652771e-07, "loss": 2.3189, "step": 3552 }, { "epoch": 8.04, "learning_rate": 1.981929703931463e-07, "loss": 2.1485, "step": 3554 }, { "epoch": 8.04, "learning_rate": 1.973179921442938e-07, "loss": 2.2943, "step": 3556 }, { "epoch": 8.05, "learning_rate": 1.964447381295038e-07, "loss": 2.2134, "step": 3558 }, { "epoch": 8.05, "learning_rate": 1.9557321022460505e-07, "loss": 2.2823, "step": 3560 }, { "epoch": 8.06, "learning_rate": 1.9470341030171898e-07, "loss": 2.5778, "step": 3562 }, { "epoch": 8.06, "learning_rate": 1.9383534022925484e-07, "loss": 2.3283, "step": 3564 }, { "epoch": 8.07, "learning_rate": 1.929690018719069e-07, "loss": 2.0778, "step": 3566 }, { "epoch": 8.07, "learning_rate": 1.9210439709064785e-07, "loss": 2.3643, "step": 3568 }, { "epoch": 8.07, "learning_rate": 1.9124152774272827e-07, "loss": 2.1915, "step": 3570 }, { "epoch": 8.08, "learning_rate": 1.9038039568166974e-07, "loss": 2.372, "step": 3572 }, { "epoch": 8.08, "learning_rate": 1.8952100275726268e-07, "loss": 2.1317, "step": 3574 }, { "epoch": 8.09, "learning_rate": 1.8866335081556072e-07, "loss": 2.1938, "step": 3576 }, { "epoch": 8.09, "learning_rate": 1.8780744169887918e-07, "loss": 2.3531, "step": 3578 }, { "epoch": 8.1, "learning_rate": 1.8695327724578836e-07, "loss": 2.0558, "step": 3580 }, { "epoch": 8.1, "learning_rate": 1.861008592911113e-07, "loss": 2.1199, "step": 3582 }, { "epoch": 8.11, "learning_rate": 1.8525018966591953e-07, "loss": 2.1674, "step": 3584 }, { "epoch": 8.11, "learning_rate": 1.844012701975286e-07, "loss": 2.5179, "step": 3586 }, { "epoch": 8.12, "learning_rate": 1.8355410270949545e-07, "loss": 2.2588, "step": 3588 }, { "epoch": 8.12, "learning_rate": 1.8270868902161219e-07, "loss": 2.4867, "step": 3590 }, { "epoch": 8.12, "learning_rate": 1.8186503094990512e-07, "loss": 2.5123, "step": 3592 }, { "epoch": 8.13, "learning_rate": 1.810231303066283e-07, "loss": 2.067, "step": 3594 }, { "epoch": 8.13, "learning_rate": 1.8018298890026106e-07, "loss": 2.1489, "step": 3596 }, { "epoch": 8.14, "learning_rate": 1.7934460853550336e-07, "loss": 2.4436, "step": 3598 }, { "epoch": 8.14, "learning_rate": 1.7850799101327318e-07, "loss": 2.6137, "step": 3600 }, { "epoch": 8.15, "learning_rate": 1.7767313813070084e-07, "loss": 2.4861, "step": 3602 }, { "epoch": 8.15, "learning_rate": 1.7684005168112647e-07, "loss": 2.2893, "step": 3604 }, { "epoch": 8.16, "learning_rate": 1.7600873345409517e-07, "loss": 2.3534, "step": 3606 }, { "epoch": 8.16, "learning_rate": 1.7517918523535514e-07, "loss": 2.479, "step": 3608 }, { "epoch": 8.17, "learning_rate": 1.743514088068504e-07, "loss": 2.2013, "step": 3610 }, { "epoch": 8.17, "learning_rate": 1.7352540594672105e-07, "loss": 2.4611, "step": 3612 }, { "epoch": 8.17, "learning_rate": 1.7270117842929577e-07, "loss": 2.2597, "step": 3614 }, { "epoch": 8.18, "learning_rate": 1.7187872802509117e-07, "loss": 2.5705, "step": 3616 }, { "epoch": 8.18, "learning_rate": 1.7105805650080462e-07, "loss": 2.1431, "step": 3618 }, { "epoch": 8.19, "learning_rate": 1.7023916561931405e-07, "loss": 2.3859, "step": 3620 }, { "epoch": 8.19, "learning_rate": 1.6942205713967138e-07, "loss": 2.4055, "step": 3622 }, { "epoch": 8.2, "learning_rate": 1.6860673281710013e-07, "loss": 2.2329, "step": 3624 }, { "epoch": 8.2, "learning_rate": 1.677931944029909e-07, "loss": 2.5085, "step": 3626 }, { "epoch": 8.21, "learning_rate": 1.6698144364489875e-07, "loss": 2.3044, "step": 3628 }, { "epoch": 8.21, "learning_rate": 1.661714822865381e-07, "loss": 2.5598, "step": 3630 }, { "epoch": 8.21, "learning_rate": 1.6536331206777965e-07, "loss": 2.4912, "step": 3632 }, { "epoch": 8.22, "learning_rate": 1.6455693472464638e-07, "loss": 2.1866, "step": 3634 }, { "epoch": 8.22, "learning_rate": 1.6375235198931113e-07, "loss": 2.245, "step": 3636 }, { "epoch": 8.23, "learning_rate": 1.6294956559008976e-07, "loss": 2.3741, "step": 3638 }, { "epoch": 8.23, "learning_rate": 1.6214857725144137e-07, "loss": 2.4793, "step": 3640 }, { "epoch": 8.24, "learning_rate": 1.6134938869396163e-07, "loss": 2.3812, "step": 3642 }, { "epoch": 8.24, "learning_rate": 1.6055200163438032e-07, "loss": 2.203, "step": 3644 }, { "epoch": 8.25, "learning_rate": 1.5975641778555738e-07, "loss": 2.4009, "step": 3646 }, { "epoch": 8.25, "learning_rate": 1.5896263885647964e-07, "loss": 2.2717, "step": 3648 }, { "epoch": 8.26, "learning_rate": 1.581706665522564e-07, "loss": 2.3982, "step": 3650 }, { "epoch": 8.26, "learning_rate": 1.5738050257411628e-07, "loss": 2.2986, "step": 3652 }, { "epoch": 8.26, "learning_rate": 1.5659214861940352e-07, "loss": 2.3759, "step": 3654 }, { "epoch": 8.27, "learning_rate": 1.5580560638157425e-07, "loss": 2.7631, "step": 3656 }, { "epoch": 8.27, "learning_rate": 1.5502087755019245e-07, "loss": 2.4175, "step": 3658 }, { "epoch": 8.28, "learning_rate": 1.542379638109278e-07, "loss": 2.4338, "step": 3660 }, { "epoch": 8.28, "learning_rate": 1.5345686684554994e-07, "loss": 2.4166, "step": 3662 }, { "epoch": 8.29, "learning_rate": 1.5267758833192625e-07, "loss": 2.1653, "step": 3664 }, { "epoch": 8.29, "learning_rate": 1.5190012994401814e-07, "loss": 2.2565, "step": 3666 }, { "epoch": 8.3, "learning_rate": 1.511244933518766e-07, "loss": 2.2848, "step": 3668 }, { "epoch": 8.3, "learning_rate": 1.5035068022164033e-07, "loss": 2.4534, "step": 3670 }, { "epoch": 8.31, "learning_rate": 1.4957869221552998e-07, "loss": 2.4818, "step": 3672 }, { "epoch": 8.31, "learning_rate": 1.4880853099184631e-07, "loss": 2.5356, "step": 3674 }, { "epoch": 8.31, "learning_rate": 1.480401982049655e-07, "loss": 2.2729, "step": 3676 }, { "epoch": 8.32, "learning_rate": 1.4727369550533687e-07, "loss": 2.3927, "step": 3678 }, { "epoch": 8.32, "learning_rate": 1.4650902453947734e-07, "loss": 2.2466, "step": 3680 }, { "epoch": 8.33, "learning_rate": 1.4574618694997032e-07, "loss": 2.5247, "step": 3682 }, { "epoch": 8.33, "learning_rate": 1.4498518437546025e-07, "loss": 2.3951, "step": 3684 }, { "epoch": 8.34, "learning_rate": 1.4422601845065009e-07, "loss": 2.2343, "step": 3686 }, { "epoch": 8.34, "learning_rate": 1.4346869080629698e-07, "loss": 2.3271, "step": 3688 }, { "epoch": 8.35, "learning_rate": 1.427132030692102e-07, "loss": 2.4791, "step": 3690 }, { "epoch": 8.35, "learning_rate": 1.4195955686224604e-07, "loss": 2.3011, "step": 3692 }, { "epoch": 8.36, "learning_rate": 1.4120775380430517e-07, "loss": 2.5667, "step": 3694 }, { "epoch": 8.36, "learning_rate": 1.4045779551032877e-07, "loss": 2.2304, "step": 3696 }, { "epoch": 8.36, "learning_rate": 1.3970968359129632e-07, "loss": 2.2826, "step": 3698 }, { "epoch": 8.37, "learning_rate": 1.3896341965421943e-07, "loss": 2.5231, "step": 3700 }, { "epoch": 8.37, "learning_rate": 1.382190053021416e-07, "loss": 2.0304, "step": 3702 }, { "epoch": 8.38, "learning_rate": 1.3747644213413224e-07, "loss": 2.4519, "step": 3704 }, { "epoch": 8.38, "learning_rate": 1.367357317452854e-07, "loss": 2.3202, "step": 3706 }, { "epoch": 8.39, "learning_rate": 1.3599687572671358e-07, "loss": 2.3184, "step": 3708 }, { "epoch": 8.39, "learning_rate": 1.352598756655474e-07, "loss": 2.0781, "step": 3710 }, { "epoch": 8.4, "learning_rate": 1.3452473314493007e-07, "loss": 2.3513, "step": 3712 }, { "epoch": 8.4, "learning_rate": 1.3379144974401436e-07, "loss": 2.2974, "step": 3714 }, { "epoch": 8.4, "learning_rate": 1.3306002703795983e-07, "loss": 2.2226, "step": 3716 }, { "epoch": 8.41, "learning_rate": 1.3233046659792947e-07, "loss": 2.2723, "step": 3718 }, { "epoch": 8.41, "learning_rate": 1.3160276999108533e-07, "loss": 2.3053, "step": 3720 }, { "epoch": 8.42, "learning_rate": 1.3087693878058591e-07, "loss": 2.4518, "step": 3722 }, { "epoch": 8.42, "learning_rate": 1.3015297452558283e-07, "loss": 2.2635, "step": 3724 }, { "epoch": 8.43, "learning_rate": 1.2943087878121727e-07, "loss": 2.2041, "step": 3726 }, { "epoch": 8.43, "learning_rate": 1.287106530986165e-07, "loss": 2.196, "step": 3728 }, { "epoch": 8.44, "learning_rate": 1.2799229902489117e-07, "loss": 2.3408, "step": 3730 }, { "epoch": 8.44, "learning_rate": 1.2727581810313114e-07, "loss": 2.5223, "step": 3732 }, { "epoch": 8.45, "learning_rate": 1.2656121187240266e-07, "loss": 2.4009, "step": 3734 }, { "epoch": 8.45, "learning_rate": 1.25848481867745e-07, "loss": 2.4088, "step": 3736 }, { "epoch": 8.45, "learning_rate": 1.2513762962016695e-07, "loss": 2.3094, "step": 3738 }, { "epoch": 8.46, "learning_rate": 1.2442865665664414e-07, "loss": 2.354, "step": 3740 }, { "epoch": 8.46, "learning_rate": 1.2372156450011518e-07, "loss": 2.5011, "step": 3742 }, { "epoch": 8.47, "learning_rate": 1.2301635466947812e-07, "loss": 2.5396, "step": 3744 }, { "epoch": 8.47, "learning_rate": 1.2231302867958805e-07, "loss": 2.2474, "step": 3746 }, { "epoch": 8.48, "learning_rate": 1.2161158804125316e-07, "loss": 2.141, "step": 3748 }, { "epoch": 8.48, "learning_rate": 1.2091203426123175e-07, "loss": 1.9833, "step": 3750 }, { "epoch": 8.49, "learning_rate": 1.2021436884222946e-07, "loss": 2.4734, "step": 3752 }, { "epoch": 8.49, "learning_rate": 1.1951859328289493e-07, "loss": 2.401, "step": 3754 }, { "epoch": 8.5, "learning_rate": 1.1882470907781727e-07, "loss": 2.3442, "step": 3756 }, { "epoch": 8.5, "learning_rate": 1.1813271771752298e-07, "loss": 2.3096, "step": 3758 }, { "epoch": 8.5, "learning_rate": 1.17442620688473e-07, "loss": 2.3408, "step": 3760 }, { "epoch": 8.51, "learning_rate": 1.1675441947305787e-07, "loss": 2.3648, "step": 3762 }, { "epoch": 8.51, "learning_rate": 1.1606811554959706e-07, "loss": 2.0118, "step": 3764 }, { "epoch": 8.52, "learning_rate": 1.1538371039233341e-07, "loss": 2.5944, "step": 3766 }, { "epoch": 8.52, "learning_rate": 1.1470120547143236e-07, "loss": 2.4937, "step": 3768 }, { "epoch": 8.53, "learning_rate": 1.1402060225297561e-07, "loss": 2.3807, "step": 3770 }, { "epoch": 8.53, "learning_rate": 1.1334190219896155e-07, "loss": 2.5853, "step": 3772 }, { "epoch": 8.54, "learning_rate": 1.1266510676729956e-07, "loss": 2.4372, "step": 3774 }, { "epoch": 8.54, "learning_rate": 1.1199021741180781e-07, "loss": 2.3959, "step": 3776 }, { "epoch": 8.55, "learning_rate": 1.1131723558220996e-07, "loss": 2.3788, "step": 3778 }, { "epoch": 8.55, "learning_rate": 1.1064616272413262e-07, "loss": 2.4389, "step": 3780 }, { "epoch": 8.55, "learning_rate": 1.0997700027910128e-07, "loss": 2.1827, "step": 3782 }, { "epoch": 8.56, "learning_rate": 1.0930974968453777e-07, "loss": 2.3847, "step": 3784 }, { "epoch": 8.56, "learning_rate": 1.0864441237375698e-07, "loss": 2.2319, "step": 3786 }, { "epoch": 8.57, "learning_rate": 1.0798098977596448e-07, "loss": 2.1224, "step": 3788 }, { "epoch": 8.57, "learning_rate": 1.0731948331625195e-07, "loss": 2.3572, "step": 3790 }, { "epoch": 8.58, "learning_rate": 1.0665989441559586e-07, "loss": 2.2529, "step": 3792 }, { "epoch": 8.58, "learning_rate": 1.0600222449085294e-07, "loss": 2.4526, "step": 3794 }, { "epoch": 8.59, "learning_rate": 1.0534647495475857e-07, "loss": 2.1667, "step": 3796 }, { "epoch": 8.59, "learning_rate": 1.0469264721592197e-07, "loss": 2.6476, "step": 3798 }, { "epoch": 8.59, "learning_rate": 1.0404074267882502e-07, "loss": 2.3072, "step": 3800 }, { "epoch": 8.6, "learning_rate": 1.0339076274381787e-07, "loss": 2.5948, "step": 3802 }, { "epoch": 8.6, "learning_rate": 1.0274270880711666e-07, "loss": 2.2217, "step": 3804 }, { "epoch": 8.61, "learning_rate": 1.0209658226080032e-07, "loss": 2.5243, "step": 3806 }, { "epoch": 8.61, "learning_rate": 1.0145238449280724e-07, "loss": 2.3269, "step": 3808 }, { "epoch": 8.62, "learning_rate": 1.0081011688693341e-07, "loss": 2.2734, "step": 3810 }, { "epoch": 8.62, "learning_rate": 1.0016978082282779e-07, "loss": 2.1973, "step": 3812 }, { "epoch": 8.63, "learning_rate": 9.953137767599073e-08, "loss": 2.1697, "step": 3814 }, { "epoch": 8.63, "learning_rate": 9.889490881777018e-08, "loss": 2.2464, "step": 3816 }, { "epoch": 8.64, "learning_rate": 9.826037561535938e-08, "loss": 2.4254, "step": 3818 }, { "epoch": 8.64, "learning_rate": 9.762777943179312e-08, "loss": 2.3955, "step": 3820 }, { "epoch": 8.64, "learning_rate": 9.699712162594609e-08, "loss": 2.1837, "step": 3822 }, { "epoch": 8.65, "learning_rate": 9.63684035525284e-08, "loss": 2.0977, "step": 3824 }, { "epoch": 8.65, "learning_rate": 9.574162656208384e-08, "loss": 2.4142, "step": 3826 }, { "epoch": 8.66, "learning_rate": 9.511679200098632e-08, "loss": 2.2772, "step": 3828 }, { "epoch": 8.66, "learning_rate": 9.449390121143774e-08, "loss": 2.24, "step": 3830 }, { "epoch": 8.67, "learning_rate": 9.387295553146379e-08, "loss": 2.444, "step": 3832 }, { "epoch": 8.67, "learning_rate": 9.325395629491262e-08, "loss": 2.5115, "step": 3834 }, { "epoch": 8.68, "learning_rate": 9.263690483145103e-08, "loss": 2.2708, "step": 3836 }, { "epoch": 8.68, "learning_rate": 9.202180246656155e-08, "loss": 2.465, "step": 3838 }, { "epoch": 8.69, "learning_rate": 9.140865052154012e-08, "loss": 2.9963, "step": 3840 }, { "epoch": 8.69, "learning_rate": 9.079745031349317e-08, "loss": 2.159, "step": 3842 }, { "epoch": 8.69, "learning_rate": 9.018820315533426e-08, "loss": 2.5147, "step": 3844 }, { "epoch": 8.7, "learning_rate": 8.958091035578174e-08, "loss": 2.3305, "step": 3846 }, { "epoch": 8.7, "learning_rate": 8.89755732193559e-08, "loss": 2.3214, "step": 3848 }, { "epoch": 8.71, "learning_rate": 8.837219304637633e-08, "loss": 2.3948, "step": 3850 }, { "epoch": 8.71, "learning_rate": 8.777077113295794e-08, "loss": 2.428, "step": 3852 }, { "epoch": 8.72, "learning_rate": 8.717130877101031e-08, "loss": 2.279, "step": 3854 }, { "epoch": 8.72, "learning_rate": 8.657380724823294e-08, "loss": 2.2065, "step": 3856 }, { "epoch": 8.73, "learning_rate": 8.597826784811402e-08, "loss": 2.5204, "step": 3858 }, { "epoch": 8.73, "learning_rate": 8.538469184992559e-08, "loss": 2.1397, "step": 3860 }, { "epoch": 8.74, "learning_rate": 8.479308052872359e-08, "loss": 2.4221, "step": 3862 }, { "epoch": 8.74, "learning_rate": 8.42034351553429e-08, "loss": 2.1741, "step": 3864 }, { "epoch": 8.74, "learning_rate": 8.361575699639534e-08, "loss": 2.199, "step": 3866 }, { "epoch": 8.75, "learning_rate": 8.30300473142671e-08, "loss": 2.5473, "step": 3868 }, { "epoch": 8.75, "learning_rate": 8.244630736711621e-08, "loss": 2.536, "step": 3870 }, { "epoch": 8.76, "learning_rate": 8.186453840886897e-08, "loss": 2.2939, "step": 3872 }, { "epoch": 8.76, "learning_rate": 8.128474168921828e-08, "loss": 2.2578, "step": 3874 }, { "epoch": 8.77, "learning_rate": 8.070691845361999e-08, "loss": 2.4713, "step": 3876 }, { "epoch": 8.77, "learning_rate": 8.013106994329155e-08, "loss": 2.2558, "step": 3878 }, { "epoch": 8.78, "learning_rate": 7.95571973952075e-08, "loss": 2.4457, "step": 3880 }, { "epoch": 8.78, "learning_rate": 7.898530204209864e-08, "loss": 2.419, "step": 3882 }, { "epoch": 8.78, "learning_rate": 7.841538511244816e-08, "loss": 2.0561, "step": 3884 }, { "epoch": 8.79, "learning_rate": 7.784744783048958e-08, "loss": 2.2682, "step": 3886 }, { "epoch": 8.79, "learning_rate": 7.7281491416204e-08, "loss": 2.4426, "step": 3888 }, { "epoch": 8.8, "learning_rate": 7.671751708531692e-08, "loss": 2.0675, "step": 3890 }, { "epoch": 8.8, "learning_rate": 7.61555260492972e-08, "loss": 2.0942, "step": 3892 }, { "epoch": 8.81, "learning_rate": 7.559551951535237e-08, "loss": 2.2835, "step": 3894 }, { "epoch": 8.81, "learning_rate": 7.503749868642762e-08, "loss": 2.2816, "step": 3896 }, { "epoch": 8.82, "learning_rate": 7.448146476120232e-08, "loss": 2.3509, "step": 3898 }, { "epoch": 8.82, "learning_rate": 7.392741893408838e-08, "loss": 2.3625, "step": 3900 }, { "epoch": 8.83, "learning_rate": 7.337536239522623e-08, "loss": 2.3323, "step": 3902 }, { "epoch": 8.83, "learning_rate": 7.282529633048385e-08, "loss": 2.3537, "step": 3904 }, { "epoch": 8.83, "learning_rate": 7.227722192145325e-08, "loss": 2.3887, "step": 3906 }, { "epoch": 8.84, "learning_rate": 7.173114034544825e-08, "loss": 2.1123, "step": 3908 }, { "epoch": 8.84, "learning_rate": 7.118705277550142e-08, "loss": 2.197, "step": 3910 }, { "epoch": 8.85, "learning_rate": 7.064496038036293e-08, "loss": 2.387, "step": 3912 }, { "epoch": 8.85, "learning_rate": 7.010486432449636e-08, "loss": 2.4442, "step": 3914 }, { "epoch": 8.86, "learning_rate": 6.95667657680773e-08, "loss": 2.4656, "step": 3916 }, { "epoch": 8.86, "learning_rate": 6.903066586699014e-08, "loss": 2.1219, "step": 3918 }, { "epoch": 8.87, "learning_rate": 6.849656577282703e-08, "loss": 2.4213, "step": 3920 }, { "epoch": 8.87, "learning_rate": 6.79644666328828e-08, "loss": 2.1535, "step": 3922 }, { "epoch": 8.88, "learning_rate": 6.743436959015547e-08, "loss": 2.6134, "step": 3924 }, { "epoch": 8.88, "learning_rate": 6.69062757833414e-08, "loss": 2.198, "step": 3926 }, { "epoch": 8.88, "learning_rate": 6.63801863468344e-08, "loss": 2.2652, "step": 3928 }, { "epoch": 8.89, "learning_rate": 6.585610241072226e-08, "loss": 2.3245, "step": 3930 }, { "epoch": 8.89, "learning_rate": 6.533402510078533e-08, "loss": 2.1564, "step": 3932 }, { "epoch": 8.9, "learning_rate": 6.4813955538493e-08, "loss": 2.2783, "step": 3934 }, { "epoch": 8.9, "learning_rate": 6.429589484100217e-08, "loss": 2.4365, "step": 3936 }, { "epoch": 8.91, "learning_rate": 6.377984412115423e-08, "loss": 2.4163, "step": 3938 }, { "epoch": 8.91, "learning_rate": 6.326580448747343e-08, "loss": 2.1423, "step": 3940 }, { "epoch": 8.92, "learning_rate": 6.27537770441633e-08, "loss": 2.2192, "step": 3942 }, { "epoch": 8.92, "learning_rate": 6.224376289110589e-08, "loss": 2.2585, "step": 3944 }, { "epoch": 8.93, "learning_rate": 6.173576312385765e-08, "loss": 2.2334, "step": 3946 }, { "epoch": 8.93, "learning_rate": 6.122977883364877e-08, "loss": 2.2312, "step": 3948 }, { "epoch": 8.93, "learning_rate": 6.072581110737907e-08, "loss": 2.5184, "step": 3950 }, { "epoch": 8.94, "learning_rate": 6.022386102761756e-08, "loss": 2.5329, "step": 3952 }, { "epoch": 8.94, "learning_rate": 5.972392967259865e-08, "loss": 2.418, "step": 3954 }, { "epoch": 8.95, "learning_rate": 5.92260181162203e-08, "loss": 2.2043, "step": 3956 }, { "epoch": 8.95, "learning_rate": 5.873012742804173e-08, "loss": 2.469, "step": 3958 }, { "epoch": 8.96, "learning_rate": 5.823625867328175e-08, "loss": 2.3175, "step": 3960 }, { "epoch": 8.96, "learning_rate": 5.774441291281518e-08, "loss": 2.5495, "step": 3962 }, { "epoch": 8.97, "learning_rate": 5.725459120317156e-08, "loss": 2.2427, "step": 3964 }, { "epoch": 8.97, "learning_rate": 5.676679459653233e-08, "loss": 2.5635, "step": 3966 }, { "epoch": 8.97, "learning_rate": 5.628102414072933e-08, "loss": 2.2622, "step": 3968 }, { "epoch": 8.98, "learning_rate": 5.579728087924162e-08, "loss": 2.3046, "step": 3970 }, { "epoch": 8.98, "learning_rate": 5.531556585119357e-08, "loss": 2.1263, "step": 3972 }, { "epoch": 8.99, "learning_rate": 5.4835880091353314e-08, "loss": 2.1675, "step": 3974 }, { "epoch": 8.99, "learning_rate": 5.4358224630129404e-08, "loss": 2.579, "step": 3976 }, { "epoch": 9.0, "learning_rate": 5.388260049356919e-08, "loss": 2.4221, "step": 3978 }, { "epoch": 9.0, "learning_rate": 5.3409008703356626e-08, "loss": 2.3077, "step": 3980 }, { "epoch": 9.01, "learning_rate": 5.293745027681029e-08, "loss": 2.4017, "step": 3982 }, { "epoch": 9.01, "learning_rate": 5.246792622688023e-08, "loss": 2.1001, "step": 3984 }, { "epoch": 9.02, "learning_rate": 5.2000437562147225e-08, "loss": 2.3728, "step": 3986 }, { "epoch": 9.02, "learning_rate": 5.15349852868191e-08, "loss": 2.3596, "step": 3988 }, { "epoch": 9.02, "learning_rate": 5.1071570400730405e-08, "loss": 2.3265, "step": 3990 }, { "epoch": 9.03, "learning_rate": 5.061019389933774e-08, "loss": 2.3075, "step": 3992 }, { "epoch": 9.03, "learning_rate": 5.015085677372044e-08, "loss": 2.2687, "step": 3994 }, { "epoch": 9.04, "learning_rate": 4.969356001057612e-08, "loss": 2.3342, "step": 3996 }, { "epoch": 9.04, "learning_rate": 4.9238304592220117e-08, "loss": 2.2943, "step": 3998 }, { "epoch": 9.05, "learning_rate": 4.8785091496582385e-08, "loss": 2.1758, "step": 4000 }, { "epoch": 9.05, "learning_rate": 4.833392169720607e-08, "loss": 2.2717, "step": 4002 }, { "epoch": 9.06, "learning_rate": 4.788479616324481e-08, "loss": 2.3099, "step": 4004 }, { "epoch": 9.06, "learning_rate": 4.743771585946144e-08, "loss": 2.584, "step": 4006 }, { "epoch": 9.07, "learning_rate": 4.699268174622473e-08, "loss": 2.16, "step": 4008 }, { "epoch": 9.07, "learning_rate": 4.6549694779509094e-08, "loss": 2.6906, "step": 4010 }, { "epoch": 9.07, "learning_rate": 4.610875591089025e-08, "loss": 2.1513, "step": 4012 }, { "epoch": 9.08, "learning_rate": 4.566986608754553e-08, "loss": 2.2294, "step": 4014 }, { "epoch": 9.08, "learning_rate": 4.5233026252250005e-08, "loss": 2.1508, "step": 4016 }, { "epoch": 9.09, "learning_rate": 4.479823734337551e-08, "loss": 2.2828, "step": 4018 }, { "epoch": 9.09, "learning_rate": 4.4365500294888056e-08, "loss": 2.3614, "step": 4020 }, { "epoch": 9.1, "learning_rate": 4.3934816036346525e-08, "loss": 2.3381, "step": 4022 }, { "epoch": 9.1, "learning_rate": 4.350618549289986e-08, "loss": 1.9958, "step": 4024 }, { "epoch": 9.11, "learning_rate": 4.307960958528534e-08, "loss": 2.2663, "step": 4026 }, { "epoch": 9.11, "learning_rate": 4.265508922982686e-08, "loss": 2.0485, "step": 4028 }, { "epoch": 9.12, "learning_rate": 4.223262533843319e-08, "loss": 2.4741, "step": 4030 }, { "epoch": 9.12, "learning_rate": 4.1812218818594757e-08, "loss": 2.2407, "step": 4032 }, { "epoch": 9.12, "learning_rate": 4.1393870573383394e-08, "loss": 2.2279, "step": 4034 }, { "epoch": 9.13, "learning_rate": 4.097758150144903e-08, "loss": 2.5935, "step": 4036 }, { "epoch": 9.13, "learning_rate": 4.05633524970187e-08, "loss": 2.4199, "step": 4038 }, { "epoch": 9.14, "learning_rate": 4.015118444989374e-08, "loss": 2.3565, "step": 4040 }, { "epoch": 9.14, "learning_rate": 3.974107824544892e-08, "loss": 2.7166, "step": 4042 }, { "epoch": 9.15, "learning_rate": 3.9333034764629793e-08, "loss": 2.3456, "step": 4044 }, { "epoch": 9.15, "learning_rate": 3.892705488395065e-08, "loss": 2.4363, "step": 4046 }, { "epoch": 9.16, "learning_rate": 3.852313947549335e-08, "loss": 2.2772, "step": 4048 }, { "epoch": 9.16, "learning_rate": 3.812128940690496e-08, "loss": 2.4964, "step": 4050 }, { "epoch": 9.16, "learning_rate": 3.7721505541396305e-08, "loss": 2.3199, "step": 4052 }, { "epoch": 9.17, "learning_rate": 3.7323788737739005e-08, "loss": 2.2969, "step": 4054 }, { "epoch": 9.17, "learning_rate": 3.6928139850265436e-08, "loss": 2.1398, "step": 4056 }, { "epoch": 9.18, "learning_rate": 3.6534559728865324e-08, "loss": 2.2636, "step": 4058 }, { "epoch": 9.18, "learning_rate": 3.6143049218984586e-08, "loss": 2.3242, "step": 4060 }, { "epoch": 9.19, "learning_rate": 3.575360916162329e-08, "loss": 2.4013, "step": 4062 }, { "epoch": 9.19, "learning_rate": 3.536624039333447e-08, "loss": 2.0722, "step": 4064 }, { "epoch": 9.2, "learning_rate": 3.4980943746221295e-08, "loss": 2.2253, "step": 4066 }, { "epoch": 9.2, "learning_rate": 3.459772004793615e-08, "loss": 2.5274, "step": 4068 }, { "epoch": 9.21, "learning_rate": 3.421657012167834e-08, "loss": 2.185, "step": 4070 }, { "epoch": 9.21, "learning_rate": 3.383749478619291e-08, "loss": 2.4211, "step": 4072 }, { "epoch": 9.21, "learning_rate": 3.346049485576774e-08, "loss": 2.2866, "step": 4074 }, { "epoch": 9.22, "learning_rate": 3.308557114023347e-08, "loss": 2.4034, "step": 4076 }, { "epoch": 9.22, "learning_rate": 3.271272444495998e-08, "loss": 2.2678, "step": 4078 }, { "epoch": 9.23, "learning_rate": 3.2341955570856506e-08, "loss": 2.1473, "step": 4080 }, { "epoch": 9.23, "learning_rate": 3.197326531436773e-08, "loss": 1.9607, "step": 4082 }, { "epoch": 9.24, "learning_rate": 3.1606654467474236e-08, "loss": 2.3081, "step": 4084 }, { "epoch": 9.24, "learning_rate": 3.124212381768942e-08, "loss": 2.3196, "step": 4086 }, { "epoch": 9.25, "learning_rate": 3.087967414805848e-08, "loss": 2.2704, "step": 4088 }, { "epoch": 9.25, "learning_rate": 3.051930623715604e-08, "loss": 2.2809, "step": 4090 }, { "epoch": 9.26, "learning_rate": 3.016102085908534e-08, "loss": 2.5148, "step": 4092 }, { "epoch": 9.26, "learning_rate": 2.9804818783476184e-08, "loss": 2.3634, "step": 4094 }, { "epoch": 9.26, "learning_rate": 2.945070077548284e-08, "loss": 2.2721, "step": 4096 }, { "epoch": 9.27, "learning_rate": 2.9098667595782945e-08, "loss": 2.5452, "step": 4098 }, { "epoch": 9.27, "learning_rate": 2.8748720000576265e-08, "loss": 2.4443, "step": 4100 }, { "epoch": 9.28, "learning_rate": 2.8400858741581602e-08, "loss": 2.5627, "step": 4102 }, { "epoch": 9.28, "learning_rate": 2.805508456603689e-08, "loss": 2.5705, "step": 4104 }, { "epoch": 9.29, "learning_rate": 2.7711398216696658e-08, "loss": 2.1941, "step": 4106 }, { "epoch": 9.29, "learning_rate": 2.7369800431830236e-08, "loss": 2.3704, "step": 4108 }, { "epoch": 9.3, "learning_rate": 2.7030291945220885e-08, "loss": 2.5346, "step": 4110 }, { "epoch": 9.3, "learning_rate": 2.669287348616378e-08, "loss": 2.0938, "step": 4112 }, { "epoch": 9.31, "learning_rate": 2.6357545779464584e-08, "loss": 2.6161, "step": 4114 }, { "epoch": 9.31, "learning_rate": 2.602430954543755e-08, "loss": 2.261, "step": 4116 }, { "epoch": 9.31, "learning_rate": 2.5693165499904635e-08, "loss": 2.2991, "step": 4118 }, { "epoch": 9.32, "learning_rate": 2.5364114354193277e-08, "loss": 1.9709, "step": 4120 }, { "epoch": 9.32, "learning_rate": 2.5037156815135408e-08, "loss": 2.7381, "step": 4122 }, { "epoch": 9.33, "learning_rate": 2.4712293585065546e-08, "loss": 2.0963, "step": 4124 }, { "epoch": 9.33, "learning_rate": 2.4389525361819487e-08, "loss": 2.1209, "step": 4126 }, { "epoch": 9.34, "learning_rate": 2.4068852838732945e-08, "loss": 2.2181, "step": 4128 }, { "epoch": 9.34, "learning_rate": 2.3750276704639472e-08, "loss": 2.2637, "step": 4130 }, { "epoch": 9.35, "learning_rate": 2.3433797643869658e-08, "loss": 2.1917, "step": 4132 }, { "epoch": 9.35, "learning_rate": 2.3119416336249588e-08, "loss": 2.1542, "step": 4134 }, { "epoch": 9.35, "learning_rate": 2.2807133457098504e-08, "loss": 2.492, "step": 4136 }, { "epoch": 9.36, "learning_rate": 2.2496949677228927e-08, "loss": 2.3315, "step": 4138 }, { "epoch": 9.36, "learning_rate": 2.2188865662943536e-08, "loss": 2.2493, "step": 4140 }, { "epoch": 9.37, "learning_rate": 2.188288207603517e-08, "loss": 2.5447, "step": 4142 }, { "epoch": 9.37, "learning_rate": 2.1578999573784063e-08, "loss": 2.3446, "step": 4144 }, { "epoch": 9.38, "learning_rate": 2.127721880895783e-08, "loss": 2.1998, "step": 4146 }, { "epoch": 9.38, "learning_rate": 2.0977540429808926e-08, "loss": 2.0618, "step": 4148 }, { "epoch": 9.39, "learning_rate": 2.067996508007386e-08, "loss": 2.1654, "step": 4150 }, { "epoch": 9.39, "learning_rate": 2.0384493398971303e-08, "loss": 2.1868, "step": 4152 }, { "epoch": 9.4, "learning_rate": 2.0091126021201775e-08, "loss": 2.3577, "step": 4154 }, { "epoch": 9.4, "learning_rate": 1.9799863576944853e-08, "loss": 2.1735, "step": 4156 }, { "epoch": 9.4, "learning_rate": 1.9510706691858835e-08, "loss": 2.4039, "step": 4158 }, { "epoch": 9.41, "learning_rate": 1.922365598707909e-08, "loss": 2.4402, "step": 4160 }, { "epoch": 9.41, "learning_rate": 1.893871207921671e-08, "loss": 2.2745, "step": 4162 }, { "epoch": 9.42, "learning_rate": 1.8655875580356974e-08, "loss": 2.5174, "step": 4164 }, { "epoch": 9.42, "learning_rate": 1.8375147098058653e-08, "loss": 2.7251, "step": 4166 }, { "epoch": 9.43, "learning_rate": 1.8096527235351934e-08, "loss": 2.0941, "step": 4168 }, { "epoch": 9.43, "learning_rate": 1.7820016590737842e-08, "loss": 2.4482, "step": 4170 }, { "epoch": 9.44, "learning_rate": 1.754561575818625e-08, "loss": 2.3424, "step": 4172 }, { "epoch": 9.44, "learning_rate": 1.7273325327135215e-08, "loss": 2.3049, "step": 4174 }, { "epoch": 9.45, "learning_rate": 1.700314588248952e-08, "loss": 2.5217, "step": 4176 }, { "epoch": 9.45, "learning_rate": 1.673507800461893e-08, "loss": 2.3833, "step": 4178 }, { "epoch": 9.45, "learning_rate": 1.6469122269357817e-08, "loss": 2.323, "step": 4180 }, { "epoch": 9.46, "learning_rate": 1.6205279248003413e-08, "loss": 2.3853, "step": 4182 }, { "epoch": 9.46, "learning_rate": 1.5943549507314468e-08, "loss": 2.3385, "step": 4184 }, { "epoch": 9.47, "learning_rate": 1.568393360951026e-08, "loss": 2.1541, "step": 4186 }, { "epoch": 9.47, "learning_rate": 1.542643211226946e-08, "loss": 2.5234, "step": 4188 }, { "epoch": 9.48, "learning_rate": 1.5171045568728723e-08, "loss": 1.9526, "step": 4190 }, { "epoch": 9.48, "learning_rate": 1.4917774527481442e-08, "loss": 2.4961, "step": 4192 }, { "epoch": 9.49, "learning_rate": 1.4666619532577196e-08, "loss": 2.3186, "step": 4194 }, { "epoch": 9.49, "learning_rate": 1.441758112351954e-08, "loss": 2.2356, "step": 4196 }, { "epoch": 9.5, "learning_rate": 1.4170659835265774e-08, "loss": 2.155, "step": 4198 }, { "epoch": 9.5, "learning_rate": 1.3925856198225283e-08, "loss": 2.2409, "step": 4200 }, { "epoch": 9.5, "learning_rate": 1.3683170738258532e-08, "loss": 2.4088, "step": 4202 }, { "epoch": 9.51, "learning_rate": 1.344260397667607e-08, "loss": 2.0696, "step": 4204 }, { "epoch": 9.51, "learning_rate": 1.32041564302372e-08, "loss": 2.1969, "step": 4206 }, { "epoch": 9.52, "learning_rate": 1.2967828611148868e-08, "loss": 2.1321, "step": 4208 }, { "epoch": 9.52, "learning_rate": 1.2733621027064989e-08, "loss": 2.1224, "step": 4210 }, { "epoch": 9.53, "learning_rate": 1.2501534181084461e-08, "loss": 2.6715, "step": 4212 }, { "epoch": 9.53, "learning_rate": 1.2271568571751157e-08, "loss": 2.4569, "step": 4214 }, { "epoch": 9.54, "learning_rate": 1.204372469305226e-08, "loss": 2.4008, "step": 4216 }, { "epoch": 9.54, "learning_rate": 1.1818003034416935e-08, "loss": 2.3672, "step": 4218 }, { "epoch": 9.54, "learning_rate": 1.1594404080716103e-08, "loss": 2.4229, "step": 4220 }, { "epoch": 9.55, "learning_rate": 1.1372928312260333e-08, "loss": 2.4505, "step": 4222 }, { "epoch": 9.55, "learning_rate": 1.1153576204800285e-08, "loss": 2.4657, "step": 4224 }, { "epoch": 9.56, "learning_rate": 1.0936348229523717e-08, "loss": 2.6675, "step": 4226 }, { "epoch": 9.56, "learning_rate": 1.0721244853056366e-08, "loss": 2.1615, "step": 4228 }, { "epoch": 9.57, "learning_rate": 1.0508266537459843e-08, "loss": 2.3642, "step": 4230 }, { "epoch": 9.57, "learning_rate": 1.0297413740231076e-08, "loss": 2.2433, "step": 4232 }, { "epoch": 9.58, "learning_rate": 1.008868691430087e-08, "loss": 2.5839, "step": 4234 }, { "epoch": 9.58, "learning_rate": 9.882086508033571e-09, "loss": 2.6095, "step": 4236 }, { "epoch": 9.59, "learning_rate": 9.677612965225734e-09, "loss": 2.4396, "step": 4238 }, { "epoch": 9.59, "learning_rate": 9.475266725105014e-09, "loss": 2.5627, "step": 4240 }, { "epoch": 9.59, "learning_rate": 9.275048222329607e-09, "loss": 2.188, "step": 4242 }, { "epoch": 9.6, "learning_rate": 9.076957886987146e-09, "loss": 2.2413, "step": 4244 }, { "epoch": 9.6, "learning_rate": 8.88099614459381e-09, "loss": 2.3452, "step": 4246 }, { "epoch": 9.61, "learning_rate": 8.68716341609299e-09, "loss": 2.3178, "step": 4248 }, { "epoch": 9.61, "learning_rate": 8.49546011785518e-09, "loss": 2.4892, "step": 4250 }, { "epoch": 9.62, "learning_rate": 8.305886661676752e-09, "loss": 2.4523, "step": 4252 }, { "epoch": 9.62, "learning_rate": 8.118443454778302e-09, "loss": 2.4388, "step": 4254 }, { "epoch": 9.63, "learning_rate": 7.933130899805296e-09, "loss": 2.1198, "step": 4256 }, { "epoch": 9.63, "learning_rate": 7.74994939482576e-09, "loss": 2.5573, "step": 4258 }, { "epoch": 9.64, "learning_rate": 7.56889933333038e-09, "loss": 2.2659, "step": 4260 }, { "epoch": 9.64, "learning_rate": 7.389981104231169e-09, "loss": 2.1934, "step": 4262 }, { "epoch": 9.64, "learning_rate": 7.213195091860691e-09, "loss": 2.2468, "step": 4264 }, { "epoch": 9.65, "learning_rate": 7.038541675971399e-09, "loss": 2.3865, "step": 4266 }, { "epoch": 9.65, "learning_rate": 6.866021231734853e-09, "loss": 2.3968, "step": 4268 }, { "epoch": 9.66, "learning_rate": 6.695634129740613e-09, "loss": 2.3712, "step": 4270 }, { "epoch": 9.66, "learning_rate": 6.52738073599568e-09, "loss": 2.2353, "step": 4272 }, { "epoch": 9.67, "learning_rate": 6.361261411923724e-09, "loss": 2.5092, "step": 4274 }, { "epoch": 9.67, "learning_rate": 6.19727651436408e-09, "loss": 2.2469, "step": 4276 }, { "epoch": 9.68, "learning_rate": 6.035426395571419e-09, "loss": 2.1285, "step": 4278 }, { "epoch": 9.68, "learning_rate": 5.875711403214412e-09, "loss": 2.2678, "step": 4280 }, { "epoch": 9.69, "learning_rate": 5.718131880375398e-09, "loss": 2.3838, "step": 4282 }, { "epoch": 9.69, "learning_rate": 5.5626881655495006e-09, "loss": 2.2587, "step": 4284 }, { "epoch": 9.69, "learning_rate": 5.4093805926440635e-09, "loss": 2.3756, "step": 4286 }, { "epoch": 9.7, "learning_rate": 5.258209490977772e-09, "loss": 2.2056, "step": 4288 }, { "epoch": 9.7, "learning_rate": 5.109175185279757e-09, "loss": 2.1925, "step": 4290 }, { "epoch": 9.71, "learning_rate": 4.96227799568949e-09, "loss": 2.3737, "step": 4292 }, { "epoch": 9.71, "learning_rate": 4.8175182377553355e-09, "loss": 2.2732, "step": 4294 }, { "epoch": 9.72, "learning_rate": 4.674896222434555e-09, "loss": 2.326, "step": 4296 }, { "epoch": 9.72, "learning_rate": 4.5344122560920795e-09, "loss": 2.5249, "step": 4298 }, { "epoch": 9.73, "learning_rate": 4.396066640500406e-09, "loss": 2.2555, "step": 4300 }, { "epoch": 9.73, "learning_rate": 4.259859672838484e-09, "loss": 2.5123, "step": 4302 }, { "epoch": 9.73, "learning_rate": 4.125791645691268e-09, "loss": 2.1902, "step": 4304 }, { "epoch": 9.74, "learning_rate": 3.993862847049167e-09, "loss": 2.2976, "step": 4306 }, { "epoch": 9.74, "learning_rate": 3.864073560307046e-09, "loss": 2.2722, "step": 4308 }, { "epoch": 9.75, "learning_rate": 3.736424064264443e-09, "loss": 2.1837, "step": 4310 }, { "epoch": 9.75, "learning_rate": 3.610914633123685e-09, "loss": 2.5105, "step": 4312 }, { "epoch": 9.76, "learning_rate": 3.487545536490888e-09, "loss": 2.0634, "step": 4314 }, { "epoch": 9.76, "learning_rate": 3.366317039373845e-09, "loss": 2.4091, "step": 4316 }, { "epoch": 9.77, "learning_rate": 3.247229402182472e-09, "loss": 2.4849, "step": 4318 }, { "epoch": 9.77, "learning_rate": 3.130282880727808e-09, "loss": 2.7144, "step": 4320 }, { "epoch": 9.78, "learning_rate": 3.0154777262217934e-09, "loss": 2.5959, "step": 4322 }, { "epoch": 9.78, "learning_rate": 2.902814185276159e-09, "loss": 2.1911, "step": 4324 }, { "epoch": 9.78, "learning_rate": 2.7922924999025375e-09, "loss": 2.4163, "step": 4326 }, { "epoch": 9.79, "learning_rate": 2.6839129075116873e-09, "loss": 2.195, "step": 4328 }, { "epoch": 9.79, "learning_rate": 2.577675640912602e-09, "loss": 2.1792, "step": 4330 }, { "epoch": 9.8, "learning_rate": 2.4735809283128463e-09, "loss": 2.3718, "step": 4332 }, { "epoch": 9.8, "learning_rate": 2.3716289933172208e-09, "loss": 2.4066, "step": 4334 }, { "epoch": 9.81, "learning_rate": 2.2718200549277644e-09, "loss": 2.2239, "step": 4336 }, { "epoch": 9.81, "learning_rate": 2.1741543275431983e-09, "loss": 2.3306, "step": 4338 }, { "epoch": 9.82, "learning_rate": 2.0786320209584817e-09, "loss": 2.4882, "step": 4340 }, { "epoch": 9.82, "learning_rate": 1.9852533403640347e-09, "loss": 2.5039, "step": 4342 }, { "epoch": 9.83, "learning_rate": 1.894018486345961e-09, "loss": 2.3346, "step": 4344 }, { "epoch": 9.83, "learning_rate": 1.8049276548848247e-09, "loss": 2.5675, "step": 4346 }, { "epoch": 9.83, "learning_rate": 1.7179810373560977e-09, "loss": 2.2032, "step": 4348 }, { "epoch": 9.84, "learning_rate": 1.6331788205288245e-09, "loss": 1.9619, "step": 4350 }, { "epoch": 9.84, "learning_rate": 1.5505211865660673e-09, "loss": 2.184, "step": 4352 }, { "epoch": 9.85, "learning_rate": 1.470008313023796e-09, "loss": 2.4945, "step": 4354 }, { "epoch": 9.85, "learning_rate": 1.3916403728509995e-09, "loss": 2.3055, "step": 4356 }, { "epoch": 9.86, "learning_rate": 1.3154175343893514e-09, "loss": 2.3081, "step": 4358 }, { "epoch": 9.86, "learning_rate": 1.241339961372212e-09, "loss": 2.671, "step": 4360 }, { "epoch": 9.87, "learning_rate": 1.1694078129250717e-09, "loss": 2.052, "step": 4362 }, { "epoch": 9.87, "learning_rate": 1.0996212435647745e-09, "loss": 2.3833, "step": 4364 }, { "epoch": 9.88, "learning_rate": 1.0319804031990732e-09, "loss": 2.4489, "step": 4366 }, { "epoch": 9.88, "learning_rate": 9.664854371268516e-10, "loss": 2.2591, "step": 4368 }, { "epoch": 9.88, "learning_rate": 9.03136486037126e-10, "loss": 2.4332, "step": 4370 }, { "epoch": 9.89, "learning_rate": 8.419336860092663e-10, "loss": 2.1944, "step": 4372 }, { "epoch": 9.89, "learning_rate": 7.828771685124413e-10, "loss": 2.1891, "step": 4374 }, { "epoch": 9.9, "learning_rate": 7.259670604053969e-10, "loss": 2.4247, "step": 4376 }, { "epoch": 9.9, "learning_rate": 6.712034839363445e-10, "loss": 2.172, "step": 4378 }, { "epoch": 9.91, "learning_rate": 6.185865567422955e-10, "loss": 2.3444, "step": 4380 }, { "epoch": 9.91, "learning_rate": 5.681163918492827e-10, "loss": 2.3733, "step": 4382 }, { "epoch": 9.92, "learning_rate": 5.197930976718056e-10, "loss": 2.1766, "step": 4384 }, { "epoch": 9.92, "learning_rate": 4.736167780127199e-10, "loss": 2.3506, "step": 4386 }, { "epoch": 9.92, "learning_rate": 4.2958753206290275e-10, "loss": 2.2205, "step": 4388 }, { "epoch": 9.93, "learning_rate": 3.877054544011438e-10, "loss": 2.1202, "step": 4390 }, { "epoch": 9.93, "learning_rate": 3.479706349941436e-10, "loss": 2.5056, "step": 4392 }, { "epoch": 9.94, "learning_rate": 3.1038315919584834e-10, "loss": 2.2159, "step": 4394 }, { "epoch": 9.94, "learning_rate": 2.7494310774744955e-10, "loss": 2.1418, "step": 4396 }, { "epoch": 9.95, "learning_rate": 2.4165055677749515e-10, "loss": 2.5672, "step": 4398 }, { "epoch": 9.95, "learning_rate": 2.105055778014453e-10, "loss": 2.844, "step": 4400 }, { "epoch": 9.96, "learning_rate": 1.8150823772156155e-10, "loss": 2.2968, "step": 4402 }, { "epoch": 9.96, "learning_rate": 1.5465859882668463e-10, "loss": 2.2367, "step": 4404 }, { "epoch": 9.97, "learning_rate": 1.299567187923456e-10, "loss": 2.4825, "step": 4406 }, { "epoch": 9.97, "learning_rate": 1.0740265068043264e-10, "loss": 2.3133, "step": 4408 }, { "epoch": 9.97, "learning_rate": 8.699644293908016e-11, "loss": 2.3099, "step": 4410 }, { "epoch": 9.98, "learning_rate": 6.873813940277973e-11, "loss": 2.3648, "step": 4412 }, { "epoch": 9.98, "learning_rate": 5.262777929193607e-11, "loss": 2.439, "step": 4414 }, { "epoch": 9.99, "learning_rate": 3.8665397213089034e-11, "loss": 2.4944, "step": 4416 }, { "epoch": 9.99, "learning_rate": 2.685102315880261e-11, "loss": 2.1788, "step": 4418 }, { "epoch": 10.0, "learning_rate": 1.718468250733185e-11, "loss": 2.394, "step": 4420 }, { "epoch": 10.0, "step": 4420, "total_flos": 1.0214099064245453e+17, "train_loss": 2.5490156790250027, "train_runtime": 26958.9225, "train_samples_per_second": 10.496, "train_steps_per_second": 0.164 } ], "logging_steps": 2, "max_steps": 4420, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 4000, "total_flos": 1.0214099064245453e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }