diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13290 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.997172745264349, + "eval_steps": 500, + "global_step": 4420, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 6.8903, + "step": 2 + }, + { + "epoch": 0.01, + "learning_rate": 1.5037593984962404e-08, + "loss": 7.056, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 4.5112781954887216e-08, + "loss": 7.1163, + "step": 6 + }, + { + "epoch": 0.02, + "learning_rate": 7.518796992481202e-08, + "loss": 7.0162, + "step": 8 + }, + { + "epoch": 0.02, + "learning_rate": 1.0526315789473683e-07, + "loss": 7.1292, + "step": 10 + }, + { + "epoch": 0.03, + "learning_rate": 1.3533834586466163e-07, + "loss": 7.0274, + "step": 12 + }, + { + "epoch": 0.03, + "learning_rate": 1.6541353383458646e-07, + "loss": 7.0955, + "step": 14 + }, + { + "epoch": 0.04, + "learning_rate": 1.9548872180451126e-07, + "loss": 7.0135, + "step": 16 + }, + { + "epoch": 0.04, + "learning_rate": 2.2556390977443606e-07, + "loss": 7.191, + "step": 18 + }, + { + "epoch": 0.05, + "learning_rate": 2.556390977443609e-07, + "loss": 6.8574, + "step": 20 + }, + { + "epoch": 0.05, + "learning_rate": 2.857142857142857e-07, + "loss": 7.0637, + "step": 22 + }, + { + "epoch": 0.05, + "learning_rate": 3.157894736842105e-07, + "loss": 6.6183, + "step": 24 + }, + { + "epoch": 0.06, + "learning_rate": 3.458646616541353e-07, + "loss": 6.7347, + "step": 26 + }, + { + "epoch": 0.06, + "learning_rate": 3.759398496240601e-07, + "loss": 6.6015, + "step": 28 + }, + { + "epoch": 0.07, + "learning_rate": 4.060150375939849e-07, + "loss": 6.4449, + "step": 30 + }, + { + "epoch": 0.07, + "learning_rate": 4.3609022556390975e-07, + "loss": 6.1891, + "step": 32 + }, + { + "epoch": 0.08, + "learning_rate": 4.6616541353383456e-07, + "loss": 6.3982, + "step": 34 + }, + { + "epoch": 0.08, + "learning_rate": 4.962406015037593e-07, + "loss": 6.1609, + "step": 36 + }, + { + "epoch": 0.09, + "learning_rate": 5.263157894736842e-07, + "loss": 6.0694, + "step": 38 + }, + { + "epoch": 0.09, + "learning_rate": 5.56390977443609e-07, + "loss": 6.3096, + "step": 40 + }, + { + "epoch": 0.09, + "learning_rate": 5.864661654135338e-07, + "loss": 5.8542, + "step": 42 + }, + { + "epoch": 0.1, + "learning_rate": 6.165413533834586e-07, + "loss": 5.871, + "step": 44 + }, + { + "epoch": 0.1, + "learning_rate": 6.466165413533834e-07, + "loss": 5.7045, + "step": 46 + }, + { + "epoch": 0.11, + "learning_rate": 6.766917293233082e-07, + "loss": 5.9885, + "step": 48 + }, + { + "epoch": 0.11, + "learning_rate": 7.06766917293233e-07, + "loss": 5.7822, + "step": 50 + }, + { + "epoch": 0.12, + "learning_rate": 7.368421052631578e-07, + "loss": 5.5907, + "step": 52 + }, + { + "epoch": 0.12, + "learning_rate": 7.669172932330827e-07, + "loss": 5.3985, + "step": 54 + }, + { + "epoch": 0.13, + "learning_rate": 7.969924812030074e-07, + "loss": 5.5724, + "step": 56 + }, + { + "epoch": 0.13, + "learning_rate": 8.270676691729323e-07, + "loss": 5.0061, + "step": 58 + }, + { + "epoch": 0.14, + "learning_rate": 8.57142857142857e-07, + "loss": 4.6891, + "step": 60 + }, + { + "epoch": 0.14, + "learning_rate": 8.872180451127819e-07, + "loss": 5.3034, + "step": 62 + }, + { + "epoch": 0.14, + "learning_rate": 9.172932330827066e-07, + "loss": 4.9166, + "step": 64 + }, + { + "epoch": 0.15, + "learning_rate": 9.473684210526315e-07, + "loss": 4.9849, + "step": 66 + }, + { + "epoch": 0.15, + "learning_rate": 9.774436090225563e-07, + "loss": 4.9612, + "step": 68 + }, + { + "epoch": 0.16, + "learning_rate": 1.0075187969924813e-06, + "loss": 4.8004, + "step": 70 + }, + { + "epoch": 0.16, + "learning_rate": 1.037593984962406e-06, + "loss": 4.7059, + "step": 72 + }, + { + "epoch": 0.17, + "learning_rate": 1.0676691729323308e-06, + "loss": 4.3691, + "step": 74 + }, + { + "epoch": 0.17, + "learning_rate": 1.0977443609022555e-06, + "loss": 4.5099, + "step": 76 + }, + { + "epoch": 0.18, + "learning_rate": 1.1278195488721805e-06, + "loss": 4.739, + "step": 78 + }, + { + "epoch": 0.18, + "learning_rate": 1.1578947368421053e-06, + "loss": 4.6763, + "step": 80 + }, + { + "epoch": 0.19, + "learning_rate": 1.18796992481203e-06, + "loss": 4.3841, + "step": 82 + }, + { + "epoch": 0.19, + "learning_rate": 1.218045112781955e-06, + "loss": 4.0368, + "step": 84 + }, + { + "epoch": 0.19, + "learning_rate": 1.2481203007518797e-06, + "loss": 4.1215, + "step": 86 + }, + { + "epoch": 0.2, + "learning_rate": 1.2781954887218045e-06, + "loss": 4.5899, + "step": 88 + }, + { + "epoch": 0.2, + "learning_rate": 1.3082706766917292e-06, + "loss": 4.3187, + "step": 90 + }, + { + "epoch": 0.21, + "learning_rate": 1.3383458646616542e-06, + "loss": 4.0256, + "step": 92 + }, + { + "epoch": 0.21, + "learning_rate": 1.368421052631579e-06, + "loss": 4.127, + "step": 94 + }, + { + "epoch": 0.22, + "learning_rate": 1.3984962406015037e-06, + "loss": 4.4956, + "step": 96 + }, + { + "epoch": 0.22, + "learning_rate": 1.4285714285714286e-06, + "loss": 3.9016, + "step": 98 + }, + { + "epoch": 0.23, + "learning_rate": 1.4586466165413534e-06, + "loss": 3.762, + "step": 100 + }, + { + "epoch": 0.23, + "learning_rate": 1.4887218045112781e-06, + "loss": 4.0754, + "step": 102 + }, + { + "epoch": 0.24, + "learning_rate": 1.5187969924812029e-06, + "loss": 4.006, + "step": 104 + }, + { + "epoch": 0.24, + "learning_rate": 1.5488721804511278e-06, + "loss": 3.9874, + "step": 106 + }, + { + "epoch": 0.24, + "learning_rate": 1.5789473684210526e-06, + "loss": 3.9949, + "step": 108 + }, + { + "epoch": 0.25, + "learning_rate": 1.6090225563909773e-06, + "loss": 3.7541, + "step": 110 + }, + { + "epoch": 0.25, + "learning_rate": 1.6390977443609023e-06, + "loss": 4.2143, + "step": 112 + }, + { + "epoch": 0.26, + "learning_rate": 1.669172932330827e-06, + "loss": 3.8826, + "step": 114 + }, + { + "epoch": 0.26, + "learning_rate": 1.6992481203007518e-06, + "loss": 4.4338, + "step": 116 + }, + { + "epoch": 0.27, + "learning_rate": 1.7293233082706765e-06, + "loss": 3.6651, + "step": 118 + }, + { + "epoch": 0.27, + "learning_rate": 1.7593984962406015e-06, + "loss": 3.9413, + "step": 120 + }, + { + "epoch": 0.28, + "learning_rate": 1.7894736842105262e-06, + "loss": 4.2412, + "step": 122 + }, + { + "epoch": 0.28, + "learning_rate": 1.819548872180451e-06, + "loss": 4.0753, + "step": 124 + }, + { + "epoch": 0.28, + "learning_rate": 1.849624060150376e-06, + "loss": 3.5147, + "step": 126 + }, + { + "epoch": 0.29, + "learning_rate": 1.8796992481203007e-06, + "loss": 3.5877, + "step": 128 + }, + { + "epoch": 0.29, + "learning_rate": 1.9097744360902255e-06, + "loss": 3.9202, + "step": 130 + }, + { + "epoch": 0.3, + "learning_rate": 1.9398496240601504e-06, + "loss": 4.2452, + "step": 132 + }, + { + "epoch": 0.3, + "learning_rate": 1.969924812030075e-06, + "loss": 4.2966, + "step": 134 + }, + { + "epoch": 0.31, + "learning_rate": 2e-06, + "loss": 4.0258, + "step": 136 + }, + { + "epoch": 0.31, + "learning_rate": 1.9999989259544593e-06, + "loss": 3.2049, + "step": 138 + }, + { + "epoch": 0.32, + "learning_rate": 1.9999957038201444e-06, + "loss": 3.8649, + "step": 140 + }, + { + "epoch": 0.32, + "learning_rate": 1.999990333603977e-06, + "loss": 3.9265, + "step": 142 + }, + { + "epoch": 0.33, + "learning_rate": 1.999982815317493e-06, + "loss": 3.3749, + "step": 144 + }, + { + "epoch": 0.33, + "learning_rate": 1.999973148976841e-06, + "loss": 3.6495, + "step": 146 + }, + { + "epoch": 0.33, + "learning_rate": 1.999961334602787e-06, + "loss": 3.789, + "step": 148 + }, + { + "epoch": 0.34, + "learning_rate": 1.999947372220708e-06, + "loss": 3.4248, + "step": 150 + }, + { + "epoch": 0.34, + "learning_rate": 1.9999312618605972e-06, + "loss": 3.3628, + "step": 152 + }, + { + "epoch": 0.35, + "learning_rate": 1.999913003557061e-06, + "loss": 3.669, + "step": 154 + }, + { + "epoch": 0.35, + "learning_rate": 1.9998925973493196e-06, + "loss": 3.5372, + "step": 156 + }, + { + "epoch": 0.36, + "learning_rate": 1.9998700432812073e-06, + "loss": 3.2437, + "step": 158 + }, + { + "epoch": 0.36, + "learning_rate": 1.999845341401173e-06, + "loss": 3.9666, + "step": 160 + }, + { + "epoch": 0.37, + "learning_rate": 1.999818491762278e-06, + "loss": 3.5522, + "step": 162 + }, + { + "epoch": 0.37, + "learning_rate": 1.9997894944221986e-06, + "loss": 3.765, + "step": 164 + }, + { + "epoch": 0.38, + "learning_rate": 1.9997583494432226e-06, + "loss": 3.489, + "step": 166 + }, + { + "epoch": 0.38, + "learning_rate": 1.999725056892252e-06, + "loss": 3.7294, + "step": 168 + }, + { + "epoch": 0.38, + "learning_rate": 1.999689616840804e-06, + "loss": 3.3408, + "step": 170 + }, + { + "epoch": 0.39, + "learning_rate": 1.9996520293650056e-06, + "loss": 3.5929, + "step": 172 + }, + { + "epoch": 0.39, + "learning_rate": 1.9996122945455987e-06, + "loss": 3.5767, + "step": 174 + }, + { + "epoch": 0.4, + "learning_rate": 1.9995704124679367e-06, + "loss": 3.167, + "step": 176 + }, + { + "epoch": 0.4, + "learning_rate": 1.999526383221987e-06, + "loss": 3.651, + "step": 178 + }, + { + "epoch": 0.41, + "learning_rate": 1.999480206902328e-06, + "loss": 3.3083, + "step": 180 + }, + { + "epoch": 0.41, + "learning_rate": 1.9994318836081506e-06, + "loss": 3.3985, + "step": 182 + }, + { + "epoch": 0.42, + "learning_rate": 1.9993814134432575e-06, + "loss": 3.2612, + "step": 184 + }, + { + "epoch": 0.42, + "learning_rate": 1.9993287965160636e-06, + "loss": 3.7523, + "step": 186 + }, + { + "epoch": 0.43, + "learning_rate": 1.9992740329395945e-06, + "loss": 3.9041, + "step": 188 + }, + { + "epoch": 0.43, + "learning_rate": 1.9992171228314873e-06, + "loss": 3.5194, + "step": 190 + }, + { + "epoch": 0.43, + "learning_rate": 1.9991580663139904e-06, + "loss": 3.3409, + "step": 192 + }, + { + "epoch": 0.44, + "learning_rate": 1.999096863513963e-06, + "loss": 3.3884, + "step": 194 + }, + { + "epoch": 0.44, + "learning_rate": 1.999033514562873e-06, + "loss": 3.6586, + "step": 196 + }, + { + "epoch": 0.45, + "learning_rate": 1.9989680195968008e-06, + "loss": 3.4453, + "step": 198 + }, + { + "epoch": 0.45, + "learning_rate": 1.998900378756435e-06, + "loss": 3.7887, + "step": 200 + }, + { + "epoch": 0.46, + "learning_rate": 1.9988305921870747e-06, + "loss": 3.2259, + "step": 202 + }, + { + "epoch": 0.46, + "learning_rate": 1.998758660038628e-06, + "loss": 3.926, + "step": 204 + }, + { + "epoch": 0.47, + "learning_rate": 1.998684582465611e-06, + "loss": 3.5587, + "step": 206 + }, + { + "epoch": 0.47, + "learning_rate": 1.998608359627149e-06, + "loss": 3.2248, + "step": 208 + }, + { + "epoch": 0.47, + "learning_rate": 1.998529991686976e-06, + "loss": 3.4838, + "step": 210 + }, + { + "epoch": 0.48, + "learning_rate": 1.998449478813434e-06, + "loss": 3.4091, + "step": 212 + }, + { + "epoch": 0.48, + "learning_rate": 1.998366821179471e-06, + "loss": 3.2509, + "step": 214 + }, + { + "epoch": 0.49, + "learning_rate": 1.998282018962644e-06, + "loss": 3.3953, + "step": 216 + }, + { + "epoch": 0.49, + "learning_rate": 1.9981950723451152e-06, + "loss": 3.3451, + "step": 218 + }, + { + "epoch": 0.5, + "learning_rate": 1.998105981513654e-06, + "loss": 3.3634, + "step": 220 + }, + { + "epoch": 0.5, + "learning_rate": 1.998014746659636e-06, + "loss": 3.2317, + "step": 222 + }, + { + "epoch": 0.51, + "learning_rate": 1.9979213679790414e-06, + "loss": 3.1458, + "step": 224 + }, + { + "epoch": 0.51, + "learning_rate": 1.9978258456724567e-06, + "loss": 3.6836, + "step": 226 + }, + { + "epoch": 0.52, + "learning_rate": 1.997728179945072e-06, + "loss": 3.2641, + "step": 228 + }, + { + "epoch": 0.52, + "learning_rate": 1.997628371006683e-06, + "loss": 3.4994, + "step": 230 + }, + { + "epoch": 0.52, + "learning_rate": 1.997526419071687e-06, + "loss": 3.314, + "step": 232 + }, + { + "epoch": 0.53, + "learning_rate": 1.997422324359087e-06, + "loss": 3.4686, + "step": 234 + }, + { + "epoch": 0.53, + "learning_rate": 1.9973160870924883e-06, + "loss": 3.5634, + "step": 236 + }, + { + "epoch": 0.54, + "learning_rate": 1.9972077075000974e-06, + "loss": 3.3327, + "step": 238 + }, + { + "epoch": 0.54, + "learning_rate": 1.9970971858147235e-06, + "loss": 3.5988, + "step": 240 + }, + { + "epoch": 0.55, + "learning_rate": 1.996984522273778e-06, + "loss": 3.6566, + "step": 242 + }, + { + "epoch": 0.55, + "learning_rate": 1.996869717119272e-06, + "loss": 3.5185, + "step": 244 + }, + { + "epoch": 0.56, + "learning_rate": 1.9967527705978177e-06, + "loss": 3.1404, + "step": 246 + }, + { + "epoch": 0.56, + "learning_rate": 1.996633682960626e-06, + "loss": 3.1059, + "step": 248 + }, + { + "epoch": 0.57, + "learning_rate": 1.996512454463509e-06, + "loss": 3.5391, + "step": 250 + }, + { + "epoch": 0.57, + "learning_rate": 1.9963890853668764e-06, + "loss": 3.5021, + "step": 252 + }, + { + "epoch": 0.57, + "learning_rate": 1.9962635759357355e-06, + "loss": 2.8194, + "step": 254 + }, + { + "epoch": 0.58, + "learning_rate": 1.996135926439693e-06, + "loss": 3.0764, + "step": 256 + }, + { + "epoch": 0.58, + "learning_rate": 1.9960061371529506e-06, + "loss": 3.1193, + "step": 258 + }, + { + "epoch": 0.59, + "learning_rate": 1.9958742083543086e-06, + "loss": 3.2474, + "step": 260 + }, + { + "epoch": 0.59, + "learning_rate": 1.9957401403271615e-06, + "loss": 2.932, + "step": 262 + }, + { + "epoch": 0.6, + "learning_rate": 1.995603933359499e-06, + "loss": 2.8882, + "step": 264 + }, + { + "epoch": 0.6, + "learning_rate": 1.995465587743908e-06, + "loss": 3.2823, + "step": 266 + }, + { + "epoch": 0.61, + "learning_rate": 1.9953251037775655e-06, + "loss": 3.1695, + "step": 268 + }, + { + "epoch": 0.61, + "learning_rate": 1.9951824817622444e-06, + "loss": 3.1922, + "step": 270 + }, + { + "epoch": 0.62, + "learning_rate": 1.9950377220043103e-06, + "loss": 3.1263, + "step": 272 + }, + { + "epoch": 0.62, + "learning_rate": 1.9948908248147202e-06, + "loss": 2.8974, + "step": 274 + }, + { + "epoch": 0.62, + "learning_rate": 1.994741790509022e-06, + "loss": 3.3318, + "step": 276 + }, + { + "epoch": 0.63, + "learning_rate": 1.994590619407356e-06, + "loss": 3.1373, + "step": 278 + }, + { + "epoch": 0.63, + "learning_rate": 1.9944373118344505e-06, + "loss": 3.0683, + "step": 280 + }, + { + "epoch": 0.64, + "learning_rate": 1.9942818681196243e-06, + "loss": 3.1446, + "step": 282 + }, + { + "epoch": 0.64, + "learning_rate": 1.9941242885967856e-06, + "loss": 3.1867, + "step": 284 + }, + { + "epoch": 0.65, + "learning_rate": 1.9939645736044283e-06, + "loss": 3.2701, + "step": 286 + }, + { + "epoch": 0.65, + "learning_rate": 1.993802723485636e-06, + "loss": 3.0124, + "step": 288 + }, + { + "epoch": 0.66, + "learning_rate": 1.9936387385880763e-06, + "loss": 3.1993, + "step": 290 + }, + { + "epoch": 0.66, + "learning_rate": 1.993472619264004e-06, + "loss": 2.841, + "step": 292 + }, + { + "epoch": 0.66, + "learning_rate": 1.993304365870259e-06, + "loss": 3.2096, + "step": 294 + }, + { + "epoch": 0.67, + "learning_rate": 1.993133978768265e-06, + "loss": 2.9372, + "step": 296 + }, + { + "epoch": 0.67, + "learning_rate": 1.9929614583240286e-06, + "loss": 3.1033, + "step": 298 + }, + { + "epoch": 0.68, + "learning_rate": 1.9927868049081394e-06, + "loss": 2.9037, + "step": 300 + }, + { + "epoch": 0.68, + "learning_rate": 1.992610018895769e-06, + "loss": 2.9596, + "step": 302 + }, + { + "epoch": 0.69, + "learning_rate": 1.9924311006666695e-06, + "loss": 2.8743, + "step": 304 + }, + { + "epoch": 0.69, + "learning_rate": 1.992250050605174e-06, + "loss": 2.7332, + "step": 306 + }, + { + "epoch": 0.7, + "learning_rate": 1.9920668691001946e-06, + "loss": 3.3899, + "step": 308 + }, + { + "epoch": 0.7, + "learning_rate": 1.9918815565452215e-06, + "loss": 2.8847, + "step": 310 + }, + { + "epoch": 0.71, + "learning_rate": 1.9916941133383232e-06, + "loss": 3.1096, + "step": 312 + }, + { + "epoch": 0.71, + "learning_rate": 1.9915045398821445e-06, + "loss": 2.9963, + "step": 314 + }, + { + "epoch": 0.71, + "learning_rate": 1.991312836583907e-06, + "loss": 3.1046, + "step": 316 + }, + { + "epoch": 0.72, + "learning_rate": 1.991119003855406e-06, + "loss": 3.2598, + "step": 318 + }, + { + "epoch": 0.72, + "learning_rate": 1.9909230421130126e-06, + "loss": 2.7556, + "step": 320 + }, + { + "epoch": 0.73, + "learning_rate": 1.99072495177767e-06, + "loss": 2.9834, + "step": 322 + }, + { + "epoch": 0.73, + "learning_rate": 1.990524733274895e-06, + "loss": 3.0658, + "step": 324 + }, + { + "epoch": 0.74, + "learning_rate": 1.9903223870347743e-06, + "loss": 3.1869, + "step": 326 + }, + { + "epoch": 0.74, + "learning_rate": 1.9901179134919664e-06, + "loss": 2.9748, + "step": 328 + }, + { + "epoch": 0.75, + "learning_rate": 1.989911313085699e-06, + "loss": 2.9126, + "step": 330 + }, + { + "epoch": 0.75, + "learning_rate": 1.989702586259769e-06, + "loss": 3.136, + "step": 332 + }, + { + "epoch": 0.76, + "learning_rate": 1.98949173346254e-06, + "loss": 3.0548, + "step": 334 + }, + { + "epoch": 0.76, + "learning_rate": 1.9892787551469436e-06, + "loss": 3.0525, + "step": 336 + }, + { + "epoch": 0.76, + "learning_rate": 1.9890636517704765e-06, + "loss": 3.0175, + "step": 338 + }, + { + "epoch": 0.77, + "learning_rate": 1.9888464237952e-06, + "loss": 3.0134, + "step": 340 + }, + { + "epoch": 0.77, + "learning_rate": 1.9886270716877395e-06, + "loss": 3.0958, + "step": 342 + }, + { + "epoch": 0.78, + "learning_rate": 1.988405595919284e-06, + "loss": 3.0671, + "step": 344 + }, + { + "epoch": 0.78, + "learning_rate": 1.988181996965583e-06, + "loss": 3.3007, + "step": 346 + }, + { + "epoch": 0.79, + "learning_rate": 1.9879562753069475e-06, + "loss": 3.0881, + "step": 348 + }, + { + "epoch": 0.79, + "learning_rate": 1.987728431428249e-06, + "loss": 2.901, + "step": 350 + }, + { + "epoch": 0.8, + "learning_rate": 1.987498465818915e-06, + "loss": 3.126, + "step": 352 + }, + { + "epoch": 0.8, + "learning_rate": 1.9872663789729353e-06, + "loss": 2.8135, + "step": 354 + }, + { + "epoch": 0.81, + "learning_rate": 1.9870321713888513e-06, + "loss": 2.8591, + "step": 356 + }, + { + "epoch": 0.81, + "learning_rate": 1.9867958435697627e-06, + "loss": 3.0549, + "step": 358 + }, + { + "epoch": 0.81, + "learning_rate": 1.9865573960233237e-06, + "loss": 3.0025, + "step": 360 + }, + { + "epoch": 0.82, + "learning_rate": 1.9863168292617415e-06, + "loss": 2.8908, + "step": 362 + }, + { + "epoch": 0.82, + "learning_rate": 1.9860741438017745e-06, + "loss": 2.753, + "step": 364 + }, + { + "epoch": 0.83, + "learning_rate": 1.985829340164734e-06, + "loss": 3.3022, + "step": 366 + }, + { + "epoch": 0.83, + "learning_rate": 1.9855824188764805e-06, + "loss": 2.6739, + "step": 368 + }, + { + "epoch": 0.84, + "learning_rate": 1.9853333804674227e-06, + "loss": 2.9021, + "step": 370 + }, + { + "epoch": 0.84, + "learning_rate": 1.9850822254725187e-06, + "loss": 2.796, + "step": 372 + }, + { + "epoch": 0.85, + "learning_rate": 1.9848289544312713e-06, + "loss": 2.9392, + "step": 374 + }, + { + "epoch": 0.85, + "learning_rate": 1.9845735678877307e-06, + "loss": 3.1737, + "step": 376 + }, + { + "epoch": 0.85, + "learning_rate": 1.9843160663904896e-06, + "loss": 2.7822, + "step": 378 + }, + { + "epoch": 0.86, + "learning_rate": 1.9840564504926856e-06, + "loss": 3.1038, + "step": 380 + }, + { + "epoch": 0.86, + "learning_rate": 1.9837947207519966e-06, + "loss": 2.881, + "step": 382 + }, + { + "epoch": 0.87, + "learning_rate": 1.983530877730642e-06, + "loss": 2.5971, + "step": 384 + }, + { + "epoch": 0.87, + "learning_rate": 1.983264921995381e-06, + "loss": 2.7485, + "step": 386 + }, + { + "epoch": 0.88, + "learning_rate": 1.9829968541175103e-06, + "loss": 2.6596, + "step": 388 + }, + { + "epoch": 0.88, + "learning_rate": 1.9827266746728644e-06, + "loss": 2.7252, + "step": 390 + }, + { + "epoch": 0.89, + "learning_rate": 1.9824543842418137e-06, + "loss": 2.8186, + "step": 392 + }, + { + "epoch": 0.89, + "learning_rate": 1.982179983409262e-06, + "loss": 2.7891, + "step": 394 + }, + { + "epoch": 0.9, + "learning_rate": 1.981903472764648e-06, + "loss": 2.6887, + "step": 396 + }, + { + "epoch": 0.9, + "learning_rate": 1.981624852901941e-06, + "loss": 2.5659, + "step": 398 + }, + { + "epoch": 0.9, + "learning_rate": 1.981344124419643e-06, + "loss": 2.8682, + "step": 400 + }, + { + "epoch": 0.91, + "learning_rate": 1.9810612879207835e-06, + "loss": 2.423, + "step": 402 + }, + { + "epoch": 0.91, + "learning_rate": 1.980776344012921e-06, + "loss": 2.8891, + "step": 404 + }, + { + "epoch": 0.92, + "learning_rate": 1.9804892933081412e-06, + "loss": 2.84, + "step": 406 + }, + { + "epoch": 0.92, + "learning_rate": 1.980200136423055e-06, + "loss": 2.7778, + "step": 408 + }, + { + "epoch": 0.93, + "learning_rate": 1.979908873978798e-06, + "loss": 3.15, + "step": 410 + }, + { + "epoch": 0.93, + "learning_rate": 1.9796155066010285e-06, + "loss": 2.8711, + "step": 412 + }, + { + "epoch": 0.94, + "learning_rate": 1.9793200349199264e-06, + "loss": 2.9754, + "step": 414 + }, + { + "epoch": 0.94, + "learning_rate": 1.979022459570191e-06, + "loss": 2.6299, + "step": 416 + }, + { + "epoch": 0.95, + "learning_rate": 1.978722781191042e-06, + "loss": 2.8453, + "step": 418 + }, + { + "epoch": 0.95, + "learning_rate": 1.978421000426216e-06, + "loss": 2.7152, + "step": 420 + }, + { + "epoch": 0.95, + "learning_rate": 1.978117117923965e-06, + "loss": 3.0451, + "step": 422 + }, + { + "epoch": 0.96, + "learning_rate": 1.9778111343370563e-06, + "loss": 2.8192, + "step": 424 + }, + { + "epoch": 0.96, + "learning_rate": 1.977503050322771e-06, + "loss": 3.0503, + "step": 426 + }, + { + "epoch": 0.97, + "learning_rate": 1.9771928665429016e-06, + "loss": 2.803, + "step": 428 + }, + { + "epoch": 0.97, + "learning_rate": 1.9768805836637507e-06, + "loss": 3.0309, + "step": 430 + }, + { + "epoch": 0.98, + "learning_rate": 1.97656620235613e-06, + "loss": 3.2594, + "step": 432 + }, + { + "epoch": 0.98, + "learning_rate": 1.9762497232953607e-06, + "loss": 2.7724, + "step": 434 + }, + { + "epoch": 0.99, + "learning_rate": 1.975931147161267e-06, + "loss": 2.9467, + "step": 436 + }, + { + "epoch": 0.99, + "learning_rate": 1.9756104746381803e-06, + "loss": 2.4082, + "step": 438 + }, + { + "epoch": 1.0, + "learning_rate": 1.9752877064149344e-06, + "loss": 2.8044, + "step": 440 + }, + { + "epoch": 1.0, + "learning_rate": 1.9749628431848647e-06, + "loss": 2.7142, + "step": 442 + }, + { + "epoch": 1.0, + "learning_rate": 1.9746358856458065e-06, + "loss": 2.9282, + "step": 444 + }, + { + "epoch": 1.01, + "learning_rate": 1.9743068345000954e-06, + "loss": 2.8878, + "step": 446 + }, + { + "epoch": 1.01, + "learning_rate": 1.9739756904545625e-06, + "loss": 2.6279, + "step": 448 + }, + { + "epoch": 1.02, + "learning_rate": 1.9736424542205353e-06, + "loss": 3.3394, + "step": 450 + }, + { + "epoch": 1.02, + "learning_rate": 1.973307126513836e-06, + "loss": 3.0321, + "step": 452 + }, + { + "epoch": 1.03, + "learning_rate": 1.972969708054779e-06, + "loss": 2.6647, + "step": 454 + }, + { + "epoch": 1.03, + "learning_rate": 1.9726301995681697e-06, + "loss": 2.8719, + "step": 456 + }, + { + "epoch": 1.04, + "learning_rate": 1.9722886017833032e-06, + "loss": 2.6796, + "step": 458 + }, + { + "epoch": 1.04, + "learning_rate": 1.971944915433963e-06, + "loss": 2.7943, + "step": 460 + }, + { + "epoch": 1.04, + "learning_rate": 1.9715991412584184e-06, + "loss": 2.5229, + "step": 462 + }, + { + "epoch": 1.05, + "learning_rate": 1.9712512799994236e-06, + "loss": 2.6882, + "step": 464 + }, + { + "epoch": 1.05, + "learning_rate": 1.970901332404217e-06, + "loss": 3.1826, + "step": 466 + }, + { + "epoch": 1.06, + "learning_rate": 1.9705492992245173e-06, + "loss": 2.8889, + "step": 468 + }, + { + "epoch": 1.06, + "learning_rate": 1.9701951812165236e-06, + "loss": 2.9288, + "step": 470 + }, + { + "epoch": 1.07, + "learning_rate": 1.9698389791409147e-06, + "loss": 3.2185, + "step": 472 + }, + { + "epoch": 1.07, + "learning_rate": 1.969480693762844e-06, + "loss": 2.8005, + "step": 474 + }, + { + "epoch": 1.08, + "learning_rate": 1.9691203258519414e-06, + "loss": 2.5454, + "step": 476 + }, + { + "epoch": 1.08, + "learning_rate": 1.9687578761823105e-06, + "loss": 2.9237, + "step": 478 + }, + { + "epoch": 1.09, + "learning_rate": 1.9683933455325258e-06, + "loss": 2.5245, + "step": 480 + }, + { + "epoch": 1.09, + "learning_rate": 1.9680267346856323e-06, + "loss": 2.8414, + "step": 482 + }, + { + "epoch": 1.09, + "learning_rate": 1.967842649434193e-06, + "loss": 2.732, + "step": 484 + }, + { + "epoch": 1.1, + "learning_rate": 1.9674729197696204e-06, + "loss": 3.1263, + "step": 486 + }, + { + "epoch": 1.1, + "learning_rate": 1.967101111885097e-06, + "loss": 2.9437, + "step": 488 + }, + { + "epoch": 1.11, + "learning_rate": 1.966727226579301e-06, + "loss": 2.3668, + "step": 490 + }, + { + "epoch": 1.11, + "learning_rate": 1.966351264655371e-06, + "loss": 2.9765, + "step": 492 + }, + { + "epoch": 1.12, + "learning_rate": 1.9659732269209086e-06, + "loss": 2.9363, + "step": 494 + }, + { + "epoch": 1.12, + "learning_rate": 1.965593114187972e-06, + "loss": 3.0973, + "step": 496 + }, + { + "epoch": 1.13, + "learning_rate": 1.965210927273079e-06, + "loss": 2.8855, + "step": 498 + }, + { + "epoch": 1.13, + "learning_rate": 1.964826666997202e-06, + "loss": 2.6448, + "step": 500 + }, + { + "epoch": 1.14, + "learning_rate": 1.964440334185766e-06, + "loss": 2.9767, + "step": 502 + }, + { + "epoch": 1.14, + "learning_rate": 1.96405192966865e-06, + "loss": 2.8073, + "step": 504 + }, + { + "epoch": 1.14, + "learning_rate": 1.9636614542801816e-06, + "loss": 2.8848, + "step": 506 + }, + { + "epoch": 1.15, + "learning_rate": 1.9632689088591385e-06, + "loss": 2.9247, + "step": 508 + }, + { + "epoch": 1.15, + "learning_rate": 1.9628742942487427e-06, + "loss": 2.8988, + "step": 510 + }, + { + "epoch": 1.16, + "learning_rate": 1.9624776112966636e-06, + "loss": 2.7298, + "step": 512 + }, + { + "epoch": 1.16, + "learning_rate": 1.9620788608550118e-06, + "loss": 2.3712, + "step": 514 + }, + { + "epoch": 1.17, + "learning_rate": 1.961678043780339e-06, + "loss": 2.802, + "step": 516 + }, + { + "epoch": 1.17, + "learning_rate": 1.9612751609336377e-06, + "loss": 2.6516, + "step": 518 + }, + { + "epoch": 1.18, + "learning_rate": 1.9608702131803363e-06, + "loss": 2.5732, + "step": 520 + }, + { + "epoch": 1.18, + "learning_rate": 1.9604632013902996e-06, + "loss": 2.5408, + "step": 522 + }, + { + "epoch": 1.19, + "learning_rate": 1.9600541264378266e-06, + "loss": 2.9325, + "step": 524 + }, + { + "epoch": 1.19, + "learning_rate": 1.9596429892016466e-06, + "loss": 2.7673, + "step": 526 + }, + { + "epoch": 1.19, + "learning_rate": 1.9592297905649202e-06, + "loss": 3.0604, + "step": 528 + }, + { + "epoch": 1.2, + "learning_rate": 1.9588145314152364e-06, + "loss": 2.6299, + "step": 530 + }, + { + "epoch": 1.2, + "learning_rate": 1.958397212644609e-06, + "loss": 2.8273, + "step": 532 + }, + { + "epoch": 1.21, + "learning_rate": 1.9579778351494762e-06, + "loss": 2.5243, + "step": 534 + }, + { + "epoch": 1.21, + "learning_rate": 1.9575563998306996e-06, + "loss": 2.51, + "step": 536 + }, + { + "epoch": 1.22, + "learning_rate": 1.957132907593561e-06, + "loss": 2.7396, + "step": 538 + }, + { + "epoch": 1.22, + "learning_rate": 1.95670735934776e-06, + "loss": 2.5031, + "step": 540 + }, + { + "epoch": 1.23, + "learning_rate": 1.9562797560074133e-06, + "loss": 2.6223, + "step": 542 + }, + { + "epoch": 1.23, + "learning_rate": 1.955850098491051e-06, + "loss": 2.6593, + "step": 544 + }, + { + "epoch": 1.23, + "learning_rate": 1.9554183877216173e-06, + "loss": 2.7128, + "step": 546 + }, + { + "epoch": 1.24, + "learning_rate": 1.954984624626466e-06, + "loss": 2.6417, + "step": 548 + }, + { + "epoch": 1.24, + "learning_rate": 1.95454881013736e-06, + "loss": 2.3611, + "step": 550 + }, + { + "epoch": 1.25, + "learning_rate": 1.954110945190468e-06, + "loss": 2.5021, + "step": 552 + }, + { + "epoch": 1.25, + "learning_rate": 1.9536710307263644e-06, + "loss": 2.7342, + "step": 554 + }, + { + "epoch": 1.26, + "learning_rate": 1.9532290676900247e-06, + "loss": 2.5659, + "step": 556 + }, + { + "epoch": 1.26, + "learning_rate": 1.9527850570308266e-06, + "loss": 2.546, + "step": 558 + }, + { + "epoch": 1.27, + "learning_rate": 1.9523389997025453e-06, + "loss": 2.7346, + "step": 560 + }, + { + "epoch": 1.27, + "learning_rate": 1.951890896663352e-06, + "loss": 2.8895, + "step": 562 + }, + { + "epoch": 1.28, + "learning_rate": 1.9514407488758135e-06, + "loss": 2.5559, + "step": 564 + }, + { + "epoch": 1.28, + "learning_rate": 1.950988557306888e-06, + "loss": 2.5673, + "step": 566 + }, + { + "epoch": 1.28, + "learning_rate": 1.9505343229279237e-06, + "loss": 2.4996, + "step": 568 + }, + { + "epoch": 1.29, + "learning_rate": 1.950078046714658e-06, + "loss": 3.0601, + "step": 570 + }, + { + "epoch": 1.29, + "learning_rate": 1.9496197296472143e-06, + "loss": 2.5457, + "step": 572 + }, + { + "epoch": 1.3, + "learning_rate": 1.949159372710098e-06, + "loss": 2.5823, + "step": 574 + }, + { + "epoch": 1.3, + "learning_rate": 1.9486969768921986e-06, + "loss": 2.6923, + "step": 576 + }, + { + "epoch": 1.31, + "learning_rate": 1.9482325431867846e-06, + "loss": 2.5386, + "step": 578 + }, + { + "epoch": 1.31, + "learning_rate": 1.9477660725915013e-06, + "loss": 2.483, + "step": 580 + }, + { + "epoch": 1.32, + "learning_rate": 1.9472975661083705e-06, + "loss": 2.4662, + "step": 582 + }, + { + "epoch": 1.32, + "learning_rate": 1.9468270247437868e-06, + "loss": 2.5708, + "step": 584 + }, + { + "epoch": 1.33, + "learning_rate": 1.9463544495085153e-06, + "loss": 2.5796, + "step": 586 + }, + { + "epoch": 1.33, + "learning_rate": 1.9458798414176913e-06, + "loss": 2.5566, + "step": 588 + }, + { + "epoch": 1.33, + "learning_rate": 1.945403201490816e-06, + "loss": 2.5595, + "step": 590 + }, + { + "epoch": 1.34, + "learning_rate": 1.9449245307517553e-06, + "loss": 2.4607, + "step": 592 + }, + { + "epoch": 1.34, + "learning_rate": 1.9444438302287374e-06, + "loss": 2.2646, + "step": 594 + }, + { + "epoch": 1.35, + "learning_rate": 1.9439611009543515e-06, + "loss": 2.5673, + "step": 596 + }, + { + "epoch": 1.35, + "learning_rate": 1.943476343965543e-06, + "loss": 2.371, + "step": 598 + }, + { + "epoch": 1.36, + "learning_rate": 1.9429895603036153e-06, + "loss": 2.5064, + "step": 600 + }, + { + "epoch": 1.36, + "learning_rate": 1.9425007510142224e-06, + "loss": 2.7005, + "step": 602 + }, + { + "epoch": 1.37, + "learning_rate": 1.9420099171473732e-06, + "loss": 2.5411, + "step": 604 + }, + { + "epoch": 1.37, + "learning_rate": 1.9415170597574216e-06, + "loss": 2.5514, + "step": 606 + }, + { + "epoch": 1.38, + "learning_rate": 1.9410221799030716e-06, + "loss": 2.5828, + "step": 608 + }, + { + "epoch": 1.38, + "learning_rate": 1.9405252786473694e-06, + "loss": 2.3784, + "step": 610 + }, + { + "epoch": 1.38, + "learning_rate": 1.940026357057705e-06, + "loss": 2.7243, + "step": 612 + }, + { + "epoch": 1.39, + "learning_rate": 1.9395254162058062e-06, + "loss": 2.5178, + "step": 614 + }, + { + "epoch": 1.39, + "learning_rate": 1.9390224571677405e-06, + "loss": 2.5705, + "step": 616 + }, + { + "epoch": 1.4, + "learning_rate": 1.9385174810239096e-06, + "loss": 2.9351, + "step": 618 + }, + { + "epoch": 1.4, + "learning_rate": 1.9380104888590475e-06, + "loss": 2.5937, + "step": 620 + }, + { + "epoch": 1.41, + "learning_rate": 1.9375014817622207e-06, + "loss": 2.7249, + "step": 622 + }, + { + "epoch": 1.41, + "learning_rate": 1.936990460826822e-06, + "loss": 2.7076, + "step": 624 + }, + { + "epoch": 1.42, + "learning_rate": 1.936477427150571e-06, + "loss": 2.7452, + "step": 626 + }, + { + "epoch": 1.42, + "learning_rate": 1.935962381835511e-06, + "loss": 2.8691, + "step": 628 + }, + { + "epoch": 1.42, + "learning_rate": 1.9354453259880065e-06, + "loss": 2.5158, + "step": 630 + }, + { + "epoch": 1.43, + "learning_rate": 1.93492626071874e-06, + "loss": 2.89, + "step": 632 + }, + { + "epoch": 1.43, + "learning_rate": 1.934405187142711e-06, + "loss": 2.2936, + "step": 634 + }, + { + "epoch": 1.44, + "learning_rate": 1.933882106379233e-06, + "loss": 2.8166, + "step": 636 + }, + { + "epoch": 1.44, + "learning_rate": 1.9333570195519318e-06, + "loss": 2.4871, + "step": 638 + }, + { + "epoch": 1.45, + "learning_rate": 1.932829927788741e-06, + "loss": 2.6609, + "step": 640 + }, + { + "epoch": 1.45, + "learning_rate": 1.9323008322219025e-06, + "loss": 2.5998, + "step": 642 + }, + { + "epoch": 1.46, + "learning_rate": 1.9317697339879607e-06, + "loss": 2.4645, + "step": 644 + }, + { + "epoch": 1.46, + "learning_rate": 1.9312366342277637e-06, + "loss": 2.7153, + "step": 646 + }, + { + "epoch": 1.47, + "learning_rate": 1.9307015340864578e-06, + "loss": 2.7173, + "step": 648 + }, + { + "epoch": 1.47, + "learning_rate": 1.9301644347134873e-06, + "loss": 2.4206, + "step": 650 + }, + { + "epoch": 1.47, + "learning_rate": 1.929625337262591e-06, + "loss": 2.6368, + "step": 652 + }, + { + "epoch": 1.48, + "learning_rate": 1.9290842428917983e-06, + "loss": 2.6202, + "step": 654 + }, + { + "epoch": 1.48, + "learning_rate": 1.92854115276343e-06, + "loss": 2.729, + "step": 656 + }, + { + "epoch": 1.49, + "learning_rate": 1.9279960680440924e-06, + "loss": 2.3452, + "step": 658 + }, + { + "epoch": 1.49, + "learning_rate": 1.927448989904678e-06, + "loss": 2.7506, + "step": 660 + }, + { + "epoch": 1.5, + "learning_rate": 1.9268999195203594e-06, + "loss": 2.5662, + "step": 662 + }, + { + "epoch": 1.5, + "learning_rate": 1.926348858070591e-06, + "loss": 2.4619, + "step": 664 + }, + { + "epoch": 1.51, + "learning_rate": 1.9257958067391024e-06, + "loss": 2.5436, + "step": 666 + }, + { + "epoch": 1.51, + "learning_rate": 1.925240766713898e-06, + "loss": 2.1889, + "step": 668 + }, + { + "epoch": 1.52, + "learning_rate": 1.924683739187255e-06, + "loss": 2.8072, + "step": 670 + }, + { + "epoch": 1.52, + "learning_rate": 1.9241247253557184e-06, + "loss": 2.6656, + "step": 672 + }, + { + "epoch": 1.52, + "learning_rate": 1.9235637264201013e-06, + "loss": 2.6298, + "step": 674 + }, + { + "epoch": 1.53, + "learning_rate": 1.923000743585481e-06, + "loss": 2.5295, + "step": 676 + }, + { + "epoch": 1.53, + "learning_rate": 1.922435778061195e-06, + "loss": 2.5569, + "step": 678 + }, + { + "epoch": 1.54, + "learning_rate": 1.921868831060841e-06, + "loss": 2.4206, + "step": 680 + }, + { + "epoch": 1.54, + "learning_rate": 1.921299903802273e-06, + "loss": 2.3958, + "step": 682 + }, + { + "epoch": 1.55, + "learning_rate": 1.9207289975075986e-06, + "loss": 2.4561, + "step": 684 + }, + { + "epoch": 1.55, + "learning_rate": 1.920156113403176e-06, + "loss": 2.7795, + "step": 686 + }, + { + "epoch": 1.56, + "learning_rate": 1.9195812527196133e-06, + "loss": 2.9221, + "step": 688 + }, + { + "epoch": 1.56, + "learning_rate": 1.919004416691763e-06, + "loss": 2.7184, + "step": 690 + }, + { + "epoch": 1.57, + "learning_rate": 1.918425606558721e-06, + "loss": 2.7425, + "step": 692 + }, + { + "epoch": 1.57, + "learning_rate": 1.9178448235638255e-06, + "loss": 2.7597, + "step": 694 + }, + { + "epoch": 1.57, + "learning_rate": 1.91726206895465e-06, + "loss": 2.512, + "step": 696 + }, + { + "epoch": 1.58, + "learning_rate": 1.916677343983005e-06, + "loss": 2.4675, + "step": 698 + }, + { + "epoch": 1.58, + "learning_rate": 1.916090649904933e-06, + "loss": 2.6547, + "step": 700 + }, + { + "epoch": 1.59, + "learning_rate": 1.9155019879807064e-06, + "loss": 2.7396, + "step": 702 + }, + { + "epoch": 1.59, + "learning_rate": 1.9149113594748245e-06, + "loss": 2.8, + "step": 704 + }, + { + "epoch": 1.6, + "learning_rate": 1.914318765656011e-06, + "loss": 2.4821, + "step": 706 + }, + { + "epoch": 1.6, + "learning_rate": 1.913724207797212e-06, + "loss": 2.5723, + "step": 708 + }, + { + "epoch": 1.61, + "learning_rate": 1.9131276871755913e-06, + "loss": 2.7257, + "step": 710 + }, + { + "epoch": 1.61, + "learning_rate": 1.9125292050725296e-06, + "loss": 2.6002, + "step": 712 + }, + { + "epoch": 1.61, + "learning_rate": 1.9119287627736212e-06, + "loss": 2.3223, + "step": 714 + }, + { + "epoch": 1.62, + "learning_rate": 1.9113263615686704e-06, + "loss": 2.6472, + "step": 716 + }, + { + "epoch": 1.62, + "learning_rate": 1.9107220027516902e-06, + "loss": 2.4824, + "step": 718 + }, + { + "epoch": 1.63, + "learning_rate": 1.9101156876208984e-06, + "loss": 2.9242, + "step": 720 + }, + { + "epoch": 1.63, + "learning_rate": 1.9095074174787157e-06, + "loss": 2.6182, + "step": 722 + }, + { + "epoch": 1.64, + "learning_rate": 1.9088971936317603e-06, + "loss": 2.6885, + "step": 724 + }, + { + "epoch": 1.64, + "learning_rate": 1.90828501739085e-06, + "loss": 2.382, + "step": 726 + }, + { + "epoch": 1.65, + "learning_rate": 1.9076708900709945e-06, + "loss": 2.6815, + "step": 728 + }, + { + "epoch": 1.65, + "learning_rate": 1.907054812991395e-06, + "loss": 2.739, + "step": 730 + }, + { + "epoch": 1.66, + "learning_rate": 1.906436787475442e-06, + "loss": 2.8119, + "step": 732 + }, + { + "epoch": 1.66, + "learning_rate": 1.9058168148507097e-06, + "loss": 2.492, + "step": 734 + }, + { + "epoch": 1.66, + "learning_rate": 1.9051948964489562e-06, + "loss": 2.9267, + "step": 736 + }, + { + "epoch": 1.67, + "learning_rate": 1.9045710336061188e-06, + "loss": 2.4951, + "step": 738 + }, + { + "epoch": 1.67, + "learning_rate": 1.9039452276623115e-06, + "loss": 2.6114, + "step": 740 + }, + { + "epoch": 1.68, + "learning_rate": 1.903317479961823e-06, + "loss": 2.7481, + "step": 742 + }, + { + "epoch": 1.68, + "learning_rate": 1.9026877918531122e-06, + "loss": 2.5025, + "step": 744 + }, + { + "epoch": 1.69, + "learning_rate": 1.9020561646888065e-06, + "loss": 2.5243, + "step": 746 + }, + { + "epoch": 1.69, + "learning_rate": 1.9014225998256986e-06, + "loss": 2.7307, + "step": 748 + }, + { + "epoch": 1.7, + "learning_rate": 1.9007870986247436e-06, + "loss": 2.4616, + "step": 750 + }, + { + "epoch": 1.7, + "learning_rate": 1.900149662451056e-06, + "loss": 2.3835, + "step": 752 + }, + { + "epoch": 1.71, + "learning_rate": 1.8995102926739064e-06, + "loss": 2.4583, + "step": 754 + }, + { + "epoch": 1.71, + "learning_rate": 1.8988689906667196e-06, + "loss": 2.4495, + "step": 756 + }, + { + "epoch": 1.71, + "learning_rate": 1.8982257578070708e-06, + "loss": 3.1206, + "step": 758 + }, + { + "epoch": 1.72, + "learning_rate": 1.897580595476683e-06, + "loss": 2.7375, + "step": 760 + }, + { + "epoch": 1.72, + "learning_rate": 1.8969335050614231e-06, + "loss": 2.6551, + "step": 762 + }, + { + "epoch": 1.73, + "learning_rate": 1.8962844879513003e-06, + "loss": 2.3353, + "step": 764 + }, + { + "epoch": 1.73, + "learning_rate": 1.8956335455404626e-06, + "loss": 2.7879, + "step": 766 + }, + { + "epoch": 1.74, + "learning_rate": 1.894980679227194e-06, + "loss": 2.5966, + "step": 768 + }, + { + "epoch": 1.74, + "learning_rate": 1.89432589041391e-06, + "loss": 2.5804, + "step": 770 + }, + { + "epoch": 1.75, + "learning_rate": 1.8936691805071571e-06, + "loss": 2.7343, + "step": 772 + }, + { + "epoch": 1.75, + "learning_rate": 1.8930105509176082e-06, + "loss": 2.5984, + "step": 774 + }, + { + "epoch": 1.76, + "learning_rate": 1.8923500030600592e-06, + "loss": 2.701, + "step": 776 + }, + { + "epoch": 1.76, + "learning_rate": 1.8916875383534274e-06, + "loss": 2.4911, + "step": 778 + }, + { + "epoch": 1.76, + "learning_rate": 1.8910231582207473e-06, + "loss": 2.3311, + "step": 780 + }, + { + "epoch": 1.77, + "learning_rate": 1.8903568640891673e-06, + "loss": 2.6318, + "step": 782 + }, + { + "epoch": 1.77, + "learning_rate": 1.8896886573899487e-06, + "loss": 2.2295, + "step": 784 + }, + { + "epoch": 1.78, + "learning_rate": 1.88901853955846e-06, + "loss": 2.7293, + "step": 786 + }, + { + "epoch": 1.78, + "learning_rate": 1.8883465120341756e-06, + "loss": 2.5001, + "step": 788 + }, + { + "epoch": 1.79, + "learning_rate": 1.8876725762606714e-06, + "loss": 2.2673, + "step": 790 + }, + { + "epoch": 1.79, + "learning_rate": 1.886996733685623e-06, + "loss": 2.9348, + "step": 792 + }, + { + "epoch": 1.8, + "learning_rate": 1.886318985760802e-06, + "loss": 2.6025, + "step": 794 + }, + { + "epoch": 1.8, + "learning_rate": 1.8856393339420724e-06, + "loss": 2.2533, + "step": 796 + }, + { + "epoch": 1.8, + "learning_rate": 1.884957779689388e-06, + "loss": 2.2767, + "step": 798 + }, + { + "epoch": 1.81, + "learning_rate": 1.8842743244667903e-06, + "loss": 2.6349, + "step": 800 + }, + { + "epoch": 1.81, + "learning_rate": 1.8835889697424025e-06, + "loss": 2.5554, + "step": 802 + }, + { + "epoch": 1.82, + "learning_rate": 1.8829017169884293e-06, + "loss": 2.4651, + "step": 804 + }, + { + "epoch": 1.82, + "learning_rate": 1.8822125676811523e-06, + "loss": 2.5952, + "step": 806 + }, + { + "epoch": 1.83, + "learning_rate": 1.881521523300927e-06, + "loss": 2.2587, + "step": 808 + }, + { + "epoch": 1.83, + "learning_rate": 1.8808285853321793e-06, + "loss": 2.3114, + "step": 810 + }, + { + "epoch": 1.84, + "learning_rate": 1.880133755263403e-06, + "loss": 2.7006, + "step": 812 + }, + { + "epoch": 1.84, + "learning_rate": 1.8794370345871574e-06, + "loss": 2.4779, + "step": 814 + }, + { + "epoch": 1.85, + "learning_rate": 1.878738424800061e-06, + "loss": 2.304, + "step": 816 + }, + { + "epoch": 1.85, + "learning_rate": 1.8780379274027914e-06, + "loss": 2.5801, + "step": 818 + }, + { + "epoch": 1.85, + "learning_rate": 1.8773355439000808e-06, + "loss": 2.5185, + "step": 820 + }, + { + "epoch": 1.86, + "learning_rate": 1.876631275800713e-06, + "loss": 2.5946, + "step": 822 + }, + { + "epoch": 1.86, + "learning_rate": 1.8759251246175202e-06, + "loss": 2.7588, + "step": 824 + }, + { + "epoch": 1.87, + "learning_rate": 1.8752170918673794e-06, + "loss": 2.7387, + "step": 826 + }, + { + "epoch": 1.87, + "learning_rate": 1.8745071790712097e-06, + "loss": 2.331, + "step": 828 + }, + { + "epoch": 1.88, + "learning_rate": 1.8737953877539676e-06, + "loss": 2.258, + "step": 830 + }, + { + "epoch": 1.88, + "learning_rate": 1.8730817194446465e-06, + "loss": 2.9485, + "step": 832 + }, + { + "epoch": 1.89, + "learning_rate": 1.8723661756762703e-06, + "loss": 2.3587, + "step": 834 + }, + { + "epoch": 1.89, + "learning_rate": 1.871648757985893e-06, + "loss": 2.6883, + "step": 836 + }, + { + "epoch": 1.9, + "learning_rate": 1.870929467914592e-06, + "loss": 2.8789, + "step": 838 + }, + { + "epoch": 1.9, + "learning_rate": 1.8702083070074692e-06, + "loss": 2.6571, + "step": 840 + }, + { + "epoch": 1.9, + "learning_rate": 1.8694852768136431e-06, + "loss": 2.6182, + "step": 842 + }, + { + "epoch": 1.91, + "learning_rate": 1.8687603788862485e-06, + "loss": 2.3781, + "step": 844 + }, + { + "epoch": 1.91, + "learning_rate": 1.868033614782432e-06, + "loss": 2.6386, + "step": 846 + }, + { + "epoch": 1.92, + "learning_rate": 1.8673049860633496e-06, + "loss": 2.7053, + "step": 848 + }, + { + "epoch": 1.92, + "learning_rate": 1.866574494294162e-06, + "loss": 2.7169, + "step": 850 + }, + { + "epoch": 1.93, + "learning_rate": 1.8658421410440316e-06, + "loss": 2.5135, + "step": 852 + }, + { + "epoch": 1.93, + "learning_rate": 1.8651079278861203e-06, + "loss": 2.5498, + "step": 854 + }, + { + "epoch": 1.94, + "learning_rate": 1.864371856397585e-06, + "loss": 2.6104, + "step": 856 + }, + { + "epoch": 1.94, + "learning_rate": 1.8636339281595739e-06, + "loss": 2.5974, + "step": 858 + }, + { + "epoch": 1.95, + "learning_rate": 1.862894144757224e-06, + "loss": 2.5574, + "step": 860 + }, + { + "epoch": 1.95, + "learning_rate": 1.8621525077796582e-06, + "loss": 2.4691, + "step": 862 + }, + { + "epoch": 1.95, + "learning_rate": 1.8614090188199793e-06, + "loss": 2.7892, + "step": 864 + }, + { + "epoch": 1.96, + "learning_rate": 1.8606636794752699e-06, + "loss": 2.4791, + "step": 866 + }, + { + "epoch": 1.96, + "learning_rate": 1.8599164913465866e-06, + "loss": 2.558, + "step": 868 + }, + { + "epoch": 1.97, + "learning_rate": 1.8591674560389576e-06, + "loss": 2.7124, + "step": 870 + }, + { + "epoch": 1.97, + "learning_rate": 1.858416575161379e-06, + "loss": 2.4838, + "step": 872 + }, + { + "epoch": 1.98, + "learning_rate": 1.8576638503268111e-06, + "loss": 2.6113, + "step": 874 + }, + { + "epoch": 1.98, + "learning_rate": 1.8569092831521757e-06, + "loss": 2.7273, + "step": 876 + }, + { + "epoch": 1.99, + "learning_rate": 1.8561528752583518e-06, + "loss": 2.6104, + "step": 878 + }, + { + "epoch": 1.99, + "learning_rate": 1.8553946282701722e-06, + "loss": 2.6673, + "step": 880 + }, + { + "epoch": 1.99, + "learning_rate": 1.8546345438164206e-06, + "loss": 2.4585, + "step": 882 + }, + { + "epoch": 2.0, + "learning_rate": 1.8538726235298276e-06, + "loss": 2.6186, + "step": 884 + }, + { + "epoch": 2.0, + "learning_rate": 1.8531088690470677e-06, + "loss": 2.627, + "step": 886 + }, + { + "epoch": 2.01, + "learning_rate": 1.8523432820087548e-06, + "loss": 2.4928, + "step": 888 + }, + { + "epoch": 2.01, + "learning_rate": 1.8515758640594394e-06, + "loss": 2.538, + "step": 890 + }, + { + "epoch": 2.02, + "learning_rate": 1.8508066168476057e-06, + "loss": 2.63, + "step": 892 + }, + { + "epoch": 2.02, + "learning_rate": 1.8500355420256662e-06, + "loss": 2.3831, + "step": 894 + }, + { + "epoch": 2.03, + "learning_rate": 1.8492626412499602e-06, + "loss": 2.5052, + "step": 896 + }, + { + "epoch": 2.03, + "learning_rate": 1.848487916180749e-06, + "loss": 2.3827, + "step": 898 + }, + { + "epoch": 2.04, + "learning_rate": 1.847711368482212e-06, + "loss": 2.4427, + "step": 900 + }, + { + "epoch": 2.04, + "learning_rate": 1.8469329998224455e-06, + "loss": 2.4639, + "step": 902 + }, + { + "epoch": 2.04, + "learning_rate": 1.8461528118734551e-06, + "loss": 2.6087, + "step": 904 + }, + { + "epoch": 2.05, + "learning_rate": 1.8453708063111564e-06, + "loss": 2.3654, + "step": 906 + }, + { + "epoch": 2.05, + "learning_rate": 1.8445869848153684e-06, + "loss": 2.6367, + "step": 908 + }, + { + "epoch": 2.06, + "learning_rate": 1.8438013490698105e-06, + "loss": 2.4576, + "step": 910 + }, + { + "epoch": 2.06, + "learning_rate": 1.8430139007621007e-06, + "loss": 2.8134, + "step": 912 + }, + { + "epoch": 2.07, + "learning_rate": 1.842224641583749e-06, + "loss": 2.623, + "step": 914 + }, + { + "epoch": 2.07, + "learning_rate": 1.8414335732301564e-06, + "loss": 2.3569, + "step": 916 + }, + { + "epoch": 2.08, + "learning_rate": 1.8406406974006097e-06, + "loss": 2.3732, + "step": 918 + }, + { + "epoch": 2.08, + "learning_rate": 1.8398460157982783e-06, + "loss": 2.3951, + "step": 920 + }, + { + "epoch": 2.09, + "learning_rate": 1.839049530130211e-06, + "loss": 2.8091, + "step": 922 + }, + { + "epoch": 2.09, + "learning_rate": 1.838251242107331e-06, + "loss": 2.6456, + "step": 924 + }, + { + "epoch": 2.09, + "learning_rate": 1.8374511534444337e-06, + "loss": 2.3516, + "step": 926 + }, + { + "epoch": 2.1, + "learning_rate": 1.8366492658601831e-06, + "loss": 2.6604, + "step": 928 + }, + { + "epoch": 2.1, + "learning_rate": 1.8358455810771062e-06, + "loss": 2.9681, + "step": 930 + }, + { + "epoch": 2.11, + "learning_rate": 1.8350401008215912e-06, + "loss": 2.3861, + "step": 932 + }, + { + "epoch": 2.11, + "learning_rate": 1.8342328268238831e-06, + "loss": 2.6136, + "step": 934 + }, + { + "epoch": 2.12, + "learning_rate": 1.83342376081808e-06, + "loss": 2.2425, + "step": 936 + }, + { + "epoch": 2.12, + "learning_rate": 1.8326129045421295e-06, + "loss": 2.3558, + "step": 938 + }, + { + "epoch": 2.13, + "learning_rate": 1.8318002597378243e-06, + "loss": 2.5504, + "step": 940 + }, + { + "epoch": 2.13, + "learning_rate": 1.8309858281508e-06, + "loss": 2.8984, + "step": 942 + }, + { + "epoch": 2.14, + "learning_rate": 1.8301696115305294e-06, + "loss": 2.607, + "step": 944 + }, + { + "epoch": 2.14, + "learning_rate": 1.8293516116303205e-06, + "loss": 2.6641, + "step": 946 + }, + { + "epoch": 2.14, + "learning_rate": 1.8285318302073113e-06, + "loss": 2.3655, + "step": 948 + }, + { + "epoch": 2.15, + "learning_rate": 1.827710269022467e-06, + "loss": 2.649, + "step": 950 + }, + { + "epoch": 2.15, + "learning_rate": 1.8268869298405762e-06, + "loss": 2.2951, + "step": 952 + }, + { + "epoch": 2.16, + "learning_rate": 1.8260618144302459e-06, + "loss": 2.8344, + "step": 954 + }, + { + "epoch": 2.16, + "learning_rate": 1.8252349245638997e-06, + "loss": 2.3341, + "step": 956 + }, + { + "epoch": 2.17, + "learning_rate": 1.8244062620177721e-06, + "loss": 2.3695, + "step": 958 + }, + { + "epoch": 2.17, + "learning_rate": 1.8235758285719052e-06, + "loss": 2.8024, + "step": 960 + }, + { + "epoch": 2.18, + "learning_rate": 1.8227436260101468e-06, + "loss": 2.5526, + "step": 962 + }, + { + "epoch": 2.18, + "learning_rate": 1.821909656120143e-06, + "loss": 2.47, + "step": 964 + }, + { + "epoch": 2.18, + "learning_rate": 1.821073920693337e-06, + "loss": 2.449, + "step": 966 + }, + { + "epoch": 2.19, + "learning_rate": 1.820236421524965e-06, + "loss": 2.4071, + "step": 968 + }, + { + "epoch": 2.19, + "learning_rate": 1.8193971604140515e-06, + "loss": 2.8168, + "step": 970 + }, + { + "epoch": 2.2, + "learning_rate": 1.8185561391634056e-06, + "loss": 2.561, + "step": 972 + }, + { + "epoch": 2.2, + "learning_rate": 1.8177133595796177e-06, + "loss": 2.2887, + "step": 974 + }, + { + "epoch": 2.21, + "learning_rate": 1.816868823473055e-06, + "loss": 2.2514, + "step": 976 + }, + { + "epoch": 2.21, + "learning_rate": 1.816022532657858e-06, + "loss": 2.6151, + "step": 978 + }, + { + "epoch": 2.22, + "learning_rate": 1.8151744889519365e-06, + "loss": 2.5533, + "step": 980 + }, + { + "epoch": 2.22, + "learning_rate": 1.8143246941769657e-06, + "loss": 2.4734, + "step": 982 + }, + { + "epoch": 2.23, + "learning_rate": 1.8134731501583817e-06, + "loss": 2.2393, + "step": 984 + }, + { + "epoch": 2.23, + "learning_rate": 1.8126198587253794e-06, + "loss": 2.5585, + "step": 986 + }, + { + "epoch": 2.23, + "learning_rate": 1.811764821710906e-06, + "loss": 2.4241, + "step": 988 + }, + { + "epoch": 2.24, + "learning_rate": 1.810908040951659e-06, + "loss": 2.7939, + "step": 990 + }, + { + "epoch": 2.24, + "learning_rate": 1.8100495182880813e-06, + "loss": 2.5703, + "step": 992 + }, + { + "epoch": 2.25, + "learning_rate": 1.809189255564358e-06, + "loss": 2.7578, + "step": 994 + }, + { + "epoch": 2.25, + "learning_rate": 1.808327254628412e-06, + "loss": 2.458, + "step": 996 + }, + { + "epoch": 2.26, + "learning_rate": 1.8074635173318992e-06, + "loss": 2.6176, + "step": 998 + }, + { + "epoch": 2.26, + "learning_rate": 1.8065980455302066e-06, + "loss": 2.365, + "step": 1000 + }, + { + "epoch": 2.27, + "learning_rate": 1.8057308410824463e-06, + "loss": 2.4624, + "step": 1002 + }, + { + "epoch": 2.27, + "learning_rate": 1.8048619058514522e-06, + "loss": 2.7063, + "step": 1004 + }, + { + "epoch": 2.28, + "learning_rate": 1.8039912417037763e-06, + "loss": 2.4129, + "step": 1006 + }, + { + "epoch": 2.28, + "learning_rate": 1.803118850509685e-06, + "loss": 2.6864, + "step": 1008 + }, + { + "epoch": 2.28, + "learning_rate": 1.8022447341431538e-06, + "loss": 3.0802, + "step": 1010 + }, + { + "epoch": 2.29, + "learning_rate": 1.8013688944818638e-06, + "loss": 2.3165, + "step": 1012 + }, + { + "epoch": 2.29, + "learning_rate": 1.8004913334071992e-06, + "loss": 2.4781, + "step": 1014 + }, + { + "epoch": 2.3, + "learning_rate": 1.7996120528042402e-06, + "loss": 2.4112, + "step": 1016 + }, + { + "epoch": 2.3, + "learning_rate": 1.7987310545617622e-06, + "loss": 2.3141, + "step": 1018 + }, + { + "epoch": 2.31, + "learning_rate": 1.7978483405722296e-06, + "loss": 2.2761, + "step": 1020 + }, + { + "epoch": 2.31, + "learning_rate": 1.7969639127317925e-06, + "loss": 2.5459, + "step": 1022 + }, + { + "epoch": 2.32, + "learning_rate": 1.7960777729402823e-06, + "loss": 2.5135, + "step": 1024 + }, + { + "epoch": 2.32, + "learning_rate": 1.795189923101208e-06, + "loss": 2.6812, + "step": 1026 + }, + { + "epoch": 2.33, + "learning_rate": 1.7943003651217522e-06, + "loss": 2.4027, + "step": 1028 + }, + { + "epoch": 2.33, + "learning_rate": 1.7934091009127657e-06, + "loss": 2.5999, + "step": 1030 + }, + { + "epoch": 2.33, + "learning_rate": 1.7925161323887662e-06, + "loss": 2.3978, + "step": 1032 + }, + { + "epoch": 2.34, + "learning_rate": 1.7916214614679306e-06, + "loss": 2.6241, + "step": 1034 + }, + { + "epoch": 2.34, + "learning_rate": 1.790725090072094e-06, + "loss": 2.7711, + "step": 1036 + }, + { + "epoch": 2.35, + "learning_rate": 1.7898270201267436e-06, + "loss": 2.2952, + "step": 1038 + }, + { + "epoch": 2.35, + "learning_rate": 1.7889272535610158e-06, + "loss": 2.5796, + "step": 1040 + }, + { + "epoch": 2.36, + "learning_rate": 1.7880257923076907e-06, + "loss": 2.148, + "step": 1042 + }, + { + "epoch": 2.36, + "learning_rate": 1.7871226383031893e-06, + "loss": 2.727, + "step": 1044 + }, + { + "epoch": 2.37, + "learning_rate": 1.7862177934875687e-06, + "loss": 2.3907, + "step": 1046 + }, + { + "epoch": 2.37, + "learning_rate": 1.785311259804518e-06, + "loss": 2.6242, + "step": 1048 + }, + { + "epoch": 2.37, + "learning_rate": 1.7844030392013538e-06, + "loss": 2.4537, + "step": 1050 + }, + { + "epoch": 2.38, + "learning_rate": 1.7834931336290172e-06, + "loss": 2.7705, + "step": 1052 + }, + { + "epoch": 2.38, + "learning_rate": 1.782581545042068e-06, + "loss": 2.393, + "step": 1054 + }, + { + "epoch": 2.39, + "learning_rate": 1.7816682753986815e-06, + "loss": 2.4858, + "step": 1056 + }, + { + "epoch": 2.39, + "learning_rate": 1.780753326660644e-06, + "loss": 2.3062, + "step": 1058 + }, + { + "epoch": 2.4, + "learning_rate": 1.779836700793349e-06, + "loss": 2.2885, + "step": 1060 + }, + { + "epoch": 2.4, + "learning_rate": 1.7789183997657919e-06, + "loss": 2.5484, + "step": 1062 + }, + { + "epoch": 2.41, + "learning_rate": 1.7779984255505675e-06, + "loss": 2.5681, + "step": 1064 + }, + { + "epoch": 2.41, + "learning_rate": 1.777076780123864e-06, + "loss": 2.3677, + "step": 1066 + }, + { + "epoch": 2.42, + "learning_rate": 1.7761534654654593e-06, + "loss": 2.3538, + "step": 1068 + }, + { + "epoch": 2.42, + "learning_rate": 1.7752284835587182e-06, + "loss": 2.6437, + "step": 1070 + }, + { + "epoch": 2.42, + "learning_rate": 1.774301836390585e-06, + "loss": 2.6875, + "step": 1072 + }, + { + "epoch": 2.43, + "learning_rate": 1.773373525951583e-06, + "loss": 2.413, + "step": 1074 + }, + { + "epoch": 2.43, + "learning_rate": 1.7724435542358078e-06, + "loss": 2.6612, + "step": 1076 + }, + { + "epoch": 2.44, + "learning_rate": 1.7715119232409225e-06, + "loss": 2.2541, + "step": 1078 + }, + { + "epoch": 2.44, + "learning_rate": 1.7705786349681562e-06, + "loss": 2.7833, + "step": 1080 + }, + { + "epoch": 2.45, + "learning_rate": 1.7696436914222965e-06, + "loss": 2.692, + "step": 1082 + }, + { + "epoch": 2.45, + "learning_rate": 1.7687070946116874e-06, + "loss": 2.5163, + "step": 1084 + }, + { + "epoch": 2.46, + "learning_rate": 1.7677688465482244e-06, + "loss": 2.4503, + "step": 1086 + }, + { + "epoch": 2.46, + "learning_rate": 1.7668289492473493e-06, + "loss": 2.2691, + "step": 1088 + }, + { + "epoch": 2.47, + "learning_rate": 1.7658874047280476e-06, + "loss": 2.6636, + "step": 1090 + }, + { + "epoch": 2.47, + "learning_rate": 1.764944215012843e-06, + "loss": 2.2402, + "step": 1092 + }, + { + "epoch": 2.47, + "learning_rate": 1.7639993821277921e-06, + "loss": 3.0009, + "step": 1094 + }, + { + "epoch": 2.48, + "learning_rate": 1.763052908102482e-06, + "loss": 2.3759, + "step": 1096 + }, + { + "epoch": 2.48, + "learning_rate": 1.762104794970026e-06, + "loss": 2.4261, + "step": 1098 + }, + { + "epoch": 2.49, + "learning_rate": 1.7611550447670567e-06, + "loss": 2.3872, + "step": 1100 + }, + { + "epoch": 2.49, + "learning_rate": 1.7602036595337243e-06, + "loss": 2.5853, + "step": 1102 + }, + { + "epoch": 2.5, + "learning_rate": 1.7592506413136906e-06, + "loss": 2.3782, + "step": 1104 + }, + { + "epoch": 2.5, + "learning_rate": 1.7582959921541264e-06, + "loss": 2.3908, + "step": 1106 + }, + { + "epoch": 2.51, + "learning_rate": 1.7573397141057042e-06, + "loss": 2.4113, + "step": 1108 + }, + { + "epoch": 2.51, + "learning_rate": 1.7563818092225967e-06, + "loss": 2.4058, + "step": 1110 + }, + { + "epoch": 2.52, + "learning_rate": 1.7554222795624707e-06, + "loss": 2.5073, + "step": 1112 + }, + { + "epoch": 2.52, + "learning_rate": 1.754461127186483e-06, + "loss": 2.4979, + "step": 1114 + }, + { + "epoch": 2.52, + "learning_rate": 1.7534983541592773e-06, + "loss": 2.491, + "step": 1116 + }, + { + "epoch": 2.53, + "learning_rate": 1.7525339625489772e-06, + "loss": 2.3299, + "step": 1118 + }, + { + "epoch": 2.53, + "learning_rate": 1.7515679544271838e-06, + "loss": 2.6209, + "step": 1120 + }, + { + "epoch": 2.54, + "learning_rate": 1.7506003318689703e-06, + "loss": 2.2329, + "step": 1122 + }, + { + "epoch": 2.54, + "learning_rate": 1.7496310969528785e-06, + "loss": 2.84, + "step": 1124 + }, + { + "epoch": 2.55, + "learning_rate": 1.748660251760913e-06, + "loss": 2.4634, + "step": 1126 + }, + { + "epoch": 2.55, + "learning_rate": 1.7476877983785378e-06, + "loss": 2.2628, + "step": 1128 + }, + { + "epoch": 2.56, + "learning_rate": 1.7467137388946716e-06, + "loss": 2.4435, + "step": 1130 + }, + { + "epoch": 2.56, + "learning_rate": 1.7457380754016823e-06, + "loss": 2.6127, + "step": 1132 + }, + { + "epoch": 2.56, + "learning_rate": 1.7447608099953842e-06, + "loss": 2.6242, + "step": 1134 + }, + { + "epoch": 2.57, + "learning_rate": 1.7437819447750327e-06, + "loss": 2.6045, + "step": 1136 + }, + { + "epoch": 2.57, + "learning_rate": 1.742801481843319e-06, + "loss": 2.4415, + "step": 1138 + }, + { + "epoch": 2.58, + "learning_rate": 1.741819423306367e-06, + "loss": 2.3979, + "step": 1140 + }, + { + "epoch": 2.58, + "learning_rate": 1.740835771273728e-06, + "loss": 2.6006, + "step": 1142 + }, + { + "epoch": 2.59, + "learning_rate": 1.7398505278583758e-06, + "loss": 2.6163, + "step": 1144 + }, + { + "epoch": 2.59, + "learning_rate": 1.7388636951767035e-06, + "loss": 2.396, + "step": 1146 + }, + { + "epoch": 2.6, + "learning_rate": 1.7378752753485171e-06, + "loss": 2.4038, + "step": 1148 + }, + { + "epoch": 2.6, + "learning_rate": 1.7368852704970328e-06, + "loss": 2.3577, + "step": 1150 + }, + { + "epoch": 2.61, + "learning_rate": 1.735893682748871e-06, + "loss": 2.8212, + "step": 1152 + }, + { + "epoch": 2.61, + "learning_rate": 1.7349005142340522e-06, + "loss": 2.6463, + "step": 1154 + }, + { + "epoch": 2.61, + "learning_rate": 1.7339057670859936e-06, + "loss": 2.5239, + "step": 1156 + }, + { + "epoch": 2.62, + "learning_rate": 1.732909443441502e-06, + "loss": 2.5437, + "step": 1158 + }, + { + "epoch": 2.62, + "learning_rate": 1.7319115454407715e-06, + "loss": 2.5785, + "step": 1160 + }, + { + "epoch": 2.63, + "learning_rate": 1.730912075227378e-06, + "loss": 2.2862, + "step": 1162 + }, + { + "epoch": 2.63, + "learning_rate": 1.7299110349482747e-06, + "loss": 2.1492, + "step": 1164 + }, + { + "epoch": 2.64, + "learning_rate": 1.728908426753787e-06, + "loss": 2.428, + "step": 1166 + }, + { + "epoch": 2.64, + "learning_rate": 1.7279042527976085e-06, + "loss": 2.4149, + "step": 1168 + }, + { + "epoch": 2.65, + "learning_rate": 1.7268985152367967e-06, + "loss": 2.5587, + "step": 1170 + }, + { + "epoch": 2.65, + "learning_rate": 1.7258912162317675e-06, + "loss": 2.4955, + "step": 1172 + }, + { + "epoch": 2.66, + "learning_rate": 1.7248823579462904e-06, + "loss": 2.4643, + "step": 1174 + }, + { + "epoch": 2.66, + "learning_rate": 1.7238719425474852e-06, + "loss": 2.4708, + "step": 1176 + }, + { + "epoch": 2.66, + "learning_rate": 1.7228599722058164e-06, + "loss": 2.3787, + "step": 1178 + }, + { + "epoch": 2.67, + "learning_rate": 1.7218464490950884e-06, + "loss": 2.6198, + "step": 1180 + }, + { + "epoch": 2.67, + "learning_rate": 1.7208313753924408e-06, + "loss": 2.3774, + "step": 1182 + }, + { + "epoch": 2.68, + "learning_rate": 1.7198147532783448e-06, + "loss": 2.5964, + "step": 1184 + }, + { + "epoch": 2.68, + "learning_rate": 1.718796584936597e-06, + "loss": 2.4021, + "step": 1186 + }, + { + "epoch": 2.69, + "learning_rate": 1.7177768725543158e-06, + "loss": 2.5438, + "step": 1188 + }, + { + "epoch": 2.69, + "learning_rate": 1.7167556183219362e-06, + "loss": 2.5724, + "step": 1190 + }, + { + "epoch": 2.7, + "learning_rate": 1.7157328244332057e-06, + "loss": 2.6797, + "step": 1192 + }, + { + "epoch": 2.7, + "learning_rate": 1.7147084930851783e-06, + "loss": 2.4916, + "step": 1194 + }, + { + "epoch": 2.71, + "learning_rate": 1.7136826264782114e-06, + "loss": 2.3222, + "step": 1196 + }, + { + "epoch": 2.71, + "learning_rate": 1.7126552268159595e-06, + "loss": 2.1401, + "step": 1198 + }, + { + "epoch": 2.71, + "learning_rate": 1.7116262963053712e-06, + "loss": 2.9593, + "step": 1200 + }, + { + "epoch": 2.72, + "learning_rate": 1.7105958371566823e-06, + "loss": 2.4008, + "step": 1202 + }, + { + "epoch": 2.72, + "learning_rate": 1.709563851583413e-06, + "loss": 2.6847, + "step": 1204 + }, + { + "epoch": 2.73, + "learning_rate": 1.7085303418023626e-06, + "loss": 2.208, + "step": 1206 + }, + { + "epoch": 2.73, + "learning_rate": 1.7074953100336042e-06, + "loss": 2.3636, + "step": 1208 + }, + { + "epoch": 2.74, + "learning_rate": 1.7064587585004806e-06, + "loss": 2.4801, + "step": 1210 + }, + { + "epoch": 2.74, + "learning_rate": 1.705420689429598e-06, + "loss": 2.4984, + "step": 1212 + }, + { + "epoch": 2.75, + "learning_rate": 1.7043811050508244e-06, + "loss": 2.5331, + "step": 1214 + }, + { + "epoch": 2.75, + "learning_rate": 1.703340007597281e-06, + "loss": 2.4208, + "step": 1216 + }, + { + "epoch": 2.75, + "learning_rate": 1.70229739930534e-06, + "loss": 2.3254, + "step": 1218 + }, + { + "epoch": 2.76, + "learning_rate": 1.7012532824146195e-06, + "loss": 2.4997, + "step": 1220 + }, + { + "epoch": 2.76, + "learning_rate": 1.700207659167977e-06, + "loss": 2.3298, + "step": 1222 + }, + { + "epoch": 2.77, + "learning_rate": 1.699160531811507e-06, + "loss": 2.4146, + "step": 1224 + }, + { + "epoch": 2.77, + "learning_rate": 1.698111902594534e-06, + "loss": 2.3754, + "step": 1226 + }, + { + "epoch": 2.78, + "learning_rate": 1.6970617737696096e-06, + "loss": 2.7424, + "step": 1228 + }, + { + "epoch": 2.78, + "learning_rate": 1.696010147592506e-06, + "loss": 2.6457, + "step": 1230 + }, + { + "epoch": 2.79, + "learning_rate": 1.6949570263222116e-06, + "loss": 2.5523, + "step": 1232 + }, + { + "epoch": 2.79, + "learning_rate": 1.6939024122209268e-06, + "loss": 2.424, + "step": 1234 + }, + { + "epoch": 2.8, + "learning_rate": 1.6928463075540594e-06, + "loss": 2.3549, + "step": 1236 + }, + { + "epoch": 2.8, + "learning_rate": 1.6917887145902178e-06, + "loss": 2.2031, + "step": 1238 + }, + { + "epoch": 2.8, + "learning_rate": 1.6907296356012085e-06, + "loss": 2.4517, + "step": 1240 + }, + { + "epoch": 2.81, + "learning_rate": 1.6896690728620296e-06, + "loss": 2.4635, + "step": 1242 + }, + { + "epoch": 2.81, + "learning_rate": 1.6886070286508657e-06, + "loss": 2.5939, + "step": 1244 + }, + { + "epoch": 2.82, + "learning_rate": 1.6875435052490854e-06, + "loss": 2.6403, + "step": 1246 + }, + { + "epoch": 2.82, + "learning_rate": 1.6864785049412334e-06, + "loss": 2.3347, + "step": 1248 + }, + { + "epoch": 2.83, + "learning_rate": 1.6854120300150272e-06, + "loss": 2.4685, + "step": 1250 + }, + { + "epoch": 2.83, + "learning_rate": 1.6843440827613527e-06, + "loss": 2.3795, + "step": 1252 + }, + { + "epoch": 2.84, + "learning_rate": 1.6832746654742573e-06, + "loss": 2.5659, + "step": 1254 + }, + { + "epoch": 2.84, + "learning_rate": 1.6822037804509471e-06, + "loss": 2.4002, + "step": 1256 + }, + { + "epoch": 2.85, + "learning_rate": 1.6811314299917804e-06, + "loss": 2.4262, + "step": 1258 + }, + { + "epoch": 2.85, + "learning_rate": 1.6800576164002635e-06, + "loss": 2.599, + "step": 1260 + }, + { + "epoch": 2.85, + "learning_rate": 1.6789823419830463e-06, + "loss": 2.4544, + "step": 1262 + }, + { + "epoch": 2.86, + "learning_rate": 1.6779056090499158e-06, + "loss": 2.3587, + "step": 1264 + }, + { + "epoch": 2.86, + "learning_rate": 1.676827419913793e-06, + "loss": 2.428, + "step": 1266 + }, + { + "epoch": 2.87, + "learning_rate": 1.6757477768907254e-06, + "loss": 2.4027, + "step": 1268 + }, + { + "epoch": 2.87, + "learning_rate": 1.6746666822998851e-06, + "loss": 2.675, + "step": 1270 + }, + { + "epoch": 2.88, + "learning_rate": 1.6735841384635619e-06, + "loss": 2.3356, + "step": 1272 + }, + { + "epoch": 2.88, + "learning_rate": 1.6725001477071582e-06, + "loss": 2.573, + "step": 1274 + }, + { + "epoch": 2.89, + "learning_rate": 1.6714147123591854e-06, + "loss": 2.2519, + "step": 1276 + }, + { + "epoch": 2.89, + "learning_rate": 1.670327834751257e-06, + "loss": 2.7857, + "step": 1278 + }, + { + "epoch": 2.9, + "learning_rate": 1.6692395172180849e-06, + "loss": 2.6319, + "step": 1280 + }, + { + "epoch": 2.9, + "learning_rate": 1.668149762097475e-06, + "loss": 2.6005, + "step": 1282 + }, + { + "epoch": 2.9, + "learning_rate": 1.6670585717303201e-06, + "loss": 2.4265, + "step": 1284 + }, + { + "epoch": 2.91, + "learning_rate": 1.6659659484605966e-06, + "loss": 2.3283, + "step": 1286 + }, + { + "epoch": 2.91, + "learning_rate": 1.6648718946353584e-06, + "loss": 2.684, + "step": 1288 + }, + { + "epoch": 2.92, + "learning_rate": 1.6637764126047335e-06, + "loss": 2.4864, + "step": 1290 + }, + { + "epoch": 2.92, + "learning_rate": 1.6626795047219168e-06, + "loss": 2.5547, + "step": 1292 + }, + { + "epoch": 2.93, + "learning_rate": 1.661581173343166e-06, + "loss": 2.6756, + "step": 1294 + }, + { + "epoch": 2.93, + "learning_rate": 1.6604814208277972e-06, + "loss": 2.5402, + "step": 1296 + }, + { + "epoch": 2.94, + "learning_rate": 1.659380249538179e-06, + "loss": 2.602, + "step": 1298 + }, + { + "epoch": 2.94, + "learning_rate": 1.6582776618397277e-06, + "loss": 2.4283, + "step": 1300 + }, + { + "epoch": 2.94, + "learning_rate": 1.6571736601009021e-06, + "loss": 2.4714, + "step": 1302 + }, + { + "epoch": 2.95, + "learning_rate": 1.6560682466931981e-06, + "loss": 2.4282, + "step": 1304 + }, + { + "epoch": 2.95, + "learning_rate": 1.654961423991145e-06, + "loss": 2.4965, + "step": 1306 + }, + { + "epoch": 2.96, + "learning_rate": 1.653853194372298e-06, + "loss": 2.4844, + "step": 1308 + }, + { + "epoch": 2.96, + "learning_rate": 1.6527435602172361e-06, + "loss": 2.5985, + "step": 1310 + }, + { + "epoch": 2.97, + "learning_rate": 1.651632523909554e-06, + "loss": 2.2583, + "step": 1312 + }, + { + "epoch": 2.97, + "learning_rate": 1.650520087835859e-06, + "loss": 2.6012, + "step": 1314 + }, + { + "epoch": 2.98, + "learning_rate": 1.6494062543857651e-06, + "loss": 2.5839, + "step": 1316 + }, + { + "epoch": 2.98, + "learning_rate": 1.6482910259518881e-06, + "loss": 2.719, + "step": 1318 + }, + { + "epoch": 2.99, + "learning_rate": 1.64717440492984e-06, + "loss": 2.7217, + "step": 1320 + }, + { + "epoch": 2.99, + "learning_rate": 1.6460563937182248e-06, + "loss": 2.4131, + "step": 1322 + }, + { + "epoch": 2.99, + "learning_rate": 1.6449369947186321e-06, + "loss": 2.4515, + "step": 1324 + }, + { + "epoch": 3.0, + "learning_rate": 1.6438162103356332e-06, + "loss": 2.412, + "step": 1326 + }, + { + "epoch": 3.0, + "learning_rate": 1.642694042976775e-06, + "loss": 2.2864, + "step": 1328 + }, + { + "epoch": 3.01, + "learning_rate": 1.641570495052575e-06, + "loss": 2.315, + "step": 1330 + }, + { + "epoch": 3.01, + "learning_rate": 1.6404455689765165e-06, + "loss": 2.4863, + "step": 1332 + }, + { + "epoch": 3.02, + "learning_rate": 1.6393192671650433e-06, + "loss": 2.7534, + "step": 1334 + }, + { + "epoch": 3.02, + "learning_rate": 1.6381915920375539e-06, + "loss": 2.4737, + "step": 1336 + }, + { + "epoch": 3.03, + "learning_rate": 1.637062546016398e-06, + "loss": 2.3454, + "step": 1338 + }, + { + "epoch": 3.03, + "learning_rate": 1.6359321315268683e-06, + "loss": 2.5099, + "step": 1340 + }, + { + "epoch": 3.04, + "learning_rate": 1.634800350997199e-06, + "loss": 2.3486, + "step": 1342 + }, + { + "epoch": 3.04, + "learning_rate": 1.633667206858557e-06, + "loss": 2.2076, + "step": 1344 + }, + { + "epoch": 3.04, + "learning_rate": 1.6325327015450396e-06, + "loss": 2.2752, + "step": 1346 + }, + { + "epoch": 3.05, + "learning_rate": 1.6313968374936673e-06, + "loss": 2.3892, + "step": 1348 + }, + { + "epoch": 3.05, + "learning_rate": 1.6302596171443795e-06, + "loss": 2.3764, + "step": 1350 + }, + { + "epoch": 3.06, + "learning_rate": 1.6291210429400296e-06, + "loss": 2.2785, + "step": 1352 + }, + { + "epoch": 3.06, + "learning_rate": 1.627981117326378e-06, + "loss": 2.5743, + "step": 1354 + }, + { + "epoch": 3.07, + "learning_rate": 1.6268398427520894e-06, + "loss": 2.7498, + "step": 1356 + }, + { + "epoch": 3.07, + "learning_rate": 1.6256972216687248e-06, + "loss": 2.1805, + "step": 1358 + }, + { + "epoch": 3.08, + "learning_rate": 1.624553256530739e-06, + "loss": 2.5828, + "step": 1360 + }, + { + "epoch": 3.08, + "learning_rate": 1.6234079497954731e-06, + "loss": 2.6088, + "step": 1362 + }, + { + "epoch": 3.09, + "learning_rate": 1.6222613039231501e-06, + "loss": 2.4188, + "step": 1364 + }, + { + "epoch": 3.09, + "learning_rate": 1.62111332137687e-06, + "loss": 2.2557, + "step": 1366 + }, + { + "epoch": 3.09, + "learning_rate": 1.6199640046226035e-06, + "loss": 2.6659, + "step": 1368 + }, + { + "epoch": 3.1, + "learning_rate": 1.6188133561291884e-06, + "loss": 2.0596, + "step": 1370 + }, + { + "epoch": 3.1, + "learning_rate": 1.6176613783683218e-06, + "loss": 2.5042, + "step": 1372 + }, + { + "epoch": 3.11, + "learning_rate": 1.616508073814557e-06, + "loss": 2.3132, + "step": 1374 + }, + { + "epoch": 3.11, + "learning_rate": 1.6153534449452972e-06, + "loss": 2.413, + "step": 1376 + }, + { + "epoch": 3.12, + "learning_rate": 1.6141974942407907e-06, + "loss": 2.596, + "step": 1378 + }, + { + "epoch": 3.12, + "learning_rate": 1.6130402241841247e-06, + "loss": 2.3572, + "step": 1380 + }, + { + "epoch": 3.13, + "learning_rate": 1.6118816372612207e-06, + "loss": 2.3489, + "step": 1382 + }, + { + "epoch": 3.13, + "learning_rate": 1.6107217359608287e-06, + "loss": 2.5228, + "step": 1384 + }, + { + "epoch": 3.13, + "learning_rate": 1.6095605227745229e-06, + "loss": 2.3761, + "step": 1386 + }, + { + "epoch": 3.14, + "learning_rate": 1.608398000196694e-06, + "loss": 2.5952, + "step": 1388 + }, + { + "epoch": 3.14, + "learning_rate": 1.6072341707245474e-06, + "loss": 2.1189, + "step": 1390 + }, + { + "epoch": 3.15, + "learning_rate": 1.6060690368580944e-06, + "loss": 2.2626, + "step": 1392 + }, + { + "epoch": 3.15, + "learning_rate": 1.6049026011001488e-06, + "loss": 2.5117, + "step": 1394 + }, + { + "epoch": 3.16, + "learning_rate": 1.6037348659563205e-06, + "loss": 2.3999, + "step": 1396 + }, + { + "epoch": 3.16, + "learning_rate": 1.6025658339350112e-06, + "loss": 2.2231, + "step": 1398 + }, + { + "epoch": 3.17, + "learning_rate": 1.6013955075474083e-06, + "loss": 2.3908, + "step": 1400 + }, + { + "epoch": 3.17, + "learning_rate": 1.6002238893074794e-06, + "loss": 2.1988, + "step": 1402 + }, + { + "epoch": 3.18, + "learning_rate": 1.599050981731967e-06, + "loss": 2.3146, + "step": 1404 + }, + { + "epoch": 3.18, + "learning_rate": 1.597876787340383e-06, + "loss": 2.6744, + "step": 1406 + }, + { + "epoch": 3.18, + "learning_rate": 1.596701308655005e-06, + "loss": 2.405, + "step": 1408 + }, + { + "epoch": 3.19, + "learning_rate": 1.5955245482008674e-06, + "loss": 2.3743, + "step": 1410 + }, + { + "epoch": 3.19, + "learning_rate": 1.5943465085057594e-06, + "loss": 2.6347, + "step": 1412 + }, + { + "epoch": 3.2, + "learning_rate": 1.5931671921002172e-06, + "loss": 2.4252, + "step": 1414 + }, + { + "epoch": 3.2, + "learning_rate": 1.5919866015175197e-06, + "loss": 2.6307, + "step": 1416 + }, + { + "epoch": 3.21, + "learning_rate": 1.5908047392936835e-06, + "loss": 2.1272, + "step": 1418 + }, + { + "epoch": 3.21, + "learning_rate": 1.589621607967456e-06, + "loss": 2.6618, + "step": 1420 + }, + { + "epoch": 3.22, + "learning_rate": 1.5884372100803112e-06, + "loss": 2.6405, + "step": 1422 + }, + { + "epoch": 3.22, + "learning_rate": 1.5872515481764436e-06, + "loss": 2.3396, + "step": 1424 + }, + { + "epoch": 3.23, + "learning_rate": 1.5860646248027623e-06, + "loss": 2.7622, + "step": 1426 + }, + { + "epoch": 3.23, + "learning_rate": 1.5848764425088878e-06, + "loss": 2.3926, + "step": 1428 + }, + { + "epoch": 3.23, + "learning_rate": 1.5836870038471436e-06, + "loss": 2.4872, + "step": 1430 + }, + { + "epoch": 3.24, + "learning_rate": 1.582496311372552e-06, + "loss": 2.3206, + "step": 1432 + }, + { + "epoch": 3.24, + "learning_rate": 1.5813043676428293e-06, + "loss": 2.3901, + "step": 1434 + }, + { + "epoch": 3.25, + "learning_rate": 1.5801111752183788e-06, + "loss": 2.4461, + "step": 1436 + }, + { + "epoch": 3.25, + "learning_rate": 1.5789167366622866e-06, + "loss": 2.5871, + "step": 1438 + }, + { + "epoch": 3.26, + "learning_rate": 1.5777210545403154e-06, + "loss": 2.6168, + "step": 1440 + }, + { + "epoch": 3.26, + "learning_rate": 1.5765241314208998e-06, + "loss": 2.4094, + "step": 1442 + }, + { + "epoch": 3.27, + "learning_rate": 1.575325969875139e-06, + "loss": 2.3476, + "step": 1444 + }, + { + "epoch": 3.27, + "learning_rate": 1.5741265724767941e-06, + "loss": 2.2706, + "step": 1446 + }, + { + "epoch": 3.28, + "learning_rate": 1.5729259418022789e-06, + "loss": 2.2549, + "step": 1448 + }, + { + "epoch": 3.28, + "learning_rate": 1.5717240804306578e-06, + "loss": 2.3836, + "step": 1450 + }, + { + "epoch": 3.28, + "learning_rate": 1.5705209909436387e-06, + "loss": 2.273, + "step": 1452 + }, + { + "epoch": 3.29, + "learning_rate": 1.569316675925567e-06, + "loss": 2.2781, + "step": 1454 + }, + { + "epoch": 3.29, + "learning_rate": 1.568111137963422e-06, + "loss": 2.4964, + "step": 1456 + }, + { + "epoch": 3.3, + "learning_rate": 1.5669043796468078e-06, + "loss": 2.452, + "step": 1458 + }, + { + "epoch": 3.3, + "learning_rate": 1.5656964035679518e-06, + "loss": 2.2103, + "step": 1460 + }, + { + "epoch": 3.31, + "learning_rate": 1.5644872123216968e-06, + "loss": 2.3263, + "step": 1462 + }, + { + "epoch": 3.31, + "learning_rate": 1.5632768085054956e-06, + "loss": 2.3993, + "step": 1464 + }, + { + "epoch": 3.32, + "learning_rate": 1.5620651947194054e-06, + "loss": 2.3741, + "step": 1466 + }, + { + "epoch": 3.32, + "learning_rate": 1.5608523735660834e-06, + "loss": 2.7935, + "step": 1468 + }, + { + "epoch": 3.32, + "learning_rate": 1.55963834765078e-06, + "loss": 2.5123, + "step": 1470 + }, + { + "epoch": 3.33, + "learning_rate": 1.5584231195813332e-06, + "loss": 2.5983, + "step": 1472 + }, + { + "epoch": 3.33, + "learning_rate": 1.5572066919681634e-06, + "loss": 2.4809, + "step": 1474 + }, + { + "epoch": 3.34, + "learning_rate": 1.5559890674242687e-06, + "loss": 2.3429, + "step": 1476 + }, + { + "epoch": 3.34, + "learning_rate": 1.5547702485652164e-06, + "loss": 2.5719, + "step": 1478 + }, + { + "epoch": 3.35, + "learning_rate": 1.5535502380091411e-06, + "loss": 2.4216, + "step": 1480 + }, + { + "epoch": 3.35, + "learning_rate": 1.5523290383767366e-06, + "loss": 2.6257, + "step": 1482 + }, + { + "epoch": 3.36, + "learning_rate": 1.551106652291251e-06, + "loss": 2.5949, + "step": 1484 + }, + { + "epoch": 3.36, + "learning_rate": 1.5498830823784808e-06, + "loss": 2.5236, + "step": 1486 + }, + { + "epoch": 3.37, + "learning_rate": 1.5486583312667652e-06, + "loss": 2.2841, + "step": 1488 + }, + { + "epoch": 3.37, + "learning_rate": 1.5474324015869819e-06, + "loss": 2.6608, + "step": 1490 + }, + { + "epoch": 3.37, + "learning_rate": 1.5462052959725388e-06, + "loss": 2.6327, + "step": 1492 + }, + { + "epoch": 3.38, + "learning_rate": 1.5449770170593715e-06, + "loss": 2.3065, + "step": 1494 + }, + { + "epoch": 3.38, + "learning_rate": 1.5437475674859335e-06, + "loss": 2.4162, + "step": 1496 + }, + { + "epoch": 3.39, + "learning_rate": 1.542516949893196e-06, + "loss": 2.5158, + "step": 1498 + }, + { + "epoch": 3.39, + "learning_rate": 1.5412851669246368e-06, + "loss": 2.4369, + "step": 1500 + }, + { + "epoch": 3.4, + "learning_rate": 1.540052221226238e-06, + "loss": 2.6495, + "step": 1502 + }, + { + "epoch": 3.4, + "learning_rate": 1.5388181154464795e-06, + "loss": 2.3576, + "step": 1504 + }, + { + "epoch": 3.41, + "learning_rate": 1.5375828522363325e-06, + "loss": 2.6325, + "step": 1506 + }, + { + "epoch": 3.41, + "learning_rate": 1.5363464342492552e-06, + "loss": 2.4328, + "step": 1508 + }, + { + "epoch": 3.42, + "learning_rate": 1.5351088641411863e-06, + "loss": 2.5354, + "step": 1510 + }, + { + "epoch": 3.42, + "learning_rate": 1.5338701445705385e-06, + "loss": 2.7207, + "step": 1512 + }, + { + "epoch": 3.42, + "learning_rate": 1.5326302781981948e-06, + "loss": 2.3158, + "step": 1514 + }, + { + "epoch": 3.43, + "learning_rate": 1.5313892676875008e-06, + "loss": 2.6291, + "step": 1516 + }, + { + "epoch": 3.43, + "learning_rate": 1.5301471157042603e-06, + "loss": 2.4181, + "step": 1518 + }, + { + "epoch": 3.44, + "learning_rate": 1.5289038249167285e-06, + "loss": 2.4438, + "step": 1520 + }, + { + "epoch": 3.44, + "learning_rate": 1.5276593979956078e-06, + "loss": 2.4456, + "step": 1522 + }, + { + "epoch": 3.45, + "learning_rate": 1.5264138376140405e-06, + "loss": 2.3465, + "step": 1524 + }, + { + "epoch": 3.45, + "learning_rate": 1.5251671464476034e-06, + "loss": 2.6112, + "step": 1526 + }, + { + "epoch": 3.46, + "learning_rate": 1.5239193271743025e-06, + "loss": 2.7099, + "step": 1528 + }, + { + "epoch": 3.46, + "learning_rate": 1.5226703824745682e-06, + "loss": 2.5169, + "step": 1530 + }, + { + "epoch": 3.47, + "learning_rate": 1.5214203150312463e-06, + "loss": 2.6179, + "step": 1532 + }, + { + "epoch": 3.47, + "learning_rate": 1.5201691275295963e-06, + "loss": 2.6206, + "step": 1534 + }, + { + "epoch": 3.47, + "learning_rate": 1.5189168226572825e-06, + "loss": 2.4832, + "step": 1536 + }, + { + "epoch": 3.48, + "learning_rate": 1.5176634031043706e-06, + "loss": 2.4303, + "step": 1538 + }, + { + "epoch": 3.48, + "learning_rate": 1.5164088715633185e-06, + "loss": 2.3131, + "step": 1540 + }, + { + "epoch": 3.49, + "learning_rate": 1.515153230728976e-06, + "loss": 2.4736, + "step": 1542 + }, + { + "epoch": 3.49, + "learning_rate": 1.5138964832985724e-06, + "loss": 2.3829, + "step": 1544 + }, + { + "epoch": 3.5, + "learning_rate": 1.5126386319717166e-06, + "loss": 2.3293, + "step": 1546 + }, + { + "epoch": 3.5, + "learning_rate": 1.5113796794503875e-06, + "loss": 2.5245, + "step": 1548 + }, + { + "epoch": 3.51, + "learning_rate": 1.5101196284389297e-06, + "loss": 2.6711, + "step": 1550 + }, + { + "epoch": 3.51, + "learning_rate": 1.5088584816440482e-06, + "loss": 2.2194, + "step": 1552 + }, + { + "epoch": 3.51, + "learning_rate": 1.5075962417748e-06, + "loss": 2.6407, + "step": 1554 + }, + { + "epoch": 3.52, + "learning_rate": 1.5063329115425923e-06, + "loss": 2.5811, + "step": 1556 + }, + { + "epoch": 3.52, + "learning_rate": 1.505068493661173e-06, + "loss": 2.533, + "step": 1558 + }, + { + "epoch": 3.53, + "learning_rate": 1.503802990846627e-06, + "loss": 2.3742, + "step": 1560 + }, + { + "epoch": 3.53, + "learning_rate": 1.50253640581737e-06, + "loss": 2.4339, + "step": 1562 + }, + { + "epoch": 3.54, + "learning_rate": 1.5012687412941412e-06, + "loss": 2.1867, + "step": 1564 + }, + { + "epoch": 3.54, + "learning_rate": 1.5e-06, + "loss": 2.3181, + "step": 1566 + }, + { + "epoch": 3.55, + "learning_rate": 1.4987301846603183e-06, + "loss": 2.5327, + "step": 1568 + }, + { + "epoch": 3.55, + "learning_rate": 1.4974592980027749e-06, + "loss": 2.4644, + "step": 1570 + }, + { + "epoch": 3.56, + "learning_rate": 1.49618734275735e-06, + "loss": 2.3521, + "step": 1572 + }, + { + "epoch": 3.56, + "learning_rate": 1.4949143216563195e-06, + "loss": 2.323, + "step": 1574 + }, + { + "epoch": 3.56, + "learning_rate": 1.493640237434249e-06, + "loss": 2.4037, + "step": 1576 + }, + { + "epoch": 3.57, + "learning_rate": 1.4923650928279867e-06, + "loss": 2.2227, + "step": 1578 + }, + { + "epoch": 3.57, + "learning_rate": 1.4910888905766602e-06, + "loss": 2.5474, + "step": 1580 + }, + { + "epoch": 3.58, + "learning_rate": 1.4898116334216673e-06, + "loss": 2.165, + "step": 1582 + }, + { + "epoch": 3.58, + "learning_rate": 1.4885333241066734e-06, + "loss": 2.5061, + "step": 1584 + }, + { + "epoch": 3.59, + "learning_rate": 1.4872539653776028e-06, + "loss": 2.3724, + "step": 1586 + }, + { + "epoch": 3.59, + "learning_rate": 1.4859735599826352e-06, + "loss": 2.6099, + "step": 1588 + }, + { + "epoch": 3.6, + "learning_rate": 1.4846921106721974e-06, + "loss": 2.5928, + "step": 1590 + }, + { + "epoch": 3.6, + "learning_rate": 1.4834096201989596e-06, + "loss": 2.2369, + "step": 1592 + }, + { + "epoch": 3.61, + "learning_rate": 1.4821260913178282e-06, + "loss": 2.3638, + "step": 1594 + }, + { + "epoch": 3.61, + "learning_rate": 1.4808415267859398e-06, + "loss": 2.3177, + "step": 1596 + }, + { + "epoch": 3.61, + "learning_rate": 1.4795559293626562e-06, + "loss": 2.2792, + "step": 1598 + }, + { + "epoch": 3.62, + "learning_rate": 1.4782693018095577e-06, + "loss": 2.3882, + "step": 1600 + }, + { + "epoch": 3.62, + "learning_rate": 1.4769816468904378e-06, + "loss": 2.3006, + "step": 1602 + }, + { + "epoch": 3.63, + "learning_rate": 1.4756929673712962e-06, + "loss": 2.2557, + "step": 1604 + }, + { + "epoch": 3.63, + "learning_rate": 1.4744032660203339e-06, + "loss": 2.5488, + "step": 1606 + }, + { + "epoch": 3.64, + "learning_rate": 1.4731125456079467e-06, + "loss": 2.3487, + "step": 1608 + }, + { + "epoch": 3.64, + "learning_rate": 1.47182080890672e-06, + "loss": 2.641, + "step": 1610 + }, + { + "epoch": 3.65, + "learning_rate": 1.4705280586914218e-06, + "loss": 2.1686, + "step": 1612 + }, + { + "epoch": 3.65, + "learning_rate": 1.4692342977389976e-06, + "loss": 2.3053, + "step": 1614 + }, + { + "epoch": 3.66, + "learning_rate": 1.4679395288285627e-06, + "loss": 2.247, + "step": 1616 + }, + { + "epoch": 3.66, + "learning_rate": 1.4666437547413999e-06, + "loss": 2.6372, + "step": 1618 + }, + { + "epoch": 3.66, + "learning_rate": 1.465346978260949e-06, + "loss": 2.6255, + "step": 1620 + }, + { + "epoch": 3.67, + "learning_rate": 1.4640492021728043e-06, + "loss": 2.1904, + "step": 1622 + }, + { + "epoch": 3.67, + "learning_rate": 1.4627504292647074e-06, + "loss": 2.5925, + "step": 1624 + }, + { + "epoch": 3.68, + "learning_rate": 1.4614506623265406e-06, + "loss": 2.6503, + "step": 1626 + }, + { + "epoch": 3.68, + "learning_rate": 1.4601499041503216e-06, + "loss": 2.3066, + "step": 1628 + }, + { + "epoch": 3.69, + "learning_rate": 1.458848157530197e-06, + "loss": 2.4649, + "step": 1630 + }, + { + "epoch": 3.69, + "learning_rate": 1.4575454252624383e-06, + "loss": 2.2839, + "step": 1632 + }, + { + "epoch": 3.7, + "learning_rate": 1.4562417101454316e-06, + "loss": 2.5123, + "step": 1634 + }, + { + "epoch": 3.7, + "learning_rate": 1.4549370149796769e-06, + "loss": 2.3119, + "step": 1636 + }, + { + "epoch": 3.7, + "learning_rate": 1.4536313425677775e-06, + "loss": 2.4356, + "step": 1638 + }, + { + "epoch": 3.71, + "learning_rate": 1.452324695714437e-06, + "loss": 2.5462, + "step": 1640 + }, + { + "epoch": 3.71, + "learning_rate": 1.451017077226452e-06, + "loss": 2.4099, + "step": 1642 + }, + { + "epoch": 3.72, + "learning_rate": 1.4497084899127056e-06, + "loss": 2.4389, + "step": 1644 + }, + { + "epoch": 3.72, + "learning_rate": 1.4483989365841632e-06, + "loss": 2.7143, + "step": 1646 + }, + { + "epoch": 3.73, + "learning_rate": 1.447088420053864e-06, + "loss": 2.5341, + "step": 1648 + }, + { + "epoch": 3.73, + "learning_rate": 1.4457769431369173e-06, + "loss": 2.6033, + "step": 1650 + }, + { + "epoch": 3.74, + "learning_rate": 1.4444645086504947e-06, + "loss": 2.2592, + "step": 1652 + }, + { + "epoch": 3.74, + "learning_rate": 1.443151119413825e-06, + "loss": 2.3131, + "step": 1654 + }, + { + "epoch": 3.75, + "learning_rate": 1.4418367782481882e-06, + "loss": 2.5731, + "step": 1656 + }, + { + "epoch": 3.75, + "learning_rate": 1.4405214879769084e-06, + "loss": 2.6759, + "step": 1658 + }, + { + "epoch": 3.75, + "learning_rate": 1.4392052514253497e-06, + "loss": 2.5662, + "step": 1660 + }, + { + "epoch": 3.76, + "learning_rate": 1.437888071420907e-06, + "loss": 2.365, + "step": 1662 + }, + { + "epoch": 3.76, + "learning_rate": 1.4365699507930039e-06, + "loss": 2.3041, + "step": 1664 + }, + { + "epoch": 3.77, + "learning_rate": 1.4352508923730829e-06, + "loss": 2.2466, + "step": 1666 + }, + { + "epoch": 3.77, + "learning_rate": 1.433930898994602e-06, + "loss": 2.6212, + "step": 1668 + }, + { + "epoch": 3.78, + "learning_rate": 1.432609973493027e-06, + "loss": 2.3997, + "step": 1670 + }, + { + "epoch": 3.78, + "learning_rate": 1.431288118705826e-06, + "loss": 2.4061, + "step": 1672 + }, + { + "epoch": 3.79, + "learning_rate": 1.4299653374724642e-06, + "loss": 2.6446, + "step": 1674 + }, + { + "epoch": 3.79, + "learning_rate": 1.4286416326343957e-06, + "loss": 2.3368, + "step": 1676 + }, + { + "epoch": 3.8, + "learning_rate": 1.4273170070350587e-06, + "loss": 2.2844, + "step": 1678 + }, + { + "epoch": 3.8, + "learning_rate": 1.4259914635198701e-06, + "loss": 2.3913, + "step": 1680 + }, + { + "epoch": 3.8, + "learning_rate": 1.4246650049362185e-06, + "loss": 2.271, + "step": 1682 + }, + { + "epoch": 3.81, + "learning_rate": 1.4233376341334567e-06, + "loss": 2.2982, + "step": 1684 + }, + { + "epoch": 3.81, + "learning_rate": 1.422009353962899e-06, + "loss": 2.1357, + "step": 1686 + }, + { + "epoch": 3.82, + "learning_rate": 1.4206801672778117e-06, + "loss": 2.1893, + "step": 1688 + }, + { + "epoch": 3.82, + "learning_rate": 1.4193500769334088e-06, + "loss": 2.5499, + "step": 1690 + }, + { + "epoch": 3.83, + "learning_rate": 1.4180190857868459e-06, + "loss": 2.4208, + "step": 1692 + }, + { + "epoch": 3.83, + "learning_rate": 1.4166871966972132e-06, + "loss": 2.5682, + "step": 1694 + }, + { + "epoch": 3.84, + "learning_rate": 1.4153544125255291e-06, + "loss": 2.5289, + "step": 1696 + }, + { + "epoch": 3.84, + "learning_rate": 1.4140207361347364e-06, + "loss": 2.622, + "step": 1698 + }, + { + "epoch": 3.85, + "learning_rate": 1.4126861703896926e-06, + "loss": 2.3856, + "step": 1700 + }, + { + "epoch": 3.85, + "learning_rate": 1.4113507181571669e-06, + "loss": 2.3768, + "step": 1702 + }, + { + "epoch": 3.85, + "learning_rate": 1.4100143823058324e-06, + "loss": 2.3536, + "step": 1704 + }, + { + "epoch": 3.86, + "learning_rate": 1.4086771657062597e-06, + "loss": 2.4242, + "step": 1706 + }, + { + "epoch": 3.86, + "learning_rate": 1.4073390712309126e-06, + "loss": 2.5667, + "step": 1708 + }, + { + "epoch": 3.87, + "learning_rate": 1.4060001017541395e-06, + "loss": 2.2116, + "step": 1710 + }, + { + "epoch": 3.87, + "learning_rate": 1.4046602601521688e-06, + "loss": 2.3444, + "step": 1712 + }, + { + "epoch": 3.88, + "learning_rate": 1.4033195493031021e-06, + "loss": 2.4935, + "step": 1714 + }, + { + "epoch": 3.88, + "learning_rate": 1.4019779720869088e-06, + "loss": 2.4833, + "step": 1716 + }, + { + "epoch": 3.89, + "learning_rate": 1.4006355313854189e-06, + "loss": 2.7645, + "step": 1718 + }, + { + "epoch": 3.89, + "learning_rate": 1.3992922300823166e-06, + "loss": 2.5373, + "step": 1720 + }, + { + "epoch": 3.89, + "learning_rate": 1.3979480710631366e-06, + "loss": 2.3545, + "step": 1722 + }, + { + "epoch": 3.9, + "learning_rate": 1.3966030572152542e-06, + "loss": 2.5239, + "step": 1724 + }, + { + "epoch": 3.9, + "learning_rate": 1.395257191427882e-06, + "loss": 2.5872, + "step": 1726 + }, + { + "epoch": 3.91, + "learning_rate": 1.3939104765920616e-06, + "loss": 2.3978, + "step": 1728 + }, + { + "epoch": 3.91, + "learning_rate": 1.39256291560066e-06, + "loss": 2.5142, + "step": 1730 + }, + { + "epoch": 3.92, + "learning_rate": 1.3912145113483606e-06, + "loss": 2.7417, + "step": 1732 + }, + { + "epoch": 3.92, + "learning_rate": 1.3898652667316582e-06, + "loss": 2.3602, + "step": 1734 + }, + { + "epoch": 3.93, + "learning_rate": 1.3885151846488536e-06, + "loss": 2.3813, + "step": 1736 + }, + { + "epoch": 3.93, + "learning_rate": 1.387164268000046e-06, + "loss": 2.5534, + "step": 1738 + }, + { + "epoch": 3.94, + "learning_rate": 1.385812519687127e-06, + "loss": 2.5258, + "step": 1740 + }, + { + "epoch": 3.94, + "learning_rate": 1.3844599426137755e-06, + "loss": 2.6035, + "step": 1742 + }, + { + "epoch": 3.94, + "learning_rate": 1.3831065396854507e-06, + "loss": 2.5751, + "step": 1744 + }, + { + "epoch": 3.95, + "learning_rate": 1.3817523138093842e-06, + "loss": 2.5558, + "step": 1746 + }, + { + "epoch": 3.95, + "learning_rate": 1.3803972678945774e-06, + "loss": 2.0873, + "step": 1748 + }, + { + "epoch": 3.96, + "learning_rate": 1.379041404851792e-06, + "loss": 2.1898, + "step": 1750 + }, + { + "epoch": 3.96, + "learning_rate": 1.3776847275935456e-06, + "loss": 2.5692, + "step": 1752 + }, + { + "epoch": 3.97, + "learning_rate": 1.3763272390341042e-06, + "loss": 2.4294, + "step": 1754 + }, + { + "epoch": 3.97, + "learning_rate": 1.374968942089477e-06, + "loss": 2.429, + "step": 1756 + }, + { + "epoch": 3.98, + "learning_rate": 1.3736098396774094e-06, + "loss": 2.4356, + "step": 1758 + }, + { + "epoch": 3.98, + "learning_rate": 1.3722499347173772e-06, + "loss": 2.2805, + "step": 1760 + }, + { + "epoch": 3.99, + "learning_rate": 1.3708892301305807e-06, + "loss": 2.4228, + "step": 1762 + }, + { + "epoch": 3.99, + "learning_rate": 1.3695277288399365e-06, + "loss": 2.2363, + "step": 1764 + }, + { + "epoch": 3.99, + "learning_rate": 1.3681654337700736e-06, + "loss": 2.426, + "step": 1766 + }, + { + "epoch": 4.0, + "learning_rate": 1.3668023478473264e-06, + "loss": 2.7234, + "step": 1768 + }, + { + "epoch": 4.0, + "learning_rate": 1.365438473999727e-06, + "loss": 2.5752, + "step": 1770 + }, + { + "epoch": 4.01, + "learning_rate": 1.3640738151570005e-06, + "loss": 2.4224, + "step": 1772 + }, + { + "epoch": 4.01, + "learning_rate": 1.362708374250559e-06, + "loss": 2.4265, + "step": 1774 + }, + { + "epoch": 4.02, + "learning_rate": 1.3613421542134936e-06, + "loss": 2.4323, + "step": 1776 + }, + { + "epoch": 4.02, + "learning_rate": 1.3599751579805697e-06, + "loss": 2.2774, + "step": 1778 + }, + { + "epoch": 4.03, + "learning_rate": 1.358607388488219e-06, + "loss": 2.5681, + "step": 1780 + }, + { + "epoch": 4.03, + "learning_rate": 1.357238848674536e-06, + "loss": 2.2788, + "step": 1782 + }, + { + "epoch": 4.04, + "learning_rate": 1.3558695414792677e-06, + "loss": 2.5053, + "step": 1784 + }, + { + "epoch": 4.04, + "learning_rate": 1.3544994698438115e-06, + "loss": 2.4669, + "step": 1786 + }, + { + "epoch": 4.04, + "learning_rate": 1.3531286367112058e-06, + "loss": 2.2683, + "step": 1788 + }, + { + "epoch": 4.05, + "learning_rate": 1.351757045026125e-06, + "loss": 2.2351, + "step": 1790 + }, + { + "epoch": 4.05, + "learning_rate": 1.350384697734873e-06, + "loss": 2.3426, + "step": 1792 + }, + { + "epoch": 4.06, + "learning_rate": 1.3490115977853772e-06, + "loss": 2.3011, + "step": 1794 + }, + { + "epoch": 4.06, + "learning_rate": 1.34763774812718e-06, + "loss": 2.3215, + "step": 1796 + }, + { + "epoch": 4.07, + "learning_rate": 1.3462631517114374e-06, + "loss": 2.4746, + "step": 1798 + }, + { + "epoch": 4.07, + "learning_rate": 1.3448878114909064e-06, + "loss": 2.4723, + "step": 1800 + }, + { + "epoch": 4.08, + "learning_rate": 1.3435117304199434e-06, + "loss": 2.5015, + "step": 1802 + }, + { + "epoch": 4.08, + "learning_rate": 1.3421349114544962e-06, + "loss": 2.3998, + "step": 1804 + }, + { + "epoch": 4.08, + "learning_rate": 1.3407573575520973e-06, + "loss": 2.2199, + "step": 1806 + }, + { + "epoch": 4.09, + "learning_rate": 1.3393790716718577e-06, + "loss": 2.1974, + "step": 1808 + }, + { + "epoch": 4.09, + "learning_rate": 1.3380000567744608e-06, + "loss": 2.4168, + "step": 1810 + }, + { + "epoch": 4.1, + "learning_rate": 1.3366203158221566e-06, + "loss": 2.4152, + "step": 1812 + }, + { + "epoch": 4.1, + "learning_rate": 1.335239851778754e-06, + "loss": 2.4792, + "step": 1814 + }, + { + "epoch": 4.11, + "learning_rate": 1.3338586676096157e-06, + "loss": 2.267, + "step": 1816 + }, + { + "epoch": 4.11, + "learning_rate": 1.3324767662816513e-06, + "loss": 2.2245, + "step": 1818 + }, + { + "epoch": 4.12, + "learning_rate": 1.3310941507633107e-06, + "loss": 2.1719, + "step": 1820 + }, + { + "epoch": 4.12, + "learning_rate": 1.3297108240245775e-06, + "loss": 2.5786, + "step": 1822 + }, + { + "epoch": 4.13, + "learning_rate": 1.3283267890369638e-06, + "loss": 2.4891, + "step": 1824 + }, + { + "epoch": 4.13, + "learning_rate": 1.326942048773503e-06, + "loss": 2.232, + "step": 1826 + }, + { + "epoch": 4.13, + "learning_rate": 1.3255566062087428e-06, + "loss": 2.2262, + "step": 1828 + }, + { + "epoch": 4.14, + "learning_rate": 1.3241704643187408e-06, + "loss": 2.1527, + "step": 1830 + }, + { + "epoch": 4.14, + "learning_rate": 1.3227836260810555e-06, + "loss": 2.5635, + "step": 1832 + }, + { + "epoch": 4.15, + "learning_rate": 1.3213960944747416e-06, + "loss": 2.6371, + "step": 1834 + }, + { + "epoch": 4.15, + "learning_rate": 1.3200078724803437e-06, + "loss": 2.1861, + "step": 1836 + }, + { + "epoch": 4.16, + "learning_rate": 1.3186189630798892e-06, + "loss": 2.2795, + "step": 1838 + }, + { + "epoch": 4.16, + "learning_rate": 1.3172293692568816e-06, + "loss": 2.363, + "step": 1840 + }, + { + "epoch": 4.17, + "learning_rate": 1.3158390939962952e-06, + "loss": 2.8042, + "step": 1842 + }, + { + "epoch": 4.17, + "learning_rate": 1.3144481402845677e-06, + "loss": 2.4579, + "step": 1844 + }, + { + "epoch": 4.18, + "learning_rate": 1.3130565111095947e-06, + "loss": 2.486, + "step": 1846 + }, + { + "epoch": 4.18, + "learning_rate": 1.3116642094607221e-06, + "loss": 2.3256, + "step": 1848 + }, + { + "epoch": 4.18, + "learning_rate": 1.3102712383287411e-06, + "loss": 2.5683, + "step": 1850 + }, + { + "epoch": 4.19, + "learning_rate": 1.30887760070588e-06, + "loss": 2.3407, + "step": 1852 + }, + { + "epoch": 4.19, + "learning_rate": 1.3074832995857996e-06, + "loss": 2.2962, + "step": 1854 + }, + { + "epoch": 4.2, + "learning_rate": 1.3060883379635855e-06, + "loss": 2.7058, + "step": 1856 + }, + { + "epoch": 4.2, + "learning_rate": 1.304692718835743e-06, + "loss": 2.3485, + "step": 1858 + }, + { + "epoch": 4.21, + "learning_rate": 1.3032964452001886e-06, + "loss": 2.6635, + "step": 1860 + }, + { + "epoch": 4.21, + "learning_rate": 1.301899520056245e-06, + "loss": 2.1123, + "step": 1862 + }, + { + "epoch": 4.22, + "learning_rate": 1.3005019464046352e-06, + "loss": 2.4141, + "step": 1864 + }, + { + "epoch": 4.22, + "learning_rate": 1.2991037272474743e-06, + "loss": 2.5197, + "step": 1866 + }, + { + "epoch": 4.23, + "learning_rate": 1.2977048655882644e-06, + "loss": 2.2944, + "step": 1868 + }, + { + "epoch": 4.23, + "learning_rate": 1.296305364431888e-06, + "loss": 2.6415, + "step": 1870 + }, + { + "epoch": 4.23, + "learning_rate": 1.294905226784601e-06, + "loss": 2.3808, + "step": 1872 + }, + { + "epoch": 4.24, + "learning_rate": 1.2935044556540265e-06, + "loss": 2.4877, + "step": 1874 + }, + { + "epoch": 4.24, + "learning_rate": 1.2921030540491482e-06, + "loss": 2.6193, + "step": 1876 + }, + { + "epoch": 4.25, + "learning_rate": 1.2907010249803052e-06, + "loss": 2.3594, + "step": 1878 + }, + { + "epoch": 4.25, + "learning_rate": 1.2892983714591825e-06, + "loss": 2.17, + "step": 1880 + }, + { + "epoch": 4.26, + "learning_rate": 1.2878950964988086e-06, + "loss": 2.4788, + "step": 1882 + }, + { + "epoch": 4.26, + "learning_rate": 1.2864912031135455e-06, + "loss": 2.4027, + "step": 1884 + }, + { + "epoch": 4.27, + "learning_rate": 1.2850866943190843e-06, + "loss": 2.4353, + "step": 1886 + }, + { + "epoch": 4.27, + "learning_rate": 1.2836815731324375e-06, + "loss": 2.6185, + "step": 1888 + }, + { + "epoch": 4.27, + "learning_rate": 1.2822758425719336e-06, + "loss": 2.2183, + "step": 1890 + }, + { + "epoch": 4.28, + "learning_rate": 1.2808695056572098e-06, + "loss": 2.3426, + "step": 1892 + }, + { + "epoch": 4.28, + "learning_rate": 1.2794625654092057e-06, + "loss": 2.2975, + "step": 1894 + }, + { + "epoch": 4.29, + "learning_rate": 1.2780550248501577e-06, + "loss": 2.5333, + "step": 1896 + }, + { + "epoch": 4.29, + "learning_rate": 1.2766468870035905e-06, + "loss": 2.3948, + "step": 1898 + }, + { + "epoch": 4.3, + "learning_rate": 1.2752381548943122e-06, + "loss": 2.161, + "step": 1900 + }, + { + "epoch": 4.3, + "learning_rate": 1.2738288315484088e-06, + "loss": 2.7284, + "step": 1902 + }, + { + "epoch": 4.31, + "learning_rate": 1.272418919993234e-06, + "loss": 2.2664, + "step": 1904 + }, + { + "epoch": 4.31, + "learning_rate": 1.2710084232574073e-06, + "loss": 2.4391, + "step": 1906 + }, + { + "epoch": 4.32, + "learning_rate": 1.2695973443708035e-06, + "loss": 2.5834, + "step": 1908 + }, + { + "epoch": 4.32, + "learning_rate": 1.2681856863645485e-06, + "loss": 2.4076, + "step": 1910 + }, + { + "epoch": 4.32, + "learning_rate": 1.2667734522710128e-06, + "loss": 2.4514, + "step": 1912 + }, + { + "epoch": 4.33, + "learning_rate": 1.2653606451238035e-06, + "loss": 2.2832, + "step": 1914 + }, + { + "epoch": 4.33, + "learning_rate": 1.263947267957759e-06, + "loss": 2.4629, + "step": 1916 + }, + { + "epoch": 4.34, + "learning_rate": 1.2625333238089421e-06, + "loss": 2.5803, + "step": 1918 + }, + { + "epoch": 4.34, + "learning_rate": 1.2611188157146341e-06, + "loss": 2.2946, + "step": 1920 + }, + { + "epoch": 4.35, + "learning_rate": 1.2597037467133267e-06, + "loss": 2.4157, + "step": 1922 + }, + { + "epoch": 4.35, + "learning_rate": 1.2582881198447172e-06, + "loss": 2.4456, + "step": 1924 + }, + { + "epoch": 4.36, + "learning_rate": 1.2568719381497013e-06, + "loss": 1.9894, + "step": 1926 + }, + { + "epoch": 4.36, + "learning_rate": 1.2554552046703658e-06, + "loss": 2.2723, + "step": 1928 + }, + { + "epoch": 4.37, + "learning_rate": 1.2540379224499838e-06, + "loss": 2.5817, + "step": 1930 + }, + { + "epoch": 4.37, + "learning_rate": 1.2526200945330056e-06, + "loss": 2.2494, + "step": 1932 + }, + { + "epoch": 4.37, + "learning_rate": 1.251201723965056e-06, + "loss": 2.4479, + "step": 1934 + }, + { + "epoch": 4.38, + "learning_rate": 1.2497828137929233e-06, + "loss": 2.1995, + "step": 1936 + }, + { + "epoch": 4.38, + "learning_rate": 1.2483633670645562e-06, + "loss": 2.2064, + "step": 1938 + }, + { + "epoch": 4.39, + "learning_rate": 1.2469433868290552e-06, + "loss": 2.1307, + "step": 1940 + }, + { + "epoch": 4.39, + "learning_rate": 1.2455228761366674e-06, + "loss": 2.3635, + "step": 1942 + }, + { + "epoch": 4.4, + "learning_rate": 1.244101838038779e-06, + "loss": 2.2071, + "step": 1944 + }, + { + "epoch": 4.4, + "learning_rate": 1.2426802755879096e-06, + "loss": 2.4216, + "step": 1946 + }, + { + "epoch": 4.41, + "learning_rate": 1.2412581918377045e-06, + "loss": 2.4252, + "step": 1948 + }, + { + "epoch": 4.41, + "learning_rate": 1.2398355898429295e-06, + "loss": 2.7158, + "step": 1950 + }, + { + "epoch": 4.42, + "learning_rate": 1.2384124726594626e-06, + "loss": 2.1449, + "step": 1952 + }, + { + "epoch": 4.42, + "learning_rate": 1.23698884334429e-06, + "loss": 2.2603, + "step": 1954 + }, + { + "epoch": 4.42, + "learning_rate": 1.235564704955496e-06, + "loss": 2.5274, + "step": 1956 + }, + { + "epoch": 4.43, + "learning_rate": 1.2341400605522604e-06, + "loss": 2.4358, + "step": 1958 + }, + { + "epoch": 4.43, + "learning_rate": 1.2327149131948492e-06, + "loss": 2.3367, + "step": 1960 + }, + { + "epoch": 4.44, + "learning_rate": 1.2312892659446083e-06, + "loss": 2.2516, + "step": 1962 + }, + { + "epoch": 4.44, + "learning_rate": 1.2298631218639584e-06, + "loss": 2.5467, + "step": 1964 + }, + { + "epoch": 4.45, + "learning_rate": 1.228436484016386e-06, + "loss": 2.6495, + "step": 1966 + }, + { + "epoch": 4.45, + "learning_rate": 1.22700935546644e-06, + "loss": 2.3837, + "step": 1968 + }, + { + "epoch": 4.46, + "learning_rate": 1.225581739279722e-06, + "loss": 2.4825, + "step": 1970 + }, + { + "epoch": 4.46, + "learning_rate": 1.224153638522882e-06, + "loss": 2.3524, + "step": 1972 + }, + { + "epoch": 4.46, + "learning_rate": 1.2227250562636098e-06, + "loss": 2.6791, + "step": 1974 + }, + { + "epoch": 4.47, + "learning_rate": 1.2212959955706309e-06, + "loss": 2.3547, + "step": 1976 + }, + { + "epoch": 4.47, + "learning_rate": 1.2198664595136977e-06, + "loss": 2.4261, + "step": 1978 + }, + { + "epoch": 4.48, + "learning_rate": 1.2184364511635835e-06, + "loss": 2.2508, + "step": 1980 + }, + { + "epoch": 4.48, + "learning_rate": 1.2170059735920768e-06, + "loss": 2.3813, + "step": 1982 + }, + { + "epoch": 4.49, + "learning_rate": 1.2155750298719736e-06, + "loss": 2.4343, + "step": 1984 + }, + { + "epoch": 4.49, + "learning_rate": 1.2141436230770716e-06, + "loss": 2.2746, + "step": 1986 + }, + { + "epoch": 4.5, + "learning_rate": 1.2127117562821627e-06, + "loss": 2.343, + "step": 1988 + }, + { + "epoch": 4.5, + "learning_rate": 1.2112794325630273e-06, + "loss": 2.436, + "step": 1990 + }, + { + "epoch": 4.51, + "learning_rate": 1.2098466549964268e-06, + "loss": 2.3389, + "step": 1992 + }, + { + "epoch": 4.51, + "learning_rate": 1.2084134266600986e-06, + "loss": 2.7032, + "step": 1994 + }, + { + "epoch": 4.51, + "learning_rate": 1.2069797506327474e-06, + "loss": 2.7591, + "step": 1996 + }, + { + "epoch": 4.52, + "learning_rate": 1.2055456299940397e-06, + "loss": 2.3661, + "step": 1998 + }, + { + "epoch": 4.52, + "learning_rate": 1.2041110678245974e-06, + "loss": 2.2633, + "step": 2000 + }, + { + "epoch": 4.53, + "learning_rate": 1.2026760672059908e-06, + "loss": 2.5338, + "step": 2002 + }, + { + "epoch": 4.53, + "learning_rate": 1.2012406312207316e-06, + "loss": 2.5867, + "step": 2004 + }, + { + "epoch": 4.54, + "learning_rate": 1.199804762952268e-06, + "loss": 2.4737, + "step": 2006 + }, + { + "epoch": 4.54, + "learning_rate": 1.1983684654849741e-06, + "loss": 2.3638, + "step": 2008 + }, + { + "epoch": 4.55, + "learning_rate": 1.1969317419041494e-06, + "loss": 2.2326, + "step": 2010 + }, + { + "epoch": 4.55, + "learning_rate": 1.1954945952960058e-06, + "loss": 2.3512, + "step": 2012 + }, + { + "epoch": 4.56, + "learning_rate": 1.1940570287476661e-06, + "loss": 2.2958, + "step": 2014 + }, + { + "epoch": 4.56, + "learning_rate": 1.1926190453471537e-06, + "loss": 2.1707, + "step": 2016 + }, + { + "epoch": 4.56, + "learning_rate": 1.1911806481833875e-06, + "loss": 2.2141, + "step": 2018 + }, + { + "epoch": 4.57, + "learning_rate": 1.1897418403461762e-06, + "loss": 2.3797, + "step": 2020 + }, + { + "epoch": 4.57, + "learning_rate": 1.1883026249262099e-06, + "loss": 2.4088, + "step": 2022 + }, + { + "epoch": 4.58, + "learning_rate": 1.1868630050150543e-06, + "loss": 2.3545, + "step": 2024 + }, + { + "epoch": 4.58, + "learning_rate": 1.1854229837051443e-06, + "loss": 2.4329, + "step": 2026 + }, + { + "epoch": 4.59, + "learning_rate": 1.1839825640897767e-06, + "loss": 2.3957, + "step": 2028 + }, + { + "epoch": 4.59, + "learning_rate": 1.1825417492631041e-06, + "loss": 2.2269, + "step": 2030 + }, + { + "epoch": 4.6, + "learning_rate": 1.1811005423201281e-06, + "loss": 2.3754, + "step": 2032 + }, + { + "epoch": 4.6, + "learning_rate": 1.1796589463566922e-06, + "loss": 2.5927, + "step": 2034 + }, + { + "epoch": 4.61, + "learning_rate": 1.1782169644694758e-06, + "loss": 2.5426, + "step": 2036 + }, + { + "epoch": 4.61, + "learning_rate": 1.1767745997559877e-06, + "loss": 2.5174, + "step": 2038 + }, + { + "epoch": 4.61, + "learning_rate": 1.1753318553145586e-06, + "loss": 2.3862, + "step": 2040 + }, + { + "epoch": 4.62, + "learning_rate": 1.1738887342443344e-06, + "loss": 2.2668, + "step": 2042 + }, + { + "epoch": 4.62, + "learning_rate": 1.1724452396452713e-06, + "loss": 2.1973, + "step": 2044 + }, + { + "epoch": 4.63, + "learning_rate": 1.171001374618127e-06, + "loss": 2.6375, + "step": 2046 + }, + { + "epoch": 4.63, + "learning_rate": 1.1695571422644549e-06, + "loss": 2.5391, + "step": 2048 + }, + { + "epoch": 4.64, + "learning_rate": 1.1681125456865975e-06, + "loss": 2.7426, + "step": 2050 + }, + { + "epoch": 4.64, + "learning_rate": 1.1666675879876805e-06, + "loss": 2.4048, + "step": 2052 + }, + { + "epoch": 4.65, + "learning_rate": 1.1652222722716039e-06, + "loss": 2.3801, + "step": 2054 + }, + { + "epoch": 4.65, + "learning_rate": 1.1637766016430379e-06, + "loss": 2.3919, + "step": 2056 + }, + { + "epoch": 4.65, + "learning_rate": 1.1623305792074146e-06, + "loss": 2.5378, + "step": 2058 + }, + { + "epoch": 4.66, + "learning_rate": 1.1608842080709217e-06, + "loss": 2.3905, + "step": 2060 + }, + { + "epoch": 4.66, + "learning_rate": 1.1594374913404965e-06, + "loss": 2.2424, + "step": 2062 + }, + { + "epoch": 4.67, + "learning_rate": 1.1579904321238182e-06, + "loss": 2.4612, + "step": 2064 + }, + { + "epoch": 4.67, + "learning_rate": 1.1565430335293016e-06, + "loss": 2.238, + "step": 2066 + }, + { + "epoch": 4.68, + "learning_rate": 1.1550952986660909e-06, + "loss": 2.8252, + "step": 2068 + }, + { + "epoch": 4.68, + "learning_rate": 1.1536472306440526e-06, + "loss": 2.5775, + "step": 2070 + }, + { + "epoch": 4.69, + "learning_rate": 1.1521988325737684e-06, + "loss": 2.4405, + "step": 2072 + }, + { + "epoch": 4.69, + "learning_rate": 1.150750107566529e-06, + "loss": 2.5779, + "step": 2074 + }, + { + "epoch": 4.7, + "learning_rate": 1.1493010587343284e-06, + "loss": 2.303, + "step": 2076 + }, + { + "epoch": 4.7, + "learning_rate": 1.1478516891898548e-06, + "loss": 2.382, + "step": 2078 + }, + { + "epoch": 4.7, + "learning_rate": 1.1464020020464864e-06, + "loss": 2.4109, + "step": 2080 + }, + { + "epoch": 4.71, + "learning_rate": 1.1449520004182833e-06, + "loss": 2.4196, + "step": 2082 + }, + { + "epoch": 4.71, + "learning_rate": 1.1435016874199806e-06, + "loss": 2.3244, + "step": 2084 + }, + { + "epoch": 4.72, + "learning_rate": 1.1420510661669834e-06, + "loss": 2.5645, + "step": 2086 + }, + { + "epoch": 4.72, + "learning_rate": 1.1406001397753576e-06, + "loss": 2.1936, + "step": 2088 + }, + { + "epoch": 4.73, + "learning_rate": 1.1391489113618255e-06, + "loss": 2.4046, + "step": 2090 + }, + { + "epoch": 4.73, + "learning_rate": 1.1376973840437581e-06, + "loss": 2.3622, + "step": 2092 + }, + { + "epoch": 4.74, + "learning_rate": 1.1362455609391678e-06, + "loss": 2.2028, + "step": 2094 + }, + { + "epoch": 4.74, + "learning_rate": 1.1347934451667037e-06, + "loss": 2.1606, + "step": 2096 + }, + { + "epoch": 4.75, + "learning_rate": 1.1333410398456418e-06, + "loss": 2.3497, + "step": 2098 + }, + { + "epoch": 4.75, + "learning_rate": 1.1318883480958816e-06, + "loss": 2.3943, + "step": 2100 + }, + { + "epoch": 4.75, + "learning_rate": 1.130435373037937e-06, + "loss": 2.3429, + "step": 2102 + }, + { + "epoch": 4.76, + "learning_rate": 1.128982117792931e-06, + "loss": 2.1936, + "step": 2104 + }, + { + "epoch": 4.76, + "learning_rate": 1.1275285854825882e-06, + "loss": 2.2376, + "step": 2106 + }, + { + "epoch": 4.77, + "learning_rate": 1.1260747792292283e-06, + "loss": 2.5502, + "step": 2108 + }, + { + "epoch": 4.77, + "learning_rate": 1.1246207021557595e-06, + "loss": 2.3205, + "step": 2110 + }, + { + "epoch": 4.78, + "learning_rate": 1.1231663573856716e-06, + "loss": 2.3361, + "step": 2112 + }, + { + "epoch": 4.78, + "learning_rate": 1.1217117480430302e-06, + "loss": 2.3491, + "step": 2114 + }, + { + "epoch": 4.79, + "learning_rate": 1.1202568772524684e-06, + "loss": 2.3551, + "step": 2116 + }, + { + "epoch": 4.79, + "learning_rate": 1.118801748139181e-06, + "loss": 2.4922, + "step": 2118 + }, + { + "epoch": 4.8, + "learning_rate": 1.117346363828918e-06, + "loss": 2.3158, + "step": 2120 + }, + { + "epoch": 4.8, + "learning_rate": 1.1158907274479772e-06, + "loss": 2.4769, + "step": 2122 + }, + { + "epoch": 4.8, + "learning_rate": 1.1144348421231987e-06, + "loss": 2.4177, + "step": 2124 + }, + { + "epoch": 4.81, + "learning_rate": 1.1129787109819562e-06, + "loss": 2.2472, + "step": 2126 + }, + { + "epoch": 4.81, + "learning_rate": 1.1115223371521524e-06, + "loss": 2.3932, + "step": 2128 + }, + { + "epoch": 4.82, + "learning_rate": 1.1100657237622109e-06, + "loss": 2.4636, + "step": 2130 + }, + { + "epoch": 4.82, + "learning_rate": 1.1086088739410697e-06, + "loss": 2.1997, + "step": 2132 + }, + { + "epoch": 4.83, + "learning_rate": 1.1071517908181752e-06, + "loss": 2.6186, + "step": 2134 + }, + { + "epoch": 4.83, + "learning_rate": 1.105694477523474e-06, + "loss": 2.3176, + "step": 2136 + }, + { + "epoch": 4.84, + "learning_rate": 1.1042369371874088e-06, + "loss": 2.4185, + "step": 2138 + }, + { + "epoch": 4.84, + "learning_rate": 1.1027791729409084e-06, + "loss": 2.6309, + "step": 2140 + }, + { + "epoch": 4.84, + "learning_rate": 1.1013211879153832e-06, + "loss": 2.4281, + "step": 2142 + }, + { + "epoch": 4.85, + "learning_rate": 1.0998629852427179e-06, + "loss": 2.0809, + "step": 2144 + }, + { + "epoch": 4.85, + "learning_rate": 1.0984045680552647e-06, + "loss": 2.3214, + "step": 2146 + }, + { + "epoch": 4.86, + "learning_rate": 1.0969459394858364e-06, + "loss": 2.3554, + "step": 2148 + }, + { + "epoch": 4.86, + "learning_rate": 1.0954871026677001e-06, + "loss": 2.3334, + "step": 2150 + }, + { + "epoch": 4.87, + "learning_rate": 1.0940280607345704e-06, + "loss": 2.6128, + "step": 2152 + }, + { + "epoch": 4.87, + "learning_rate": 1.0925688168206018e-06, + "loss": 2.3866, + "step": 2154 + }, + { + "epoch": 4.88, + "learning_rate": 1.0911093740603836e-06, + "loss": 2.2807, + "step": 2156 + }, + { + "epoch": 4.88, + "learning_rate": 1.0896497355889316e-06, + "loss": 2.2577, + "step": 2158 + }, + { + "epoch": 4.89, + "learning_rate": 1.0881899045416818e-06, + "loss": 2.3046, + "step": 2160 + }, + { + "epoch": 4.89, + "learning_rate": 1.0867298840544849e-06, + "loss": 2.5557, + "step": 2162 + }, + { + "epoch": 4.89, + "learning_rate": 1.085269677263597e-06, + "loss": 2.4749, + "step": 2164 + }, + { + "epoch": 4.9, + "learning_rate": 1.0838092873056768e-06, + "loss": 2.4613, + "step": 2166 + }, + { + "epoch": 4.9, + "learning_rate": 1.0823487173177735e-06, + "loss": 2.4206, + "step": 2168 + }, + { + "epoch": 4.91, + "learning_rate": 1.0808879704373249e-06, + "loss": 2.5469, + "step": 2170 + }, + { + "epoch": 4.91, + "learning_rate": 1.0794270498021486e-06, + "loss": 2.7428, + "step": 2172 + }, + { + "epoch": 4.92, + "learning_rate": 1.077965958550435e-06, + "loss": 2.1805, + "step": 2174 + }, + { + "epoch": 4.92, + "learning_rate": 1.076504699820741e-06, + "loss": 2.4371, + "step": 2176 + }, + { + "epoch": 4.93, + "learning_rate": 1.0750432767519837e-06, + "loss": 2.5389, + "step": 2178 + }, + { + "epoch": 4.93, + "learning_rate": 1.073581692483433e-06, + "loss": 2.3166, + "step": 2180 + }, + { + "epoch": 4.94, + "learning_rate": 1.072119950154705e-06, + "loss": 2.4784, + "step": 2182 + }, + { + "epoch": 4.94, + "learning_rate": 1.0706580529057553e-06, + "loss": 2.2944, + "step": 2184 + }, + { + "epoch": 4.94, + "learning_rate": 1.069196003876872e-06, + "loss": 2.4812, + "step": 2186 + }, + { + "epoch": 4.95, + "learning_rate": 1.06773380620867e-06, + "loss": 2.2998, + "step": 2188 + }, + { + "epoch": 4.95, + "learning_rate": 1.0662714630420833e-06, + "loss": 2.1607, + "step": 2190 + }, + { + "epoch": 4.96, + "learning_rate": 1.0648089775183575e-06, + "loss": 2.5073, + "step": 2192 + }, + { + "epoch": 4.96, + "learning_rate": 1.0633463527790457e-06, + "loss": 2.7015, + "step": 2194 + }, + { + "epoch": 4.97, + "learning_rate": 1.0618835919659981e-06, + "loss": 2.2588, + "step": 2196 + }, + { + "epoch": 4.97, + "learning_rate": 1.0604206982213585e-06, + "loss": 2.5762, + "step": 2198 + }, + { + "epoch": 4.98, + "learning_rate": 1.0589576746875556e-06, + "loss": 2.3897, + "step": 2200 + }, + { + "epoch": 4.98, + "learning_rate": 1.0574945245072979e-06, + "loss": 2.4374, + "step": 2202 + }, + { + "epoch": 4.99, + "learning_rate": 1.0560312508235648e-06, + "loss": 2.1639, + "step": 2204 + }, + { + "epoch": 4.99, + "learning_rate": 1.0545678567796016e-06, + "loss": 2.4232, + "step": 2206 + }, + { + "epoch": 4.99, + "learning_rate": 1.053104345518912e-06, + "loss": 2.1925, + "step": 2208 + }, + { + "epoch": 5.0, + "learning_rate": 1.0516407201852513e-06, + "loss": 2.355, + "step": 2210 + }, + { + "epoch": 5.0, + "learning_rate": 1.0501769839226202e-06, + "loss": 2.6249, + "step": 2212 + }, + { + "epoch": 5.01, + "learning_rate": 1.0487131398752573e-06, + "loss": 2.3273, + "step": 2214 + }, + { + "epoch": 5.01, + "learning_rate": 1.0472491911876332e-06, + "loss": 2.2022, + "step": 2216 + }, + { + "epoch": 5.02, + "learning_rate": 1.045785141004443e-06, + "loss": 2.5215, + "step": 2218 + }, + { + "epoch": 5.02, + "learning_rate": 1.0443209924705999e-06, + "loss": 2.3913, + "step": 2220 + }, + { + "epoch": 5.03, + "learning_rate": 1.0428567487312279e-06, + "loss": 2.0402, + "step": 2222 + }, + { + "epoch": 5.03, + "learning_rate": 1.041392412931656e-06, + "loss": 2.2759, + "step": 2224 + }, + { + "epoch": 5.03, + "learning_rate": 1.0399279882174114e-06, + "loss": 2.3752, + "step": 2226 + }, + { + "epoch": 5.04, + "learning_rate": 1.0384634777342114e-06, + "loss": 2.4845, + "step": 2228 + }, + { + "epoch": 5.04, + "learning_rate": 1.0369988846279576e-06, + "loss": 2.2978, + "step": 2230 + }, + { + "epoch": 5.05, + "learning_rate": 1.03553421204473e-06, + "loss": 2.4405, + "step": 2232 + }, + { + "epoch": 5.05, + "learning_rate": 1.0340694631307786e-06, + "loss": 2.2374, + "step": 2234 + }, + { + "epoch": 5.06, + "learning_rate": 1.0326046410325166e-06, + "loss": 2.3413, + "step": 2236 + }, + { + "epoch": 5.06, + "learning_rate": 1.0311397488965166e-06, + "loss": 2.4933, + "step": 2238 + }, + { + "epoch": 5.07, + "learning_rate": 1.0296747898694994e-06, + "loss": 2.2753, + "step": 2240 + }, + { + "epoch": 5.07, + "learning_rate": 1.028209767098331e-06, + "loss": 2.5901, + "step": 2242 + }, + { + "epoch": 5.08, + "learning_rate": 1.0267446837300133e-06, + "loss": 2.5863, + "step": 2244 + }, + { + "epoch": 5.08, + "learning_rate": 1.0252795429116792e-06, + "loss": 2.432, + "step": 2246 + }, + { + "epoch": 5.08, + "learning_rate": 1.0238143477905842e-06, + "loss": 2.4135, + "step": 2248 + }, + { + "epoch": 5.09, + "learning_rate": 1.0223491015141012e-06, + "loss": 2.2242, + "step": 2250 + }, + { + "epoch": 5.09, + "learning_rate": 1.0208838072297129e-06, + "loss": 2.3453, + "step": 2252 + }, + { + "epoch": 5.1, + "learning_rate": 1.019418468085004e-06, + "loss": 2.3431, + "step": 2254 + }, + { + "epoch": 5.1, + "learning_rate": 1.0179530872276573e-06, + "loss": 2.1643, + "step": 2256 + }, + { + "epoch": 5.11, + "learning_rate": 1.016487667805444e-06, + "loss": 2.2139, + "step": 2258 + }, + { + "epoch": 5.11, + "learning_rate": 1.0150222129662182e-06, + "loss": 2.2468, + "step": 2260 + }, + { + "epoch": 5.12, + "learning_rate": 1.0135567258579111e-06, + "loss": 2.4914, + "step": 2262 + }, + { + "epoch": 5.12, + "learning_rate": 1.0120912096285222e-06, + "loss": 2.3727, + "step": 2264 + }, + { + "epoch": 5.13, + "learning_rate": 1.0106256674261136e-06, + "loss": 2.1403, + "step": 2266 + }, + { + "epoch": 5.13, + "learning_rate": 1.0091601023988031e-06, + "loss": 2.3905, + "step": 2268 + }, + { + "epoch": 5.13, + "learning_rate": 1.007694517694759e-06, + "loss": 2.1131, + "step": 2270 + }, + { + "epoch": 5.14, + "learning_rate": 1.00622891646219e-06, + "loss": 2.3395, + "step": 2272 + }, + { + "epoch": 5.14, + "learning_rate": 1.0047633018493408e-06, + "loss": 2.5324, + "step": 2274 + }, + { + "epoch": 5.15, + "learning_rate": 1.0032976770044854e-06, + "loss": 2.2844, + "step": 2276 + }, + { + "epoch": 5.15, + "learning_rate": 1.0018320450759195e-06, + "loss": 2.4777, + "step": 2278 + }, + { + "epoch": 5.16, + "learning_rate": 1.000366409211954e-06, + "loss": 2.4501, + "step": 2280 + }, + { + "epoch": 5.16, + "learning_rate": 9.989007725609083e-07, + "loss": 2.5747, + "step": 2282 + }, + { + "epoch": 5.17, + "learning_rate": 9.974351382711028e-07, + "loss": 2.5167, + "step": 2284 + }, + { + "epoch": 5.17, + "learning_rate": 9.95969509490854e-07, + "loss": 2.4251, + "step": 2286 + }, + { + "epoch": 5.18, + "learning_rate": 9.945038893684663e-07, + "loss": 2.4821, + "step": 2288 + }, + { + "epoch": 5.18, + "learning_rate": 9.930382810522246e-07, + "loss": 2.2526, + "step": 2290 + }, + { + "epoch": 5.18, + "learning_rate": 9.915726876903891e-07, + "loss": 2.3031, + "step": 2292 + }, + { + "epoch": 5.19, + "learning_rate": 9.901071124311883e-07, + "loss": 2.1421, + "step": 2294 + }, + { + "epoch": 5.19, + "learning_rate": 9.88641558422811e-07, + "loss": 2.6568, + "step": 2296 + }, + { + "epoch": 5.2, + "learning_rate": 9.871760288134006e-07, + "loss": 2.3728, + "step": 2298 + }, + { + "epoch": 5.2, + "learning_rate": 9.857105267510484e-07, + "loss": 2.4358, + "step": 2300 + }, + { + "epoch": 5.21, + "learning_rate": 9.842450553837865e-07, + "loss": 2.4469, + "step": 2302 + }, + { + "epoch": 5.21, + "learning_rate": 9.827796178595805e-07, + "loss": 2.2206, + "step": 2304 + }, + { + "epoch": 5.22, + "learning_rate": 9.813142173263233e-07, + "loss": 2.2388, + "step": 2306 + }, + { + "epoch": 5.22, + "learning_rate": 9.798488569318295e-07, + "loss": 2.5086, + "step": 2308 + }, + { + "epoch": 5.22, + "learning_rate": 9.78383539823827e-07, + "loss": 2.3137, + "step": 2310 + }, + { + "epoch": 5.23, + "learning_rate": 9.76918269149949e-07, + "loss": 2.0895, + "step": 2312 + }, + { + "epoch": 5.23, + "learning_rate": 9.754530480577314e-07, + "loss": 2.4254, + "step": 2314 + }, + { + "epoch": 5.24, + "learning_rate": 9.739878796946027e-07, + "loss": 2.4146, + "step": 2316 + }, + { + "epoch": 5.24, + "learning_rate": 9.725227672078772e-07, + "loss": 2.5433, + "step": 2318 + }, + { + "epoch": 5.25, + "learning_rate": 9.710577137447505e-07, + "loss": 2.4301, + "step": 2320 + }, + { + "epoch": 5.25, + "learning_rate": 9.695927224522907e-07, + "loss": 2.4225, + "step": 2322 + }, + { + "epoch": 5.26, + "learning_rate": 9.68127796477433e-07, + "loss": 2.3673, + "step": 2324 + }, + { + "epoch": 5.26, + "learning_rate": 9.66662938966971e-07, + "loss": 2.2955, + "step": 2326 + }, + { + "epoch": 5.27, + "learning_rate": 9.651981530675524e-07, + "loss": 2.2772, + "step": 2328 + }, + { + "epoch": 5.27, + "learning_rate": 9.637334419256713e-07, + "loss": 2.4733, + "step": 2330 + }, + { + "epoch": 5.27, + "learning_rate": 9.6226880868766e-07, + "loss": 2.8809, + "step": 2332 + }, + { + "epoch": 5.28, + "learning_rate": 9.60804256499684e-07, + "loss": 2.3127, + "step": 2334 + }, + { + "epoch": 5.28, + "learning_rate": 9.593397885077353e-07, + "loss": 2.6099, + "step": 2336 + }, + { + "epoch": 5.29, + "learning_rate": 9.57875407857624e-07, + "loss": 2.3925, + "step": 2338 + }, + { + "epoch": 5.29, + "learning_rate": 9.564111176949734e-07, + "loss": 2.4485, + "step": 2340 + }, + { + "epoch": 5.3, + "learning_rate": 9.549469211652123e-07, + "loss": 2.2315, + "step": 2342 + }, + { + "epoch": 5.3, + "learning_rate": 9.534828214135682e-07, + "loss": 2.2182, + "step": 2344 + }, + { + "epoch": 5.31, + "learning_rate": 9.520188215850602e-07, + "loss": 2.4335, + "step": 2346 + }, + { + "epoch": 5.31, + "learning_rate": 9.505549248244936e-07, + "loss": 2.5005, + "step": 2348 + }, + { + "epoch": 5.32, + "learning_rate": 9.490911342764525e-07, + "loss": 2.2757, + "step": 2350 + }, + { + "epoch": 5.32, + "learning_rate": 9.476274530852913e-07, + "loss": 2.3001, + "step": 2352 + }, + { + "epoch": 5.32, + "learning_rate": 9.46163884395131e-07, + "loss": 2.173, + "step": 2354 + }, + { + "epoch": 5.33, + "learning_rate": 9.447004313498507e-07, + "loss": 2.363, + "step": 2356 + }, + { + "epoch": 5.33, + "learning_rate": 9.432370970930809e-07, + "loss": 2.2835, + "step": 2358 + }, + { + "epoch": 5.34, + "learning_rate": 9.41773884768196e-07, + "loss": 2.307, + "step": 2360 + }, + { + "epoch": 5.34, + "learning_rate": 9.403107975183102e-07, + "loss": 2.2666, + "step": 2362 + }, + { + "epoch": 5.35, + "learning_rate": 9.388478384862682e-07, + "loss": 2.3614, + "step": 2364 + }, + { + "epoch": 5.35, + "learning_rate": 9.373850108146386e-07, + "loss": 2.0857, + "step": 2366 + }, + { + "epoch": 5.36, + "learning_rate": 9.359223176457087e-07, + "loss": 2.5566, + "step": 2368 + }, + { + "epoch": 5.36, + "learning_rate": 9.344597621214771e-07, + "loss": 2.3728, + "step": 2370 + }, + { + "epoch": 5.36, + "learning_rate": 9.329973473836458e-07, + "loss": 2.3273, + "step": 2372 + }, + { + "epoch": 5.37, + "learning_rate": 9.315350765736148e-07, + "loss": 2.1808, + "step": 2374 + }, + { + "epoch": 5.37, + "learning_rate": 9.300729528324756e-07, + "loss": 2.3019, + "step": 2376 + }, + { + "epoch": 5.38, + "learning_rate": 9.286109793010026e-07, + "loss": 2.2624, + "step": 2378 + }, + { + "epoch": 5.38, + "learning_rate": 9.271491591196482e-07, + "loss": 2.4553, + "step": 2380 + }, + { + "epoch": 5.39, + "learning_rate": 9.256874954285354e-07, + "loss": 2.2152, + "step": 2382 + }, + { + "epoch": 5.39, + "learning_rate": 9.242259913674509e-07, + "loss": 2.715, + "step": 2384 + }, + { + "epoch": 5.4, + "learning_rate": 9.227646500758387e-07, + "loss": 2.1977, + "step": 2386 + }, + { + "epoch": 5.4, + "learning_rate": 9.213034746927926e-07, + "loss": 2.5103, + "step": 2388 + }, + { + "epoch": 5.41, + "learning_rate": 9.198424683570505e-07, + "loss": 2.3052, + "step": 2390 + }, + { + "epoch": 5.41, + "learning_rate": 9.183816342069878e-07, + "loss": 2.1901, + "step": 2392 + }, + { + "epoch": 5.41, + "learning_rate": 9.169209753806082e-07, + "loss": 2.2311, + "step": 2394 + }, + { + "epoch": 5.42, + "learning_rate": 9.154604950155404e-07, + "loss": 2.2232, + "step": 2396 + }, + { + "epoch": 5.42, + "learning_rate": 9.140001962490293e-07, + "loss": 2.3053, + "step": 2398 + }, + { + "epoch": 5.43, + "learning_rate": 9.125400822179301e-07, + "loss": 2.4779, + "step": 2400 + }, + { + "epoch": 5.43, + "learning_rate": 9.110801560586998e-07, + "loss": 2.441, + "step": 2402 + }, + { + "epoch": 5.44, + "learning_rate": 9.096204209073933e-07, + "loss": 2.5225, + "step": 2404 + }, + { + "epoch": 5.44, + "learning_rate": 9.081608798996549e-07, + "loss": 2.3408, + "step": 2406 + }, + { + "epoch": 5.45, + "learning_rate": 9.067015361707109e-07, + "loss": 2.3998, + "step": 2408 + }, + { + "epoch": 5.45, + "learning_rate": 9.05242392855365e-07, + "loss": 2.308, + "step": 2410 + }, + { + "epoch": 5.46, + "learning_rate": 9.037834530879897e-07, + "loss": 2.3218, + "step": 2412 + }, + { + "epoch": 5.46, + "learning_rate": 9.023247200025213e-07, + "loss": 2.5219, + "step": 2414 + }, + { + "epoch": 5.46, + "learning_rate": 9.008661967324503e-07, + "loss": 2.346, + "step": 2416 + }, + { + "epoch": 5.47, + "learning_rate": 8.994078864108179e-07, + "loss": 2.0946, + "step": 2418 + }, + { + "epoch": 5.47, + "learning_rate": 8.979497921702081e-07, + "loss": 2.3819, + "step": 2420 + }, + { + "epoch": 5.48, + "learning_rate": 8.964919171427389e-07, + "loss": 2.1452, + "step": 2422 + }, + { + "epoch": 5.48, + "learning_rate": 8.950342644600595e-07, + "loss": 2.0626, + "step": 2424 + }, + { + "epoch": 5.49, + "learning_rate": 8.935768372533408e-07, + "loss": 2.3418, + "step": 2426 + }, + { + "epoch": 5.49, + "learning_rate": 8.921196386532685e-07, + "loss": 2.4828, + "step": 2428 + }, + { + "epoch": 5.5, + "learning_rate": 8.906626717900383e-07, + "loss": 2.2321, + "step": 2430 + }, + { + "epoch": 5.5, + "learning_rate": 8.892059397933476e-07, + "loss": 2.2295, + "step": 2432 + }, + { + "epoch": 5.51, + "learning_rate": 8.877494457923899e-07, + "loss": 2.2563, + "step": 2434 + }, + { + "epoch": 5.51, + "learning_rate": 8.862931929158462e-07, + "loss": 2.4069, + "step": 2436 + }, + { + "epoch": 5.51, + "learning_rate": 8.848371842918804e-07, + "loss": 2.4871, + "step": 2438 + }, + { + "epoch": 5.52, + "learning_rate": 8.833814230481327e-07, + "loss": 2.1938, + "step": 2440 + }, + { + "epoch": 5.52, + "learning_rate": 8.819259123117096e-07, + "loss": 2.5121, + "step": 2442 + }, + { + "epoch": 5.53, + "learning_rate": 8.804706552091811e-07, + "loss": 2.3073, + "step": 2444 + }, + { + "epoch": 5.53, + "learning_rate": 8.790156548665723e-07, + "loss": 2.3042, + "step": 2446 + }, + { + "epoch": 5.54, + "learning_rate": 8.775609144093562e-07, + "loss": 2.0554, + "step": 2448 + }, + { + "epoch": 5.54, + "learning_rate": 8.761064369624477e-07, + "loss": 2.2464, + "step": 2450 + }, + { + "epoch": 5.55, + "learning_rate": 8.746522256501973e-07, + "loss": 2.5404, + "step": 2452 + }, + { + "epoch": 5.55, + "learning_rate": 8.73198283596383e-07, + "loss": 2.7003, + "step": 2454 + }, + { + "epoch": 5.55, + "learning_rate": 8.717446139242044e-07, + "loss": 2.2159, + "step": 2456 + }, + { + "epoch": 5.56, + "learning_rate": 8.702912197562771e-07, + "loss": 2.2748, + "step": 2458 + }, + { + "epoch": 5.56, + "learning_rate": 8.688381042146236e-07, + "loss": 2.3034, + "step": 2460 + }, + { + "epoch": 5.57, + "learning_rate": 8.673852704206689e-07, + "loss": 2.2095, + "step": 2462 + }, + { + "epoch": 5.57, + "learning_rate": 8.65932721495232e-07, + "loss": 2.2086, + "step": 2464 + }, + { + "epoch": 5.58, + "learning_rate": 8.644804605585201e-07, + "loss": 2.2465, + "step": 2466 + }, + { + "epoch": 5.58, + "learning_rate": 8.630284907301225e-07, + "loss": 2.3245, + "step": 2468 + }, + { + "epoch": 5.59, + "learning_rate": 8.615768151290024e-07, + "loss": 2.294, + "step": 2470 + }, + { + "epoch": 5.59, + "learning_rate": 8.601254368734908e-07, + "loss": 2.3998, + "step": 2472 + }, + { + "epoch": 5.6, + "learning_rate": 8.586743590812808e-07, + "loss": 2.5294, + "step": 2474 + }, + { + "epoch": 5.6, + "learning_rate": 8.572235848694202e-07, + "loss": 2.3761, + "step": 2476 + }, + { + "epoch": 5.6, + "learning_rate": 8.557731173543029e-07, + "loss": 2.1873, + "step": 2478 + }, + { + "epoch": 5.61, + "learning_rate": 8.54322959651666e-07, + "loss": 2.6505, + "step": 2480 + }, + { + "epoch": 5.61, + "learning_rate": 8.528731148765806e-07, + "loss": 2.1418, + "step": 2482 + }, + { + "epoch": 5.62, + "learning_rate": 8.521483108101453e-07, + "loss": 2.2971, + "step": 2484 + }, + { + "epoch": 5.62, + "learning_rate": 8.506989412656718e-07, + "loss": 2.178, + "step": 2486 + }, + { + "epoch": 5.63, + "learning_rate": 8.492498924334711e-07, + "loss": 2.1965, + "step": 2488 + }, + { + "epoch": 5.63, + "learning_rate": 8.47801167426232e-07, + "loss": 2.2415, + "step": 2490 + }, + { + "epoch": 5.64, + "learning_rate": 8.463527693559475e-07, + "loss": 2.2727, + "step": 2492 + }, + { + "epoch": 5.64, + "learning_rate": 8.44904701333909e-07, + "loss": 2.4316, + "step": 2494 + }, + { + "epoch": 5.65, + "learning_rate": 8.434569664706982e-07, + "loss": 2.2054, + "step": 2496 + }, + { + "epoch": 5.65, + "learning_rate": 8.420095678761817e-07, + "loss": 2.3326, + "step": 2498 + }, + { + "epoch": 5.65, + "learning_rate": 8.405625086595034e-07, + "loss": 2.4215, + "step": 2500 + }, + { + "epoch": 5.66, + "learning_rate": 8.391157919290781e-07, + "loss": 2.3286, + "step": 2502 + }, + { + "epoch": 5.66, + "learning_rate": 8.376694207925854e-07, + "loss": 2.34, + "step": 2504 + }, + { + "epoch": 5.67, + "learning_rate": 8.362233983569622e-07, + "loss": 2.8653, + "step": 2506 + }, + { + "epoch": 5.67, + "learning_rate": 8.347777277283961e-07, + "loss": 2.6285, + "step": 2508 + }, + { + "epoch": 5.68, + "learning_rate": 8.333324120123194e-07, + "loss": 2.5004, + "step": 2510 + }, + { + "epoch": 5.68, + "learning_rate": 8.318874543134025e-07, + "loss": 2.4015, + "step": 2512 + }, + { + "epoch": 5.69, + "learning_rate": 8.304428577355452e-07, + "loss": 2.2038, + "step": 2514 + }, + { + "epoch": 5.69, + "learning_rate": 8.297206958367709e-07, + "loss": 2.3619, + "step": 2516 + }, + { + "epoch": 5.7, + "learning_rate": 8.282766467586193e-07, + "loss": 2.398, + "step": 2518 + }, + { + "epoch": 5.7, + "learning_rate": 8.268329665578708e-07, + "loss": 2.1842, + "step": 2520 + }, + { + "epoch": 5.7, + "learning_rate": 8.253896583356824e-07, + "loss": 2.4201, + "step": 2522 + }, + { + "epoch": 5.71, + "learning_rate": 8.239467251924118e-07, + "loss": 2.3461, + "step": 2524 + }, + { + "epoch": 5.71, + "learning_rate": 8.225041702276107e-07, + "loss": 2.5341, + "step": 2526 + }, + { + "epoch": 5.72, + "learning_rate": 8.210619965400183e-07, + "loss": 2.5787, + "step": 2528 + }, + { + "epoch": 5.72, + "learning_rate": 8.196202072275554e-07, + "loss": 2.222, + "step": 2530 + }, + { + "epoch": 5.73, + "learning_rate": 8.181788053873167e-07, + "loss": 2.4516, + "step": 2532 + }, + { + "epoch": 5.73, + "learning_rate": 8.167377941155642e-07, + "loss": 2.5411, + "step": 2534 + }, + { + "epoch": 5.74, + "learning_rate": 8.152971765077219e-07, + "loss": 2.5724, + "step": 2536 + }, + { + "epoch": 5.74, + "learning_rate": 8.138569556583672e-07, + "loss": 2.4833, + "step": 2538 + }, + { + "epoch": 5.74, + "learning_rate": 8.124171346612263e-07, + "loss": 2.5704, + "step": 2540 + }, + { + "epoch": 5.75, + "learning_rate": 8.109777166091652e-07, + "loss": 2.5729, + "step": 2542 + }, + { + "epoch": 5.75, + "learning_rate": 8.095387045941852e-07, + "loss": 2.1751, + "step": 2544 + }, + { + "epoch": 5.76, + "learning_rate": 8.081001017074152e-07, + "loss": 2.3624, + "step": 2546 + }, + { + "epoch": 5.76, + "learning_rate": 8.066619110391054e-07, + "loss": 2.4528, + "step": 2548 + }, + { + "epoch": 5.77, + "learning_rate": 8.0522413567862e-07, + "loss": 2.4156, + "step": 2550 + }, + { + "epoch": 5.77, + "learning_rate": 8.037867787144314e-07, + "loss": 2.4374, + "step": 2552 + }, + { + "epoch": 5.78, + "learning_rate": 8.02349843234114e-07, + "loss": 2.2263, + "step": 2554 + }, + { + "epoch": 5.78, + "learning_rate": 8.009133323243351e-07, + "loss": 2.5188, + "step": 2556 + }, + { + "epoch": 5.79, + "learning_rate": 7.994772490708514e-07, + "loss": 2.1436, + "step": 2558 + }, + { + "epoch": 5.79, + "learning_rate": 7.980415965585011e-07, + "loss": 2.4208, + "step": 2560 + }, + { + "epoch": 5.79, + "learning_rate": 7.966063778711956e-07, + "loss": 2.4573, + "step": 2562 + }, + { + "epoch": 5.8, + "learning_rate": 7.951715960919157e-07, + "loss": 2.4805, + "step": 2564 + }, + { + "epoch": 5.8, + "learning_rate": 7.937372543027035e-07, + "loss": 2.3773, + "step": 2566 + }, + { + "epoch": 5.81, + "learning_rate": 7.92303355584656e-07, + "loss": 2.327, + "step": 2568 + }, + { + "epoch": 5.81, + "learning_rate": 7.908699030179177e-07, + "loss": 2.354, + "step": 2570 + }, + { + "epoch": 5.82, + "learning_rate": 7.894368996816755e-07, + "loss": 2.3706, + "step": 2572 + }, + { + "epoch": 5.82, + "learning_rate": 7.880043486541517e-07, + "loss": 2.43, + "step": 2574 + }, + { + "epoch": 5.83, + "learning_rate": 7.865722530125952e-07, + "loss": 2.613, + "step": 2576 + }, + { + "epoch": 5.83, + "learning_rate": 7.851406158332786e-07, + "loss": 2.1504, + "step": 2578 + }, + { + "epoch": 5.84, + "learning_rate": 7.837094401914889e-07, + "loss": 2.5713, + "step": 2580 + }, + { + "epoch": 5.84, + "learning_rate": 7.82278729161522e-07, + "loss": 2.2792, + "step": 2582 + }, + { + "epoch": 5.84, + "learning_rate": 7.808484858166749e-07, + "loss": 2.4921, + "step": 2584 + }, + { + "epoch": 5.85, + "learning_rate": 7.794187132292408e-07, + "loss": 2.4913, + "step": 2586 + }, + { + "epoch": 5.85, + "learning_rate": 7.77989414470502e-07, + "loss": 2.2147, + "step": 2588 + }, + { + "epoch": 5.86, + "learning_rate": 7.765605926107213e-07, + "loss": 2.7114, + "step": 2590 + }, + { + "epoch": 5.86, + "learning_rate": 7.75132250719139e-07, + "loss": 2.3006, + "step": 2592 + }, + { + "epoch": 5.87, + "learning_rate": 7.737043918639634e-07, + "loss": 2.4085, + "step": 2594 + }, + { + "epoch": 5.87, + "learning_rate": 7.722770191123658e-07, + "loss": 2.2064, + "step": 2596 + }, + { + "epoch": 5.88, + "learning_rate": 7.708501355304719e-07, + "loss": 2.5217, + "step": 2598 + }, + { + "epoch": 5.88, + "learning_rate": 7.694237441833584e-07, + "loss": 2.4016, + "step": 2600 + }, + { + "epoch": 5.89, + "learning_rate": 7.679978481350437e-07, + "loss": 2.2748, + "step": 2602 + }, + { + "epoch": 5.89, + "learning_rate": 7.665724504484819e-07, + "loss": 2.7744, + "step": 2604 + }, + { + "epoch": 5.89, + "learning_rate": 7.651475541855577e-07, + "loss": 2.5243, + "step": 2606 + }, + { + "epoch": 5.9, + "learning_rate": 7.637231624070776e-07, + "loss": 2.5171, + "step": 2608 + }, + { + "epoch": 5.9, + "learning_rate": 7.622992781727652e-07, + "loss": 2.6107, + "step": 2610 + }, + { + "epoch": 5.91, + "learning_rate": 7.608759045412534e-07, + "loss": 2.3215, + "step": 2612 + }, + { + "epoch": 5.91, + "learning_rate": 7.594530445700781e-07, + "loss": 2.2903, + "step": 2614 + }, + { + "epoch": 5.92, + "learning_rate": 7.580307013156726e-07, + "loss": 2.4102, + "step": 2616 + }, + { + "epoch": 5.92, + "learning_rate": 7.566088778333597e-07, + "loss": 2.3011, + "step": 2618 + }, + { + "epoch": 5.93, + "learning_rate": 7.551875771773451e-07, + "loss": 2.4777, + "step": 2620 + }, + { + "epoch": 5.93, + "learning_rate": 7.53766802400713e-07, + "loss": 2.2883, + "step": 2622 + }, + { + "epoch": 5.93, + "learning_rate": 7.523465565554165e-07, + "loss": 2.603, + "step": 2624 + }, + { + "epoch": 5.94, + "learning_rate": 7.509268426922729e-07, + "loss": 2.3403, + "step": 2626 + }, + { + "epoch": 5.94, + "learning_rate": 7.495076638609573e-07, + "loss": 2.2766, + "step": 2628 + }, + { + "epoch": 5.95, + "learning_rate": 7.48089023109995e-07, + "loss": 2.5495, + "step": 2630 + }, + { + "epoch": 5.95, + "learning_rate": 7.466709234867551e-07, + "loss": 2.2699, + "step": 2632 + }, + { + "epoch": 5.96, + "learning_rate": 7.452533680374451e-07, + "loss": 2.232, + "step": 2634 + }, + { + "epoch": 5.96, + "learning_rate": 7.438363598071037e-07, + "loss": 2.4567, + "step": 2636 + }, + { + "epoch": 5.97, + "learning_rate": 7.424199018395927e-07, + "loss": 2.3238, + "step": 2638 + }, + { + "epoch": 5.97, + "learning_rate": 7.410039971775932e-07, + "loss": 2.2977, + "step": 2640 + }, + { + "epoch": 5.98, + "learning_rate": 7.395886488625975e-07, + "loss": 2.513, + "step": 2642 + }, + { + "epoch": 5.98, + "learning_rate": 7.38173859934903e-07, + "loss": 2.4299, + "step": 2644 + }, + { + "epoch": 5.98, + "learning_rate": 7.367596334336046e-07, + "loss": 2.2186, + "step": 2646 + }, + { + "epoch": 5.99, + "learning_rate": 7.353459723965896e-07, + "loss": 2.6373, + "step": 2648 + }, + { + "epoch": 5.99, + "learning_rate": 7.339328798605315e-07, + "loss": 2.1771, + "step": 2650 + }, + { + "epoch": 6.0, + "learning_rate": 7.325203588608808e-07, + "loss": 2.3566, + "step": 2652 + }, + { + "epoch": 6.0, + "learning_rate": 7.311084124318617e-07, + "loss": 2.3604, + "step": 2654 + }, + { + "epoch": 6.01, + "learning_rate": 7.296970436064634e-07, + "loss": 2.6705, + "step": 2656 + }, + { + "epoch": 6.01, + "learning_rate": 7.282862554164355e-07, + "loss": 2.306, + "step": 2658 + }, + { + "epoch": 6.02, + "learning_rate": 7.268760508922786e-07, + "loss": 2.3193, + "step": 2660 + }, + { + "epoch": 6.02, + "learning_rate": 7.25466433063241e-07, + "loss": 2.4695, + "step": 2662 + }, + { + "epoch": 6.03, + "learning_rate": 7.240574049573105e-07, + "loss": 2.3212, + "step": 2664 + }, + { + "epoch": 6.03, + "learning_rate": 7.226489696012068e-07, + "loss": 2.3396, + "step": 2666 + }, + { + "epoch": 6.03, + "learning_rate": 7.212411300203778e-07, + "loss": 2.7672, + "step": 2668 + }, + { + "epoch": 6.04, + "learning_rate": 7.198338892389915e-07, + "loss": 2.3035, + "step": 2670 + }, + { + "epoch": 6.04, + "learning_rate": 7.184272502799291e-07, + "loss": 2.2063, + "step": 2672 + }, + { + "epoch": 6.05, + "learning_rate": 7.170212161647789e-07, + "loss": 2.3545, + "step": 2674 + }, + { + "epoch": 6.05, + "learning_rate": 7.156157899138305e-07, + "loss": 2.1973, + "step": 2676 + }, + { + "epoch": 6.06, + "learning_rate": 7.142109745460676e-07, + "loss": 2.3296, + "step": 2678 + }, + { + "epoch": 6.06, + "learning_rate": 7.128067730791611e-07, + "loss": 2.4783, + "step": 2680 + }, + { + "epoch": 6.07, + "learning_rate": 7.114031885294642e-07, + "loss": 2.6186, + "step": 2682 + }, + { + "epoch": 6.07, + "learning_rate": 7.10000223912004e-07, + "loss": 2.1012, + "step": 2684 + }, + { + "epoch": 6.08, + "learning_rate": 7.085978822404766e-07, + "loss": 2.3392, + "step": 2686 + }, + { + "epoch": 6.08, + "learning_rate": 7.071961665272392e-07, + "loss": 2.3101, + "step": 2688 + }, + { + "epoch": 6.08, + "learning_rate": 7.05795079783305e-07, + "loss": 2.2984, + "step": 2690 + }, + { + "epoch": 6.09, + "learning_rate": 7.043946250183361e-07, + "loss": 2.3071, + "step": 2692 + }, + { + "epoch": 6.09, + "learning_rate": 7.02994805240637e-07, + "loss": 2.5653, + "step": 2694 + }, + { + "epoch": 6.1, + "learning_rate": 7.015956234571474e-07, + "loss": 2.4521, + "step": 2696 + }, + { + "epoch": 6.1, + "learning_rate": 7.001970826734378e-07, + "loss": 2.5733, + "step": 2698 + }, + { + "epoch": 6.11, + "learning_rate": 6.987991858937015e-07, + "loss": 2.0661, + "step": 2700 + }, + { + "epoch": 6.11, + "learning_rate": 6.97401936120747e-07, + "loss": 2.5841, + "step": 2702 + }, + { + "epoch": 6.12, + "learning_rate": 6.96005336355995e-07, + "loss": 2.2641, + "step": 2704 + }, + { + "epoch": 6.12, + "learning_rate": 6.94609389599469e-07, + "loss": 2.2637, + "step": 2706 + }, + { + "epoch": 6.12, + "learning_rate": 6.932140988497892e-07, + "loss": 2.2499, + "step": 2708 + }, + { + "epoch": 6.13, + "learning_rate": 6.918194671041674e-07, + "loss": 2.509, + "step": 2710 + }, + { + "epoch": 6.13, + "learning_rate": 6.904254973584002e-07, + "loss": 2.2785, + "step": 2712 + }, + { + "epoch": 6.14, + "learning_rate": 6.890321926068608e-07, + "loss": 2.4755, + "step": 2714 + }, + { + "epoch": 6.14, + "learning_rate": 6.876395558424949e-07, + "loss": 2.4773, + "step": 2716 + }, + { + "epoch": 6.15, + "learning_rate": 6.862475900568132e-07, + "loss": 2.333, + "step": 2718 + }, + { + "epoch": 6.15, + "learning_rate": 6.848562982398855e-07, + "loss": 2.4779, + "step": 2720 + }, + { + "epoch": 6.16, + "learning_rate": 6.834656833803326e-07, + "loss": 2.4282, + "step": 2722 + }, + { + "epoch": 6.16, + "learning_rate": 6.82075748465322e-07, + "loss": 2.104, + "step": 2724 + }, + { + "epoch": 6.17, + "learning_rate": 6.806864964805612e-07, + "loss": 2.483, + "step": 2726 + }, + { + "epoch": 6.17, + "learning_rate": 6.792979304102888e-07, + "loss": 2.2719, + "step": 2728 + }, + { + "epoch": 6.17, + "learning_rate": 6.77910053237272e-07, + "loss": 2.5435, + "step": 2730 + }, + { + "epoch": 6.18, + "learning_rate": 6.765228679427973e-07, + "loss": 2.5445, + "step": 2732 + }, + { + "epoch": 6.18, + "learning_rate": 6.751363775066652e-07, + "loss": 2.3369, + "step": 2734 + }, + { + "epoch": 6.19, + "learning_rate": 6.737505849071828e-07, + "loss": 2.3555, + "step": 2736 + }, + { + "epoch": 6.19, + "learning_rate": 6.723654931211593e-07, + "loss": 2.1379, + "step": 2738 + }, + { + "epoch": 6.2, + "learning_rate": 6.709811051238982e-07, + "loss": 2.1932, + "step": 2740 + }, + { + "epoch": 6.2, + "learning_rate": 6.695974238891906e-07, + "loss": 2.5776, + "step": 2742 + }, + { + "epoch": 6.21, + "learning_rate": 6.682144523893098e-07, + "loss": 2.1765, + "step": 2744 + }, + { + "epoch": 6.21, + "learning_rate": 6.66832193595005e-07, + "loss": 2.1806, + "step": 2746 + }, + { + "epoch": 6.22, + "learning_rate": 6.654506504754937e-07, + "loss": 2.3733, + "step": 2748 + }, + { + "epoch": 6.22, + "learning_rate": 6.640698259984563e-07, + "loss": 2.3373, + "step": 2750 + }, + { + "epoch": 6.22, + "learning_rate": 6.626897231300294e-07, + "loss": 2.2696, + "step": 2752 + }, + { + "epoch": 6.23, + "learning_rate": 6.613103448348004e-07, + "loss": 2.2713, + "step": 2754 + }, + { + "epoch": 6.23, + "learning_rate": 6.599316940757986e-07, + "loss": 2.2902, + "step": 2756 + }, + { + "epoch": 6.24, + "learning_rate": 6.585537738144916e-07, + "loss": 2.3606, + "step": 2758 + }, + { + "epoch": 6.24, + "learning_rate": 6.571765870107782e-07, + "loss": 2.3262, + "step": 2760 + }, + { + "epoch": 6.25, + "learning_rate": 6.558001366229807e-07, + "loss": 2.2879, + "step": 2762 + }, + { + "epoch": 6.25, + "learning_rate": 6.544244256078395e-07, + "loss": 2.2362, + "step": 2764 + }, + { + "epoch": 6.26, + "learning_rate": 6.530494569205078e-07, + "loss": 2.5146, + "step": 2766 + }, + { + "epoch": 6.26, + "learning_rate": 6.516752335145435e-07, + "loss": 2.6295, + "step": 2768 + }, + { + "epoch": 6.27, + "learning_rate": 6.503017583419033e-07, + "loss": 2.291, + "step": 2770 + }, + { + "epoch": 6.27, + "learning_rate": 6.48929034352937e-07, + "loss": 2.3465, + "step": 2772 + }, + { + "epoch": 6.27, + "learning_rate": 6.475570644963811e-07, + "loss": 2.2647, + "step": 2774 + }, + { + "epoch": 6.28, + "learning_rate": 6.461858517193517e-07, + "loss": 2.5567, + "step": 2776 + }, + { + "epoch": 6.28, + "learning_rate": 6.448153989673384e-07, + "loss": 2.3326, + "step": 2778 + }, + { + "epoch": 6.29, + "learning_rate": 6.434457091841988e-07, + "loss": 2.4799, + "step": 2780 + }, + { + "epoch": 6.29, + "learning_rate": 6.420767853121518e-07, + "loss": 2.2515, + "step": 2782 + }, + { + "epoch": 6.3, + "learning_rate": 6.407086302917694e-07, + "loss": 2.0771, + "step": 2784 + }, + { + "epoch": 6.3, + "learning_rate": 6.393412470619741e-07, + "loss": 2.4317, + "step": 2786 + }, + { + "epoch": 6.31, + "learning_rate": 6.379746385600294e-07, + "loss": 2.4658, + "step": 2788 + }, + { + "epoch": 6.31, + "learning_rate": 6.366088077215349e-07, + "loss": 2.333, + "step": 2790 + }, + { + "epoch": 6.31, + "learning_rate": 6.352437574804194e-07, + "loss": 2.3936, + "step": 2792 + }, + { + "epoch": 6.32, + "learning_rate": 6.338794907689351e-07, + "loss": 2.3265, + "step": 2794 + }, + { + "epoch": 6.32, + "learning_rate": 6.325160105176519e-07, + "loss": 2.1475, + "step": 2796 + }, + { + "epoch": 6.33, + "learning_rate": 6.311533196554486e-07, + "loss": 2.2806, + "step": 2798 + }, + { + "epoch": 6.33, + "learning_rate": 6.297914211095097e-07, + "loss": 2.4101, + "step": 2800 + }, + { + "epoch": 6.34, + "learning_rate": 6.284303178053179e-07, + "loss": 2.2414, + "step": 2802 + }, + { + "epoch": 6.34, + "learning_rate": 6.270700126666457e-07, + "loss": 2.3114, + "step": 2804 + }, + { + "epoch": 6.35, + "learning_rate": 6.257105086155537e-07, + "loss": 2.0424, + "step": 2806 + }, + { + "epoch": 6.35, + "learning_rate": 6.243518085723795e-07, + "loss": 2.4947, + "step": 2808 + }, + { + "epoch": 6.36, + "learning_rate": 6.229939154557358e-07, + "loss": 2.1634, + "step": 2810 + }, + { + "epoch": 6.36, + "learning_rate": 6.216368321824992e-07, + "loss": 2.2303, + "step": 2812 + }, + { + "epoch": 6.36, + "learning_rate": 6.20280561667809e-07, + "loss": 2.4121, + "step": 2814 + }, + { + "epoch": 6.37, + "learning_rate": 6.189251068250582e-07, + "loss": 2.3059, + "step": 2816 + }, + { + "epoch": 6.37, + "learning_rate": 6.175704705658861e-07, + "loss": 2.1436, + "step": 2818 + }, + { + "epoch": 6.38, + "learning_rate": 6.162166558001756e-07, + "loss": 2.4704, + "step": 2820 + }, + { + "epoch": 6.38, + "learning_rate": 6.148636654360437e-07, + "loss": 2.3432, + "step": 2822 + }, + { + "epoch": 6.39, + "learning_rate": 6.135115023798378e-07, + "loss": 2.4694, + "step": 2824 + }, + { + "epoch": 6.39, + "learning_rate": 6.12160169536126e-07, + "loss": 2.1774, + "step": 2826 + }, + { + "epoch": 6.4, + "learning_rate": 6.108096698076952e-07, + "loss": 2.3985, + "step": 2828 + }, + { + "epoch": 6.4, + "learning_rate": 6.09460006095542e-07, + "loss": 2.1717, + "step": 2830 + }, + { + "epoch": 6.41, + "learning_rate": 6.08111181298866e-07, + "loss": 2.3996, + "step": 2832 + }, + { + "epoch": 6.41, + "learning_rate": 6.067631983150665e-07, + "loss": 2.3045, + "step": 2834 + }, + { + "epoch": 6.41, + "learning_rate": 6.054160600397336e-07, + "loss": 2.4791, + "step": 2836 + }, + { + "epoch": 6.42, + "learning_rate": 6.04069769366643e-07, + "loss": 2.2783, + "step": 2838 + }, + { + "epoch": 6.42, + "learning_rate": 6.027243291877495e-07, + "loss": 2.3579, + "step": 2840 + }, + { + "epoch": 6.43, + "learning_rate": 6.013797423931813e-07, + "loss": 2.3617, + "step": 2842 + }, + { + "epoch": 6.43, + "learning_rate": 6.000360118712336e-07, + "loss": 2.3048, + "step": 2844 + }, + { + "epoch": 6.44, + "learning_rate": 5.986931405083611e-07, + "loss": 2.4872, + "step": 2846 + }, + { + "epoch": 6.44, + "learning_rate": 5.973511311891747e-07, + "loss": 2.2881, + "step": 2848 + }, + { + "epoch": 6.45, + "learning_rate": 5.960099867964319e-07, + "loss": 2.4967, + "step": 2850 + }, + { + "epoch": 6.45, + "learning_rate": 5.94669710211034e-07, + "loss": 2.6076, + "step": 2852 + }, + { + "epoch": 6.46, + "learning_rate": 5.933303043120162e-07, + "loss": 2.1849, + "step": 2854 + }, + { + "epoch": 6.46, + "learning_rate": 5.919917719765446e-07, + "loss": 2.2264, + "step": 2856 + }, + { + "epoch": 6.46, + "learning_rate": 5.906541160799092e-07, + "loss": 2.6351, + "step": 2858 + }, + { + "epoch": 6.47, + "learning_rate": 5.893173394955158e-07, + "loss": 2.2375, + "step": 2860 + }, + { + "epoch": 6.47, + "learning_rate": 5.879814450948826e-07, + "loss": 2.337, + "step": 2862 + }, + { + "epoch": 6.48, + "learning_rate": 5.866464357476324e-07, + "loss": 2.5032, + "step": 2864 + }, + { + "epoch": 6.48, + "learning_rate": 5.853123143214876e-07, + "loss": 2.6742, + "step": 2866 + }, + { + "epoch": 6.49, + "learning_rate": 5.839790836822614e-07, + "loss": 2.2436, + "step": 2868 + }, + { + "epoch": 6.49, + "learning_rate": 5.826467466938553e-07, + "loss": 2.2489, + "step": 2870 + }, + { + "epoch": 6.5, + "learning_rate": 5.813153062182504e-07, + "loss": 2.4944, + "step": 2872 + }, + { + "epoch": 6.5, + "learning_rate": 5.799847651155022e-07, + "loss": 2.242, + "step": 2874 + }, + { + "epoch": 6.5, + "learning_rate": 5.786551262437341e-07, + "loss": 2.2145, + "step": 2876 + }, + { + "epoch": 6.51, + "learning_rate": 5.773263924591309e-07, + "loss": 2.3368, + "step": 2878 + }, + { + "epoch": 6.51, + "learning_rate": 5.759985666159348e-07, + "loss": 2.3857, + "step": 2880 + }, + { + "epoch": 6.52, + "learning_rate": 5.746716515664363e-07, + "loss": 2.5574, + "step": 2882 + }, + { + "epoch": 6.52, + "learning_rate": 5.733456501609697e-07, + "loss": 2.6827, + "step": 2884 + }, + { + "epoch": 6.53, + "learning_rate": 5.720205652479067e-07, + "loss": 2.3012, + "step": 2886 + }, + { + "epoch": 6.53, + "learning_rate": 5.706963996736504e-07, + "loss": 2.4365, + "step": 2888 + }, + { + "epoch": 6.54, + "learning_rate": 5.693731562826288e-07, + "loss": 2.0728, + "step": 2890 + }, + { + "epoch": 6.54, + "learning_rate": 5.6805083791729e-07, + "loss": 2.3692, + "step": 2892 + }, + { + "epoch": 6.55, + "learning_rate": 5.667294474180937e-07, + "loss": 2.4155, + "step": 2894 + }, + { + "epoch": 6.55, + "learning_rate": 5.654089876235073e-07, + "loss": 2.1968, + "step": 2896 + }, + { + "epoch": 6.55, + "learning_rate": 5.640894613699982e-07, + "loss": 2.0421, + "step": 2898 + }, + { + "epoch": 6.56, + "learning_rate": 5.6277087149203e-07, + "loss": 2.4892, + "step": 2900 + }, + { + "epoch": 6.56, + "learning_rate": 5.614532208220528e-07, + "loss": 2.2188, + "step": 2902 + }, + { + "epoch": 6.57, + "learning_rate": 5.601365121905007e-07, + "loss": 2.3704, + "step": 2904 + }, + { + "epoch": 6.57, + "learning_rate": 5.588207484257839e-07, + "loss": 2.3284, + "step": 2906 + }, + { + "epoch": 6.58, + "learning_rate": 5.575059323542825e-07, + "loss": 2.3581, + "step": 2908 + }, + { + "epoch": 6.58, + "learning_rate": 5.561920668003413e-07, + "loss": 2.2069, + "step": 2910 + }, + { + "epoch": 6.59, + "learning_rate": 5.548791545862632e-07, + "loss": 2.377, + "step": 2912 + }, + { + "epoch": 6.59, + "learning_rate": 5.535671985323034e-07, + "loss": 2.552, + "step": 2914 + }, + { + "epoch": 6.6, + "learning_rate": 5.522562014566628e-07, + "loss": 2.4803, + "step": 2916 + }, + { + "epoch": 6.6, + "learning_rate": 5.509461661754822e-07, + "loss": 2.2848, + "step": 2918 + }, + { + "epoch": 6.6, + "learning_rate": 5.496370955028379e-07, + "loss": 2.3721, + "step": 2920 + }, + { + "epoch": 6.61, + "learning_rate": 5.483289922507312e-07, + "loss": 2.2689, + "step": 2922 + }, + { + "epoch": 6.61, + "learning_rate": 5.470218592290883e-07, + "loss": 2.4126, + "step": 2924 + }, + { + "epoch": 6.62, + "learning_rate": 5.457156992457493e-07, + "loss": 2.4723, + "step": 2926 + }, + { + "epoch": 6.62, + "learning_rate": 5.444105151064657e-07, + "loss": 2.5204, + "step": 2928 + }, + { + "epoch": 6.63, + "learning_rate": 5.431063096148908e-07, + "loss": 2.2726, + "step": 2930 + }, + { + "epoch": 6.63, + "learning_rate": 5.418030855725776e-07, + "loss": 2.3093, + "step": 2932 + }, + { + "epoch": 6.64, + "learning_rate": 5.405008457789697e-07, + "loss": 2.5789, + "step": 2934 + }, + { + "epoch": 6.64, + "learning_rate": 5.391995930313969e-07, + "loss": 2.496, + "step": 2936 + }, + { + "epoch": 6.65, + "learning_rate": 5.378993301250682e-07, + "loss": 2.2452, + "step": 2938 + }, + { + "epoch": 6.65, + "learning_rate": 5.366000598530674e-07, + "loss": 2.3148, + "step": 2940 + }, + { + "epoch": 6.65, + "learning_rate": 5.353017850063452e-07, + "loss": 2.7162, + "step": 2942 + }, + { + "epoch": 6.66, + "learning_rate": 5.340045083737142e-07, + "loss": 2.4087, + "step": 2944 + }, + { + "epoch": 6.66, + "learning_rate": 5.327082327418419e-07, + "loss": 2.2211, + "step": 2946 + }, + { + "epoch": 6.67, + "learning_rate": 5.314129608952484e-07, + "loss": 2.0549, + "step": 2948 + }, + { + "epoch": 6.67, + "learning_rate": 5.301186956162935e-07, + "loss": 2.5625, + "step": 2950 + }, + { + "epoch": 6.68, + "learning_rate": 5.288254396851782e-07, + "loss": 2.2932, + "step": 2952 + }, + { + "epoch": 6.68, + "learning_rate": 5.275331958799335e-07, + "loss": 2.3089, + "step": 2954 + }, + { + "epoch": 6.69, + "learning_rate": 5.262419669764171e-07, + "loss": 2.4793, + "step": 2956 + }, + { + "epoch": 6.69, + "learning_rate": 5.24951755748306e-07, + "loss": 2.2383, + "step": 2958 + }, + { + "epoch": 6.69, + "learning_rate": 5.236625649670913e-07, + "loss": 2.4058, + "step": 2960 + }, + { + "epoch": 6.7, + "learning_rate": 5.223743974020727e-07, + "loss": 2.4452, + "step": 2962 + }, + { + "epoch": 6.7, + "learning_rate": 5.210872558203514e-07, + "loss": 2.232, + "step": 2964 + }, + { + "epoch": 6.71, + "learning_rate": 5.198011429868245e-07, + "loss": 2.6852, + "step": 2966 + }, + { + "epoch": 6.71, + "learning_rate": 5.185160616641793e-07, + "loss": 2.0647, + "step": 2968 + }, + { + "epoch": 6.72, + "learning_rate": 5.172320146128884e-07, + "loss": 2.5407, + "step": 2970 + }, + { + "epoch": 6.72, + "learning_rate": 5.159490045912006e-07, + "loss": 2.313, + "step": 2972 + }, + { + "epoch": 6.73, + "learning_rate": 5.146670343551394e-07, + "loss": 2.291, + "step": 2974 + }, + { + "epoch": 6.73, + "learning_rate": 5.133861066584929e-07, + "loss": 2.2558, + "step": 2976 + }, + { + "epoch": 6.74, + "learning_rate": 5.121062242528109e-07, + "loss": 2.6145, + "step": 2978 + }, + { + "epoch": 6.74, + "learning_rate": 5.108273898873967e-07, + "loss": 2.4894, + "step": 2980 + }, + { + "epoch": 6.74, + "learning_rate": 5.095496063093044e-07, + "loss": 2.5229, + "step": 2982 + }, + { + "epoch": 6.75, + "learning_rate": 5.082728762633278e-07, + "loss": 2.1288, + "step": 2984 + }, + { + "epoch": 6.75, + "learning_rate": 5.069972024920003e-07, + "loss": 2.467, + "step": 2986 + }, + { + "epoch": 6.76, + "learning_rate": 5.057225877355851e-07, + "loss": 2.5339, + "step": 2988 + }, + { + "epoch": 6.76, + "learning_rate": 5.044490347320715e-07, + "loss": 2.123, + "step": 2990 + }, + { + "epoch": 6.77, + "learning_rate": 5.031765462171659e-07, + "loss": 2.5362, + "step": 2992 + }, + { + "epoch": 6.77, + "learning_rate": 5.019051249242907e-07, + "loss": 2.3589, + "step": 2994 + }, + { + "epoch": 6.78, + "learning_rate": 5.006347735845744e-07, + "loss": 2.612, + "step": 2996 + }, + { + "epoch": 6.78, + "learning_rate": 4.993654949268472e-07, + "loss": 2.2357, + "step": 2998 + }, + { + "epoch": 6.79, + "learning_rate": 4.980972916776349e-07, + "loss": 2.0088, + "step": 3000 + }, + { + "epoch": 6.79, + "learning_rate": 4.968301665611545e-07, + "loss": 2.4529, + "step": 3002 + }, + { + "epoch": 6.79, + "learning_rate": 4.955641222993057e-07, + "loss": 2.352, + "step": 3004 + }, + { + "epoch": 6.8, + "learning_rate": 4.942991616116667e-07, + "loss": 2.2461, + "step": 3006 + }, + { + "epoch": 6.8, + "learning_rate": 4.930352872154881e-07, + "loss": 2.4761, + "step": 3008 + }, + { + "epoch": 6.81, + "learning_rate": 4.917725018256885e-07, + "loss": 2.3513, + "step": 3010 + }, + { + "epoch": 6.81, + "learning_rate": 4.905108081548441e-07, + "loss": 2.0589, + "step": 3012 + }, + { + "epoch": 6.82, + "learning_rate": 4.892502089131891e-07, + "loss": 2.3859, + "step": 3014 + }, + { + "epoch": 6.82, + "learning_rate": 4.879907068086047e-07, + "loss": 2.4326, + "step": 3016 + }, + { + "epoch": 6.83, + "learning_rate": 4.867323045466173e-07, + "loss": 2.2585, + "step": 3018 + }, + { + "epoch": 6.83, + "learning_rate": 4.854750048303881e-07, + "loss": 2.2026, + "step": 3020 + }, + { + "epoch": 6.84, + "learning_rate": 4.842188103607126e-07, + "loss": 2.4944, + "step": 3022 + }, + { + "epoch": 6.84, + "learning_rate": 4.829637238360103e-07, + "loss": 2.3826, + "step": 3024 + }, + { + "epoch": 6.84, + "learning_rate": 4.817097479523214e-07, + "loss": 2.4484, + "step": 3026 + }, + { + "epoch": 6.85, + "learning_rate": 4.804568854033003e-07, + "loss": 2.1429, + "step": 3028 + }, + { + "epoch": 6.85, + "learning_rate": 4.792051388802103e-07, + "loss": 2.5137, + "step": 3030 + }, + { + "epoch": 6.86, + "learning_rate": 4.779545110719167e-07, + "loss": 2.6513, + "step": 3032 + }, + { + "epoch": 6.86, + "learning_rate": 4.767050046648818e-07, + "loss": 2.4917, + "step": 3034 + }, + { + "epoch": 6.87, + "learning_rate": 4.7545662234315933e-07, + "loss": 2.3492, + "step": 3036 + }, + { + "epoch": 6.87, + "learning_rate": 4.742093667883881e-07, + "loss": 2.1966, + "step": 3038 + }, + { + "epoch": 6.88, + "learning_rate": 4.729632406797868e-07, + "loss": 2.5651, + "step": 3040 + }, + { + "epoch": 6.88, + "learning_rate": 4.717182466941472e-07, + "loss": 2.2645, + "step": 3042 + }, + { + "epoch": 6.88, + "learning_rate": 4.704743875058307e-07, + "loss": 2.2814, + "step": 3044 + }, + { + "epoch": 6.89, + "learning_rate": 4.6923166578675976e-07, + "loss": 2.247, + "step": 3046 + }, + { + "epoch": 6.89, + "learning_rate": 4.679900842064137e-07, + "loss": 2.3914, + "step": 3048 + }, + { + "epoch": 6.9, + "learning_rate": 4.667496454318225e-07, + "loss": 2.5838, + "step": 3050 + }, + { + "epoch": 6.9, + "learning_rate": 4.6551035212756284e-07, + "loss": 2.348, + "step": 3052 + }, + { + "epoch": 6.91, + "learning_rate": 4.642722069557481e-07, + "loss": 2.4168, + "step": 3054 + }, + { + "epoch": 6.91, + "learning_rate": 4.63035212576028e-07, + "loss": 2.3571, + "step": 3056 + }, + { + "epoch": 6.92, + "learning_rate": 4.6179937164557824e-07, + "loss": 2.1961, + "step": 3058 + }, + { + "epoch": 6.92, + "learning_rate": 4.60564686819099e-07, + "loss": 2.2318, + "step": 3060 + }, + { + "epoch": 6.93, + "learning_rate": 4.593311607488042e-07, + "loss": 2.3089, + "step": 3062 + }, + { + "epoch": 6.93, + "learning_rate": 4.5809879608442127e-07, + "loss": 2.32, + "step": 3064 + }, + { + "epoch": 6.93, + "learning_rate": 4.5686759547318145e-07, + "loss": 2.3229, + "step": 3066 + }, + { + "epoch": 6.94, + "learning_rate": 4.556375615598157e-07, + "loss": 2.1972, + "step": 3068 + }, + { + "epoch": 6.94, + "learning_rate": 4.5440869698654847e-07, + "loss": 2.0506, + "step": 3070 + }, + { + "epoch": 6.95, + "learning_rate": 4.5318100439309383e-07, + "loss": 2.4581, + "step": 3072 + }, + { + "epoch": 6.95, + "learning_rate": 4.519544864166467e-07, + "loss": 2.428, + "step": 3074 + }, + { + "epoch": 6.96, + "learning_rate": 4.5072914569187936e-07, + "loss": 2.3864, + "step": 3076 + }, + { + "epoch": 6.96, + "learning_rate": 4.4950498485093504e-07, + "loss": 2.2819, + "step": 3078 + }, + { + "epoch": 6.97, + "learning_rate": 4.4828200652342376e-07, + "loss": 2.3175, + "step": 3080 + }, + { + "epoch": 6.97, + "learning_rate": 4.4706021333641286e-07, + "loss": 2.4947, + "step": 3082 + }, + { + "epoch": 6.98, + "learning_rate": 4.458396079144267e-07, + "loss": 2.6006, + "step": 3084 + }, + { + "epoch": 6.98, + "learning_rate": 4.4462019287943633e-07, + "loss": 2.1521, + "step": 3086 + }, + { + "epoch": 6.98, + "learning_rate": 4.434019708508563e-07, + "loss": 2.1405, + "step": 3088 + }, + { + "epoch": 6.99, + "learning_rate": 4.421849444455382e-07, + "loss": 2.3048, + "step": 3090 + }, + { + "epoch": 6.99, + "learning_rate": 4.4096911627776633e-07, + "loss": 2.2149, + "step": 3092 + }, + { + "epoch": 7.0, + "learning_rate": 4.397544889592499e-07, + "loss": 2.2301, + "step": 3094 + }, + { + "epoch": 7.0, + "learning_rate": 4.38541065099119e-07, + "loss": 2.2929, + "step": 3096 + }, + { + "epoch": 7.01, + "learning_rate": 4.373288473039185e-07, + "loss": 2.5705, + "step": 3098 + }, + { + "epoch": 7.01, + "learning_rate": 4.361178381776034e-07, + "loss": 2.1664, + "step": 3100 + }, + { + "epoch": 7.02, + "learning_rate": 4.3490804032153027e-07, + "loss": 2.2688, + "step": 3102 + }, + { + "epoch": 7.02, + "learning_rate": 4.3369945633445626e-07, + "loss": 2.391, + "step": 3104 + }, + { + "epoch": 7.03, + "learning_rate": 4.324920888125294e-07, + "loss": 2.2777, + "step": 3106 + }, + { + "epoch": 7.03, + "learning_rate": 4.3128594034928514e-07, + "loss": 2.0721, + "step": 3108 + }, + { + "epoch": 7.03, + "learning_rate": 4.300810135356397e-07, + "loss": 2.3379, + "step": 3110 + }, + { + "epoch": 7.04, + "learning_rate": 4.2887731095988664e-07, + "loss": 2.2214, + "step": 3112 + }, + { + "epoch": 7.04, + "learning_rate": 4.2767483520768833e-07, + "loss": 2.6695, + "step": 3114 + }, + { + "epoch": 7.05, + "learning_rate": 4.26473588862072e-07, + "loss": 2.3539, + "step": 3116 + }, + { + "epoch": 7.05, + "learning_rate": 4.252735745034243e-07, + "loss": 2.3112, + "step": 3118 + }, + { + "epoch": 7.06, + "learning_rate": 4.2407479470948495e-07, + "loss": 2.1182, + "step": 3120 + }, + { + "epoch": 7.06, + "learning_rate": 4.22877252055343e-07, + "loss": 2.1283, + "step": 3122 + }, + { + "epoch": 7.07, + "learning_rate": 4.2168094911342867e-07, + "loss": 2.2434, + "step": 3124 + }, + { + "epoch": 7.07, + "learning_rate": 4.2048588845350974e-07, + "loss": 2.3956, + "step": 3126 + }, + { + "epoch": 7.07, + "learning_rate": 4.192920726426853e-07, + "loss": 2.2923, + "step": 3128 + }, + { + "epoch": 7.08, + "learning_rate": 4.1809950424538043e-07, + "loss": 2.4565, + "step": 3130 + }, + { + "epoch": 7.08, + "learning_rate": 4.169081858233404e-07, + "loss": 2.3932, + "step": 3132 + }, + { + "epoch": 7.09, + "learning_rate": 4.157181199356262e-07, + "loss": 2.1525, + "step": 3134 + }, + { + "epoch": 7.09, + "learning_rate": 4.145293091386076e-07, + "loss": 2.1397, + "step": 3136 + }, + { + "epoch": 7.1, + "learning_rate": 4.1334175598595845e-07, + "loss": 2.2545, + "step": 3138 + }, + { + "epoch": 7.1, + "learning_rate": 4.121554630286506e-07, + "loss": 2.7604, + "step": 3140 + }, + { + "epoch": 7.11, + "learning_rate": 4.109704328149505e-07, + "loss": 2.1315, + "step": 3142 + }, + { + "epoch": 7.11, + "learning_rate": 4.097866678904096e-07, + "loss": 2.3746, + "step": 3144 + }, + { + "epoch": 7.12, + "learning_rate": 4.086041707978638e-07, + "loss": 2.262, + "step": 3146 + }, + { + "epoch": 7.12, + "learning_rate": 4.0742294407742386e-07, + "loss": 2.5298, + "step": 3148 + }, + { + "epoch": 7.12, + "learning_rate": 4.0624299026647335e-07, + "loss": 2.035, + "step": 3150 + }, + { + "epoch": 7.13, + "learning_rate": 4.050643118996593e-07, + "loss": 2.2675, + "step": 3152 + }, + { + "epoch": 7.13, + "learning_rate": 4.038869115088911e-07, + "loss": 2.2519, + "step": 3154 + }, + { + "epoch": 7.14, + "learning_rate": 4.0271079162333175e-07, + "loss": 2.474, + "step": 3156 + }, + { + "epoch": 7.14, + "learning_rate": 4.015359547693938e-07, + "loss": 2.5323, + "step": 3158 + }, + { + "epoch": 7.15, + "learning_rate": 4.003624034707336e-07, + "loss": 2.2379, + "step": 3160 + }, + { + "epoch": 7.15, + "learning_rate": 3.991901402482468e-07, + "loss": 2.2635, + "step": 3162 + }, + { + "epoch": 7.16, + "learning_rate": 3.9801916762006137e-07, + "loss": 2.6183, + "step": 3164 + }, + { + "epoch": 7.16, + "learning_rate": 3.9684948810153305e-07, + "loss": 2.5431, + "step": 3166 + }, + { + "epoch": 7.17, + "learning_rate": 3.9568110420523957e-07, + "loss": 2.3616, + "step": 3168 + }, + { + "epoch": 7.17, + "learning_rate": 3.945140184409772e-07, + "loss": 2.2793, + "step": 3170 + }, + { + "epoch": 7.17, + "learning_rate": 3.9334823331575086e-07, + "loss": 2.224, + "step": 3172 + }, + { + "epoch": 7.18, + "learning_rate": 3.9218375133377445e-07, + "loss": 2.3404, + "step": 3174 + }, + { + "epoch": 7.18, + "learning_rate": 3.910205749964608e-07, + "loss": 2.4168, + "step": 3176 + }, + { + "epoch": 7.19, + "learning_rate": 3.898587068024186e-07, + "loss": 2.3829, + "step": 3178 + }, + { + "epoch": 7.19, + "learning_rate": 3.8869814924744637e-07, + "loss": 2.4255, + "step": 3180 + }, + { + "epoch": 7.2, + "learning_rate": 3.875389048245281e-07, + "loss": 2.2857, + "step": 3182 + }, + { + "epoch": 7.2, + "learning_rate": 3.86380976023826e-07, + "loss": 2.5262, + "step": 3184 + }, + { + "epoch": 7.21, + "learning_rate": 3.8522436533267654e-07, + "loss": 2.2654, + "step": 3186 + }, + { + "epoch": 7.21, + "learning_rate": 3.8406907523558496e-07, + "loss": 2.2859, + "step": 3188 + }, + { + "epoch": 7.22, + "learning_rate": 3.829151082142196e-07, + "loss": 2.5575, + "step": 3190 + }, + { + "epoch": 7.22, + "learning_rate": 3.817624667474062e-07, + "loss": 2.1387, + "step": 3192 + }, + { + "epoch": 7.22, + "learning_rate": 3.806111533111245e-07, + "loss": 2.1788, + "step": 3194 + }, + { + "epoch": 7.23, + "learning_rate": 3.794611703785002e-07, + "loss": 2.3718, + "step": 3196 + }, + { + "epoch": 7.23, + "learning_rate": 3.7831252041980143e-07, + "loss": 2.4759, + "step": 3198 + }, + { + "epoch": 7.24, + "learning_rate": 3.771652059024328e-07, + "loss": 2.2483, + "step": 3200 + }, + { + "epoch": 7.24, + "learning_rate": 3.760192292909303e-07, + "loss": 2.1328, + "step": 3202 + }, + { + "epoch": 7.25, + "learning_rate": 3.7487459304695655e-07, + "loss": 2.521, + "step": 3204 + }, + { + "epoch": 7.25, + "learning_rate": 3.737312996292941e-07, + "loss": 2.1955, + "step": 3206 + }, + { + "epoch": 7.26, + "learning_rate": 3.725893514938414e-07, + "loss": 2.2915, + "step": 3208 + }, + { + "epoch": 7.26, + "learning_rate": 3.7144875109360675e-07, + "loss": 2.4738, + "step": 3210 + }, + { + "epoch": 7.26, + "learning_rate": 3.703095008787047e-07, + "loss": 2.2094, + "step": 3212 + }, + { + "epoch": 7.27, + "learning_rate": 3.6917160329634714e-07, + "loss": 2.3182, + "step": 3214 + }, + { + "epoch": 7.27, + "learning_rate": 3.6803506079084256e-07, + "loss": 2.1551, + "step": 3216 + }, + { + "epoch": 7.28, + "learning_rate": 3.6689987580358773e-07, + "loss": 2.1328, + "step": 3218 + }, + { + "epoch": 7.28, + "learning_rate": 3.657660507730631e-07, + "loss": 2.5026, + "step": 3220 + }, + { + "epoch": 7.29, + "learning_rate": 3.6463358813482805e-07, + "loss": 2.1657, + "step": 3222 + }, + { + "epoch": 7.29, + "learning_rate": 3.6350249032151594e-07, + "loss": 2.2007, + "step": 3224 + }, + { + "epoch": 7.3, + "learning_rate": 3.6237275976282753e-07, + "loss": 2.4264, + "step": 3226 + }, + { + "epoch": 7.3, + "learning_rate": 3.6124439888552725e-07, + "loss": 2.4743, + "step": 3228 + }, + { + "epoch": 7.31, + "learning_rate": 3.6011741011343645e-07, + "loss": 2.1036, + "step": 3230 + }, + { + "epoch": 7.31, + "learning_rate": 3.589917958674307e-07, + "loss": 2.572, + "step": 3232 + }, + { + "epoch": 7.31, + "learning_rate": 3.578675585654305e-07, + "loss": 2.0887, + "step": 3234 + }, + { + "epoch": 7.32, + "learning_rate": 3.5674470062240125e-07, + "loss": 2.4678, + "step": 3236 + }, + { + "epoch": 7.32, + "learning_rate": 3.556232244503432e-07, + "loss": 2.4499, + "step": 3238 + }, + { + "epoch": 7.33, + "learning_rate": 3.5450313245829034e-07, + "loss": 2.3848, + "step": 3240 + }, + { + "epoch": 7.33, + "learning_rate": 3.5338442705230097e-07, + "loss": 2.1436, + "step": 3242 + }, + { + "epoch": 7.34, + "learning_rate": 3.522671106354572e-07, + "loss": 2.5811, + "step": 3244 + }, + { + "epoch": 7.34, + "learning_rate": 3.5115118560785607e-07, + "loss": 2.5358, + "step": 3246 + }, + { + "epoch": 7.35, + "learning_rate": 3.5003665436660636e-07, + "loss": 2.4689, + "step": 3248 + }, + { + "epoch": 7.35, + "learning_rate": 3.489235193058222e-07, + "loss": 2.2282, + "step": 3250 + }, + { + "epoch": 7.36, + "learning_rate": 3.478117828166197e-07, + "loss": 2.2035, + "step": 3252 + }, + { + "epoch": 7.36, + "learning_rate": 3.467014472871099e-07, + "loss": 2.1824, + "step": 3254 + }, + { + "epoch": 7.36, + "learning_rate": 3.455925151023946e-07, + "loss": 2.3074, + "step": 3256 + }, + { + "epoch": 7.37, + "learning_rate": 3.444849886445612e-07, + "loss": 2.1646, + "step": 3258 + }, + { + "epoch": 7.37, + "learning_rate": 3.433788702926772e-07, + "loss": 2.2208, + "step": 3260 + }, + { + "epoch": 7.38, + "learning_rate": 3.422741624227855e-07, + "loss": 2.397, + "step": 3262 + }, + { + "epoch": 7.38, + "learning_rate": 3.4117086740789967e-07, + "loss": 2.2592, + "step": 3264 + }, + { + "epoch": 7.39, + "learning_rate": 3.4006898761799776e-07, + "loss": 2.4963, + "step": 3266 + }, + { + "epoch": 7.39, + "learning_rate": 3.3896852542001777e-07, + "loss": 2.5038, + "step": 3268 + }, + { + "epoch": 7.4, + "learning_rate": 3.378694831778527e-07, + "loss": 2.4851, + "step": 3270 + }, + { + "epoch": 7.4, + "learning_rate": 3.367718632523452e-07, + "loss": 2.43, + "step": 3272 + }, + { + "epoch": 7.41, + "learning_rate": 3.356756680012833e-07, + "loss": 2.5678, + "step": 3274 + }, + { + "epoch": 7.41, + "learning_rate": 3.345808997793942e-07, + "loss": 2.1539, + "step": 3276 + }, + { + "epoch": 7.41, + "learning_rate": 3.3348756093833965e-07, + "loss": 2.1828, + "step": 3278 + }, + { + "epoch": 7.42, + "learning_rate": 3.3239565382671087e-07, + "loss": 2.4646, + "step": 3280 + }, + { + "epoch": 7.42, + "learning_rate": 3.313051807900239e-07, + "loss": 2.1353, + "step": 3282 + }, + { + "epoch": 7.43, + "learning_rate": 3.30216144170714e-07, + "loss": 2.4583, + "step": 3284 + }, + { + "epoch": 7.43, + "learning_rate": 3.2912854630813137e-07, + "loss": 2.2403, + "step": 3286 + }, + { + "epoch": 7.44, + "learning_rate": 3.2804238953853524e-07, + "loss": 2.5755, + "step": 3288 + }, + { + "epoch": 7.44, + "learning_rate": 3.2695767619508905e-07, + "loss": 2.4918, + "step": 3290 + }, + { + "epoch": 7.45, + "learning_rate": 3.2587440860785567e-07, + "loss": 2.4181, + "step": 3292 + }, + { + "epoch": 7.45, + "learning_rate": 3.2479258910379347e-07, + "loss": 2.1952, + "step": 3294 + }, + { + "epoch": 7.45, + "learning_rate": 3.237122200067479e-07, + "loss": 2.3317, + "step": 3296 + }, + { + "epoch": 7.46, + "learning_rate": 3.2263330363745136e-07, + "loss": 2.0153, + "step": 3298 + }, + { + "epoch": 7.46, + "learning_rate": 3.215558423135136e-07, + "loss": 2.2844, + "step": 3300 + }, + { + "epoch": 7.47, + "learning_rate": 3.2047983834942085e-07, + "loss": 2.4493, + "step": 3302 + }, + { + "epoch": 7.47, + "learning_rate": 3.1940529405652627e-07, + "loss": 2.4057, + "step": 3304 + }, + { + "epoch": 7.48, + "learning_rate": 3.183322117430497e-07, + "loss": 2.1862, + "step": 3306 + }, + { + "epoch": 7.48, + "learning_rate": 3.1726059371406953e-07, + "loss": 2.6167, + "step": 3308 + }, + { + "epoch": 7.49, + "learning_rate": 3.1619044227151894e-07, + "loss": 2.4327, + "step": 3310 + }, + { + "epoch": 7.49, + "learning_rate": 3.151217597141802e-07, + "loss": 2.264, + "step": 3312 + }, + { + "epoch": 7.5, + "learning_rate": 3.140545483376815e-07, + "loss": 2.1945, + "step": 3314 + }, + { + "epoch": 7.5, + "learning_rate": 3.1298881043448976e-07, + "loss": 2.5421, + "step": 3316 + }, + { + "epoch": 7.5, + "learning_rate": 3.11924548293907e-07, + "loss": 2.1653, + "step": 3318 + }, + { + "epoch": 7.51, + "learning_rate": 3.108617642020651e-07, + "loss": 2.3909, + "step": 3320 + }, + { + "epoch": 7.51, + "learning_rate": 3.0980046044192187e-07, + "loss": 2.5704, + "step": 3322 + }, + { + "epoch": 7.52, + "learning_rate": 3.087406392932532e-07, + "loss": 2.3593, + "step": 3324 + }, + { + "epoch": 7.52, + "learning_rate": 3.0768230303265254e-07, + "loss": 2.1842, + "step": 3326 + }, + { + "epoch": 7.53, + "learning_rate": 3.066254539335218e-07, + "loss": 2.2524, + "step": 3328 + }, + { + "epoch": 7.53, + "learning_rate": 3.0557009426607007e-07, + "loss": 2.4896, + "step": 3330 + }, + { + "epoch": 7.54, + "learning_rate": 3.045162262973048e-07, + "loss": 2.3859, + "step": 3332 + }, + { + "epoch": 7.54, + "learning_rate": 3.0346385229103125e-07, + "loss": 2.2594, + "step": 3334 + }, + { + "epoch": 7.55, + "learning_rate": 3.0241297450784443e-07, + "loss": 2.3181, + "step": 3336 + }, + { + "epoch": 7.55, + "learning_rate": 3.0136359520512545e-07, + "loss": 2.2603, + "step": 3338 + }, + { + "epoch": 7.55, + "learning_rate": 3.003157166370367e-07, + "loss": 2.2832, + "step": 3340 + }, + { + "epoch": 7.56, + "learning_rate": 2.992693410545165e-07, + "loss": 2.1501, + "step": 3342 + }, + { + "epoch": 7.56, + "learning_rate": 2.9822447070527547e-07, + "loss": 2.5098, + "step": 3344 + }, + { + "epoch": 7.57, + "learning_rate": 2.971811078337899e-07, + "loss": 2.3524, + "step": 3346 + }, + { + "epoch": 7.57, + "learning_rate": 2.961392546812983e-07, + "loss": 2.5895, + "step": 3348 + }, + { + "epoch": 7.58, + "learning_rate": 2.950989134857963e-07, + "loss": 2.3269, + "step": 3350 + }, + { + "epoch": 7.58, + "learning_rate": 2.940600864820314e-07, + "loss": 2.4572, + "step": 3352 + }, + { + "epoch": 7.59, + "learning_rate": 2.9302277590149835e-07, + "loss": 2.3999, + "step": 3354 + }, + { + "epoch": 7.59, + "learning_rate": 2.9198698397243535e-07, + "loss": 2.4635, + "step": 3356 + }, + { + "epoch": 7.6, + "learning_rate": 2.9095271291981747e-07, + "loss": 2.3415, + "step": 3358 + }, + { + "epoch": 7.6, + "learning_rate": 2.899199649653532e-07, + "loss": 2.3258, + "step": 3360 + }, + { + "epoch": 7.6, + "learning_rate": 2.8888874232747896e-07, + "loss": 2.227, + "step": 3362 + }, + { + "epoch": 7.61, + "learning_rate": 2.8785904722135567e-07, + "loss": 2.3294, + "step": 3364 + }, + { + "epoch": 7.61, + "learning_rate": 2.868308818588611e-07, + "loss": 2.5475, + "step": 3366 + }, + { + "epoch": 7.62, + "learning_rate": 2.8580424844858884e-07, + "loss": 2.5204, + "step": 3368 + }, + { + "epoch": 7.62, + "learning_rate": 2.8477914919584076e-07, + "loss": 2.2726, + "step": 3370 + }, + { + "epoch": 7.63, + "learning_rate": 2.837555863026236e-07, + "loss": 2.2483, + "step": 3372 + }, + { + "epoch": 7.63, + "learning_rate": 2.8273356196764307e-07, + "loss": 2.3288, + "step": 3374 + }, + { + "epoch": 7.64, + "learning_rate": 2.8171307838630144e-07, + "loss": 2.4139, + "step": 3376 + }, + { + "epoch": 7.64, + "learning_rate": 2.8069413775068986e-07, + "loss": 2.4272, + "step": 3378 + }, + { + "epoch": 7.64, + "learning_rate": 2.796767422495856e-07, + "loss": 2.1728, + "step": 3380 + }, + { + "epoch": 7.65, + "learning_rate": 2.786608940684467e-07, + "loss": 2.1175, + "step": 3382 + }, + { + "epoch": 7.65, + "learning_rate": 2.776465953894085e-07, + "loss": 2.3987, + "step": 3384 + }, + { + "epoch": 7.66, + "learning_rate": 2.766338483912757e-07, + "loss": 2.4435, + "step": 3386 + }, + { + "epoch": 7.66, + "learning_rate": 2.756226552495219e-07, + "loss": 2.2848, + "step": 3388 + }, + { + "epoch": 7.67, + "learning_rate": 2.7461301813628155e-07, + "loss": 2.3723, + "step": 3390 + }, + { + "epoch": 7.67, + "learning_rate": 2.73604939220348e-07, + "loss": 2.3791, + "step": 3392 + }, + { + "epoch": 7.68, + "learning_rate": 2.7259842066716543e-07, + "loss": 2.4471, + "step": 3394 + }, + { + "epoch": 7.68, + "learning_rate": 2.7159346463882826e-07, + "loss": 2.2133, + "step": 3396 + }, + { + "epoch": 7.69, + "learning_rate": 2.7059007329407326e-07, + "loss": 2.3595, + "step": 3398 + }, + { + "epoch": 7.69, + "learning_rate": 2.6958824878827635e-07, + "loss": 2.3127, + "step": 3400 + }, + { + "epoch": 7.69, + "learning_rate": 2.685879932734476e-07, + "loss": 2.3173, + "step": 3402 + }, + { + "epoch": 7.7, + "learning_rate": 2.675893088982275e-07, + "loss": 2.1802, + "step": 3404 + }, + { + "epoch": 7.7, + "learning_rate": 2.665921978078809e-07, + "loss": 2.3848, + "step": 3406 + }, + { + "epoch": 7.71, + "learning_rate": 2.6559666214429297e-07, + "loss": 2.3093, + "step": 3408 + }, + { + "epoch": 7.71, + "learning_rate": 2.646027040459651e-07, + "loss": 2.1086, + "step": 3410 + }, + { + "epoch": 7.72, + "learning_rate": 2.636103256480102e-07, + "loss": 2.4374, + "step": 3412 + }, + { + "epoch": 7.72, + "learning_rate": 2.626195290821467e-07, + "loss": 2.2971, + "step": 3414 + }, + { + "epoch": 7.73, + "learning_rate": 2.616303164766965e-07, + "loss": 2.42, + "step": 3416 + }, + { + "epoch": 7.73, + "learning_rate": 2.6064268995657823e-07, + "loss": 2.3013, + "step": 3418 + }, + { + "epoch": 7.74, + "learning_rate": 2.5965665164330354e-07, + "loss": 2.3163, + "step": 3420 + }, + { + "epoch": 7.74, + "learning_rate": 2.5867220365497254e-07, + "loss": 2.376, + "step": 3422 + }, + { + "epoch": 7.74, + "learning_rate": 2.5768934810626885e-07, + "loss": 2.3108, + "step": 3424 + }, + { + "epoch": 7.75, + "learning_rate": 2.5670808710845624e-07, + "loss": 2.4786, + "step": 3426 + }, + { + "epoch": 7.75, + "learning_rate": 2.557284227693727e-07, + "loss": 2.1118, + "step": 3428 + }, + { + "epoch": 7.76, + "learning_rate": 2.5475035719342606e-07, + "loss": 2.444, + "step": 3430 + }, + { + "epoch": 7.76, + "learning_rate": 2.5377389248159033e-07, + "loss": 2.1952, + "step": 3432 + }, + { + "epoch": 7.77, + "learning_rate": 2.527990307314013e-07, + "loss": 2.283, + "step": 3434 + }, + { + "epoch": 7.77, + "learning_rate": 2.518257740369497e-07, + "loss": 2.3963, + "step": 3436 + }, + { + "epoch": 7.78, + "learning_rate": 2.5085412448888045e-07, + "loss": 2.1796, + "step": 3438 + }, + { + "epoch": 7.78, + "learning_rate": 2.49884084174385e-07, + "loss": 2.3689, + "step": 3440 + }, + { + "epoch": 7.79, + "learning_rate": 2.489156551771981e-07, + "loss": 2.4479, + "step": 3442 + }, + { + "epoch": 7.79, + "learning_rate": 2.479488395775935e-07, + "loss": 2.2417, + "step": 3444 + }, + { + "epoch": 7.79, + "learning_rate": 2.469836394523794e-07, + "loss": 2.4555, + "step": 3446 + }, + { + "epoch": 7.8, + "learning_rate": 2.460200568748936e-07, + "loss": 2.7688, + "step": 3448 + }, + { + "epoch": 7.8, + "learning_rate": 2.45058093914999e-07, + "loss": 2.7413, + "step": 3450 + }, + { + "epoch": 7.81, + "learning_rate": 2.440977526390795e-07, + "loss": 2.3979, + "step": 3452 + }, + { + "epoch": 7.81, + "learning_rate": 2.431390351100364e-07, + "loss": 2.3415, + "step": 3454 + }, + { + "epoch": 7.82, + "learning_rate": 2.421819433872813e-07, + "loss": 2.6412, + "step": 3456 + }, + { + "epoch": 7.82, + "learning_rate": 2.4122647952673504e-07, + "loss": 2.4298, + "step": 3458 + }, + { + "epoch": 7.83, + "learning_rate": 2.4027264558082085e-07, + "loss": 2.186, + "step": 3460 + }, + { + "epoch": 7.83, + "learning_rate": 2.393204435984608e-07, + "loss": 2.3957, + "step": 3462 + }, + { + "epoch": 7.83, + "learning_rate": 2.3836987562507138e-07, + "loss": 2.5346, + "step": 3464 + }, + { + "epoch": 7.84, + "learning_rate": 2.3742094370255972e-07, + "loss": 2.325, + "step": 3466 + }, + { + "epoch": 7.84, + "learning_rate": 2.364736498693175e-07, + "loss": 2.4938, + "step": 3468 + }, + { + "epoch": 7.85, + "learning_rate": 2.3552799616021845e-07, + "loss": 2.4997, + "step": 3470 + }, + { + "epoch": 7.85, + "learning_rate": 2.3458398460661233e-07, + "loss": 2.3212, + "step": 3472 + }, + { + "epoch": 7.86, + "learning_rate": 2.3364161723632302e-07, + "loss": 2.3675, + "step": 3474 + }, + { + "epoch": 7.86, + "learning_rate": 2.3270089607364018e-07, + "loss": 2.1652, + "step": 3476 + }, + { + "epoch": 7.87, + "learning_rate": 2.3176182313931948e-07, + "loss": 2.4536, + "step": 3478 + }, + { + "epoch": 7.87, + "learning_rate": 2.308244004505745e-07, + "loss": 2.2274, + "step": 3480 + }, + { + "epoch": 7.88, + "learning_rate": 2.2988863002107538e-07, + "loss": 2.4275, + "step": 3482 + }, + { + "epoch": 7.88, + "learning_rate": 2.2895451386094124e-07, + "loss": 2.1462, + "step": 3484 + }, + { + "epoch": 7.88, + "learning_rate": 2.2802205397673935e-07, + "loss": 2.4634, + "step": 3486 + }, + { + "epoch": 7.89, + "learning_rate": 2.270912523714782e-07, + "loss": 2.435, + "step": 3488 + }, + { + "epoch": 7.89, + "learning_rate": 2.2616211104460459e-07, + "loss": 2.4373, + "step": 3490 + }, + { + "epoch": 7.9, + "learning_rate": 2.2523463199199822e-07, + "loss": 2.3186, + "step": 3492 + }, + { + "epoch": 7.9, + "learning_rate": 2.2430881720596927e-07, + "loss": 2.44, + "step": 3494 + }, + { + "epoch": 7.91, + "learning_rate": 2.2338466867525196e-07, + "loss": 2.4308, + "step": 3496 + }, + { + "epoch": 7.91, + "learning_rate": 2.2246218838500143e-07, + "loss": 2.2673, + "step": 3498 + }, + { + "epoch": 7.92, + "learning_rate": 2.2154137831678932e-07, + "loss": 2.2352, + "step": 3500 + }, + { + "epoch": 7.92, + "learning_rate": 2.2062224044859966e-07, + "loss": 2.202, + "step": 3502 + }, + { + "epoch": 7.93, + "learning_rate": 2.1970477675482423e-07, + "loss": 2.2827, + "step": 3504 + }, + { + "epoch": 7.93, + "learning_rate": 2.1878898920625832e-07, + "loss": 2.1762, + "step": 3506 + }, + { + "epoch": 7.93, + "learning_rate": 2.1787487977009765e-07, + "loss": 2.474, + "step": 3508 + }, + { + "epoch": 7.94, + "learning_rate": 2.169624504099321e-07, + "loss": 2.2976, + "step": 3510 + }, + { + "epoch": 7.94, + "learning_rate": 2.1605170308574318e-07, + "loss": 2.1453, + "step": 3512 + }, + { + "epoch": 7.95, + "learning_rate": 2.1514263975389879e-07, + "loss": 2.1924, + "step": 3514 + }, + { + "epoch": 7.95, + "learning_rate": 2.1423526236715052e-07, + "loss": 2.2609, + "step": 3516 + }, + { + "epoch": 7.96, + "learning_rate": 2.1378220651243138e-07, + "loss": 2.4962, + "step": 3518 + }, + { + "epoch": 7.96, + "learning_rate": 2.128773616968108e-07, + "loss": 2.186, + "step": 3520 + }, + { + "epoch": 7.97, + "learning_rate": 2.1197420769230912e-07, + "loss": 2.2364, + "step": 3522 + }, + { + "epoch": 7.97, + "learning_rate": 2.110727464389843e-07, + "loss": 2.5511, + "step": 3524 + }, + { + "epoch": 7.98, + "learning_rate": 2.101729798732561e-07, + "loss": 2.1022, + "step": 3526 + }, + { + "epoch": 7.98, + "learning_rate": 2.0927490992790587e-07, + "loss": 2.501, + "step": 3528 + }, + { + "epoch": 7.98, + "learning_rate": 2.0837853853206934e-07, + "loss": 2.3158, + "step": 3530 + }, + { + "epoch": 7.99, + "learning_rate": 2.0748386761123404e-07, + "loss": 2.3638, + "step": 3532 + }, + { + "epoch": 7.99, + "learning_rate": 2.0659089908723415e-07, + "loss": 2.2822, + "step": 3534 + }, + { + "epoch": 8.0, + "learning_rate": 2.0614505382362125e-07, + "loss": 2.4747, + "step": 3536 + }, + { + "epoch": 8.0, + "learning_rate": 2.0525464249031454e-07, + "loss": 2.1788, + "step": 3538 + }, + { + "epoch": 8.01, + "learning_rate": 2.043659383424222e-07, + "loss": 2.0549, + "step": 3540 + }, + { + "epoch": 8.01, + "learning_rate": 2.0347894328896177e-07, + "loss": 2.1212, + "step": 3542 + }, + { + "epoch": 8.02, + "learning_rate": 2.0259365923527894e-07, + "loss": 2.4062, + "step": 3544 + }, + { + "epoch": 8.02, + "learning_rate": 2.0171008808304547e-07, + "loss": 2.3898, + "step": 3546 + }, + { + "epoch": 8.02, + "learning_rate": 2.0082823173025142e-07, + "loss": 2.2393, + "step": 3548 + }, + { + "epoch": 8.03, + "learning_rate": 1.9994809207120556e-07, + "loss": 2.1458, + "step": 3550 + }, + { + "epoch": 8.03, + "learning_rate": 1.9906967099652771e-07, + "loss": 2.3189, + "step": 3552 + }, + { + "epoch": 8.04, + "learning_rate": 1.981929703931463e-07, + "loss": 2.1485, + "step": 3554 + }, + { + "epoch": 8.04, + "learning_rate": 1.973179921442938e-07, + "loss": 2.2943, + "step": 3556 + }, + { + "epoch": 8.05, + "learning_rate": 1.964447381295038e-07, + "loss": 2.2134, + "step": 3558 + }, + { + "epoch": 8.05, + "learning_rate": 1.9557321022460505e-07, + "loss": 2.2823, + "step": 3560 + }, + { + "epoch": 8.06, + "learning_rate": 1.9470341030171898e-07, + "loss": 2.5778, + "step": 3562 + }, + { + "epoch": 8.06, + "learning_rate": 1.9383534022925484e-07, + "loss": 2.3283, + "step": 3564 + }, + { + "epoch": 8.07, + "learning_rate": 1.929690018719069e-07, + "loss": 2.0778, + "step": 3566 + }, + { + "epoch": 8.07, + "learning_rate": 1.9210439709064785e-07, + "loss": 2.3643, + "step": 3568 + }, + { + "epoch": 8.07, + "learning_rate": 1.9124152774272827e-07, + "loss": 2.1915, + "step": 3570 + }, + { + "epoch": 8.08, + "learning_rate": 1.9038039568166974e-07, + "loss": 2.372, + "step": 3572 + }, + { + "epoch": 8.08, + "learning_rate": 1.8952100275726268e-07, + "loss": 2.1317, + "step": 3574 + }, + { + "epoch": 8.09, + "learning_rate": 1.8866335081556072e-07, + "loss": 2.1938, + "step": 3576 + }, + { + "epoch": 8.09, + "learning_rate": 1.8780744169887918e-07, + "loss": 2.3531, + "step": 3578 + }, + { + "epoch": 8.1, + "learning_rate": 1.8695327724578836e-07, + "loss": 2.0558, + "step": 3580 + }, + { + "epoch": 8.1, + "learning_rate": 1.861008592911113e-07, + "loss": 2.1199, + "step": 3582 + }, + { + "epoch": 8.11, + "learning_rate": 1.8525018966591953e-07, + "loss": 2.1674, + "step": 3584 + }, + { + "epoch": 8.11, + "learning_rate": 1.844012701975286e-07, + "loss": 2.5179, + "step": 3586 + }, + { + "epoch": 8.12, + "learning_rate": 1.8355410270949545e-07, + "loss": 2.2588, + "step": 3588 + }, + { + "epoch": 8.12, + "learning_rate": 1.8270868902161219e-07, + "loss": 2.4867, + "step": 3590 + }, + { + "epoch": 8.12, + "learning_rate": 1.8186503094990512e-07, + "loss": 2.5123, + "step": 3592 + }, + { + "epoch": 8.13, + "learning_rate": 1.810231303066283e-07, + "loss": 2.067, + "step": 3594 + }, + { + "epoch": 8.13, + "learning_rate": 1.8018298890026106e-07, + "loss": 2.1489, + "step": 3596 + }, + { + "epoch": 8.14, + "learning_rate": 1.7934460853550336e-07, + "loss": 2.4436, + "step": 3598 + }, + { + "epoch": 8.14, + "learning_rate": 1.7850799101327318e-07, + "loss": 2.6137, + "step": 3600 + }, + { + "epoch": 8.15, + "learning_rate": 1.7767313813070084e-07, + "loss": 2.4861, + "step": 3602 + }, + { + "epoch": 8.15, + "learning_rate": 1.7684005168112647e-07, + "loss": 2.2893, + "step": 3604 + }, + { + "epoch": 8.16, + "learning_rate": 1.7600873345409517e-07, + "loss": 2.3534, + "step": 3606 + }, + { + "epoch": 8.16, + "learning_rate": 1.7517918523535514e-07, + "loss": 2.479, + "step": 3608 + }, + { + "epoch": 8.17, + "learning_rate": 1.743514088068504e-07, + "loss": 2.2013, + "step": 3610 + }, + { + "epoch": 8.17, + "learning_rate": 1.7352540594672105e-07, + "loss": 2.4611, + "step": 3612 + }, + { + "epoch": 8.17, + "learning_rate": 1.7270117842929577e-07, + "loss": 2.2597, + "step": 3614 + }, + { + "epoch": 8.18, + "learning_rate": 1.7187872802509117e-07, + "loss": 2.5705, + "step": 3616 + }, + { + "epoch": 8.18, + "learning_rate": 1.7105805650080462e-07, + "loss": 2.1431, + "step": 3618 + }, + { + "epoch": 8.19, + "learning_rate": 1.7023916561931405e-07, + "loss": 2.3859, + "step": 3620 + }, + { + "epoch": 8.19, + "learning_rate": 1.6942205713967138e-07, + "loss": 2.4055, + "step": 3622 + }, + { + "epoch": 8.2, + "learning_rate": 1.6860673281710013e-07, + "loss": 2.2329, + "step": 3624 + }, + { + "epoch": 8.2, + "learning_rate": 1.677931944029909e-07, + "loss": 2.5085, + "step": 3626 + }, + { + "epoch": 8.21, + "learning_rate": 1.6698144364489875e-07, + "loss": 2.3044, + "step": 3628 + }, + { + "epoch": 8.21, + "learning_rate": 1.661714822865381e-07, + "loss": 2.5598, + "step": 3630 + }, + { + "epoch": 8.21, + "learning_rate": 1.6536331206777965e-07, + "loss": 2.4912, + "step": 3632 + }, + { + "epoch": 8.22, + "learning_rate": 1.6455693472464638e-07, + "loss": 2.1866, + "step": 3634 + }, + { + "epoch": 8.22, + "learning_rate": 1.6375235198931113e-07, + "loss": 2.245, + "step": 3636 + }, + { + "epoch": 8.23, + "learning_rate": 1.6294956559008976e-07, + "loss": 2.3741, + "step": 3638 + }, + { + "epoch": 8.23, + "learning_rate": 1.6214857725144137e-07, + "loss": 2.4793, + "step": 3640 + }, + { + "epoch": 8.24, + "learning_rate": 1.6134938869396163e-07, + "loss": 2.3812, + "step": 3642 + }, + { + "epoch": 8.24, + "learning_rate": 1.6055200163438032e-07, + "loss": 2.203, + "step": 3644 + }, + { + "epoch": 8.25, + "learning_rate": 1.5975641778555738e-07, + "loss": 2.4009, + "step": 3646 + }, + { + "epoch": 8.25, + "learning_rate": 1.5896263885647964e-07, + "loss": 2.2717, + "step": 3648 + }, + { + "epoch": 8.26, + "learning_rate": 1.581706665522564e-07, + "loss": 2.3982, + "step": 3650 + }, + { + "epoch": 8.26, + "learning_rate": 1.5738050257411628e-07, + "loss": 2.2986, + "step": 3652 + }, + { + "epoch": 8.26, + "learning_rate": 1.5659214861940352e-07, + "loss": 2.3759, + "step": 3654 + }, + { + "epoch": 8.27, + "learning_rate": 1.5580560638157425e-07, + "loss": 2.7631, + "step": 3656 + }, + { + "epoch": 8.27, + "learning_rate": 1.5502087755019245e-07, + "loss": 2.4175, + "step": 3658 + }, + { + "epoch": 8.28, + "learning_rate": 1.542379638109278e-07, + "loss": 2.4338, + "step": 3660 + }, + { + "epoch": 8.28, + "learning_rate": 1.5345686684554994e-07, + "loss": 2.4166, + "step": 3662 + }, + { + "epoch": 8.29, + "learning_rate": 1.5267758833192625e-07, + "loss": 2.1653, + "step": 3664 + }, + { + "epoch": 8.29, + "learning_rate": 1.5190012994401814e-07, + "loss": 2.2565, + "step": 3666 + }, + { + "epoch": 8.3, + "learning_rate": 1.511244933518766e-07, + "loss": 2.2848, + "step": 3668 + }, + { + "epoch": 8.3, + "learning_rate": 1.5035068022164033e-07, + "loss": 2.4534, + "step": 3670 + }, + { + "epoch": 8.31, + "learning_rate": 1.4957869221552998e-07, + "loss": 2.4818, + "step": 3672 + }, + { + "epoch": 8.31, + "learning_rate": 1.4880853099184631e-07, + "loss": 2.5356, + "step": 3674 + }, + { + "epoch": 8.31, + "learning_rate": 1.480401982049655e-07, + "loss": 2.2729, + "step": 3676 + }, + { + "epoch": 8.32, + "learning_rate": 1.4727369550533687e-07, + "loss": 2.3927, + "step": 3678 + }, + { + "epoch": 8.32, + "learning_rate": 1.4650902453947734e-07, + "loss": 2.2466, + "step": 3680 + }, + { + "epoch": 8.33, + "learning_rate": 1.4574618694997032e-07, + "loss": 2.5247, + "step": 3682 + }, + { + "epoch": 8.33, + "learning_rate": 1.4498518437546025e-07, + "loss": 2.3951, + "step": 3684 + }, + { + "epoch": 8.34, + "learning_rate": 1.4422601845065009e-07, + "loss": 2.2343, + "step": 3686 + }, + { + "epoch": 8.34, + "learning_rate": 1.4346869080629698e-07, + "loss": 2.3271, + "step": 3688 + }, + { + "epoch": 8.35, + "learning_rate": 1.427132030692102e-07, + "loss": 2.4791, + "step": 3690 + }, + { + "epoch": 8.35, + "learning_rate": 1.4195955686224604e-07, + "loss": 2.3011, + "step": 3692 + }, + { + "epoch": 8.36, + "learning_rate": 1.4120775380430517e-07, + "loss": 2.5667, + "step": 3694 + }, + { + "epoch": 8.36, + "learning_rate": 1.4045779551032877e-07, + "loss": 2.2304, + "step": 3696 + }, + { + "epoch": 8.36, + "learning_rate": 1.3970968359129632e-07, + "loss": 2.2826, + "step": 3698 + }, + { + "epoch": 8.37, + "learning_rate": 1.3896341965421943e-07, + "loss": 2.5231, + "step": 3700 + }, + { + "epoch": 8.37, + "learning_rate": 1.382190053021416e-07, + "loss": 2.0304, + "step": 3702 + }, + { + "epoch": 8.38, + "learning_rate": 1.3747644213413224e-07, + "loss": 2.4519, + "step": 3704 + }, + { + "epoch": 8.38, + "learning_rate": 1.367357317452854e-07, + "loss": 2.3202, + "step": 3706 + }, + { + "epoch": 8.39, + "learning_rate": 1.3599687572671358e-07, + "loss": 2.3184, + "step": 3708 + }, + { + "epoch": 8.39, + "learning_rate": 1.352598756655474e-07, + "loss": 2.0781, + "step": 3710 + }, + { + "epoch": 8.4, + "learning_rate": 1.3452473314493007e-07, + "loss": 2.3513, + "step": 3712 + }, + { + "epoch": 8.4, + "learning_rate": 1.3379144974401436e-07, + "loss": 2.2974, + "step": 3714 + }, + { + "epoch": 8.4, + "learning_rate": 1.3306002703795983e-07, + "loss": 2.2226, + "step": 3716 + }, + { + "epoch": 8.41, + "learning_rate": 1.3233046659792947e-07, + "loss": 2.2723, + "step": 3718 + }, + { + "epoch": 8.41, + "learning_rate": 1.3160276999108533e-07, + "loss": 2.3053, + "step": 3720 + }, + { + "epoch": 8.42, + "learning_rate": 1.3087693878058591e-07, + "loss": 2.4518, + "step": 3722 + }, + { + "epoch": 8.42, + "learning_rate": 1.3015297452558283e-07, + "loss": 2.2635, + "step": 3724 + }, + { + "epoch": 8.43, + "learning_rate": 1.2943087878121727e-07, + "loss": 2.2041, + "step": 3726 + }, + { + "epoch": 8.43, + "learning_rate": 1.287106530986165e-07, + "loss": 2.196, + "step": 3728 + }, + { + "epoch": 8.44, + "learning_rate": 1.2799229902489117e-07, + "loss": 2.3408, + "step": 3730 + }, + { + "epoch": 8.44, + "learning_rate": 1.2727581810313114e-07, + "loss": 2.5223, + "step": 3732 + }, + { + "epoch": 8.45, + "learning_rate": 1.2656121187240266e-07, + "loss": 2.4009, + "step": 3734 + }, + { + "epoch": 8.45, + "learning_rate": 1.25848481867745e-07, + "loss": 2.4088, + "step": 3736 + }, + { + "epoch": 8.45, + "learning_rate": 1.2513762962016695e-07, + "loss": 2.3094, + "step": 3738 + }, + { + "epoch": 8.46, + "learning_rate": 1.2442865665664414e-07, + "loss": 2.354, + "step": 3740 + }, + { + "epoch": 8.46, + "learning_rate": 1.2372156450011518e-07, + "loss": 2.5011, + "step": 3742 + }, + { + "epoch": 8.47, + "learning_rate": 1.2301635466947812e-07, + "loss": 2.5396, + "step": 3744 + }, + { + "epoch": 8.47, + "learning_rate": 1.2231302867958805e-07, + "loss": 2.2474, + "step": 3746 + }, + { + "epoch": 8.48, + "learning_rate": 1.2161158804125316e-07, + "loss": 2.141, + "step": 3748 + }, + { + "epoch": 8.48, + "learning_rate": 1.2091203426123175e-07, + "loss": 1.9833, + "step": 3750 + }, + { + "epoch": 8.49, + "learning_rate": 1.2021436884222946e-07, + "loss": 2.4734, + "step": 3752 + }, + { + "epoch": 8.49, + "learning_rate": 1.1951859328289493e-07, + "loss": 2.401, + "step": 3754 + }, + { + "epoch": 8.5, + "learning_rate": 1.1882470907781727e-07, + "loss": 2.3442, + "step": 3756 + }, + { + "epoch": 8.5, + "learning_rate": 1.1813271771752298e-07, + "loss": 2.3096, + "step": 3758 + }, + { + "epoch": 8.5, + "learning_rate": 1.17442620688473e-07, + "loss": 2.3408, + "step": 3760 + }, + { + "epoch": 8.51, + "learning_rate": 1.1675441947305787e-07, + "loss": 2.3648, + "step": 3762 + }, + { + "epoch": 8.51, + "learning_rate": 1.1606811554959706e-07, + "loss": 2.0118, + "step": 3764 + }, + { + "epoch": 8.52, + "learning_rate": 1.1538371039233341e-07, + "loss": 2.5944, + "step": 3766 + }, + { + "epoch": 8.52, + "learning_rate": 1.1470120547143236e-07, + "loss": 2.4937, + "step": 3768 + }, + { + "epoch": 8.53, + "learning_rate": 1.1402060225297561e-07, + "loss": 2.3807, + "step": 3770 + }, + { + "epoch": 8.53, + "learning_rate": 1.1334190219896155e-07, + "loss": 2.5853, + "step": 3772 + }, + { + "epoch": 8.54, + "learning_rate": 1.1266510676729956e-07, + "loss": 2.4372, + "step": 3774 + }, + { + "epoch": 8.54, + "learning_rate": 1.1199021741180781e-07, + "loss": 2.3959, + "step": 3776 + }, + { + "epoch": 8.55, + "learning_rate": 1.1131723558220996e-07, + "loss": 2.3788, + "step": 3778 + }, + { + "epoch": 8.55, + "learning_rate": 1.1064616272413262e-07, + "loss": 2.4389, + "step": 3780 + }, + { + "epoch": 8.55, + "learning_rate": 1.0997700027910128e-07, + "loss": 2.1827, + "step": 3782 + }, + { + "epoch": 8.56, + "learning_rate": 1.0930974968453777e-07, + "loss": 2.3847, + "step": 3784 + }, + { + "epoch": 8.56, + "learning_rate": 1.0864441237375698e-07, + "loss": 2.2319, + "step": 3786 + }, + { + "epoch": 8.57, + "learning_rate": 1.0798098977596448e-07, + "loss": 2.1224, + "step": 3788 + }, + { + "epoch": 8.57, + "learning_rate": 1.0731948331625195e-07, + "loss": 2.3572, + "step": 3790 + }, + { + "epoch": 8.58, + "learning_rate": 1.0665989441559586e-07, + "loss": 2.2529, + "step": 3792 + }, + { + "epoch": 8.58, + "learning_rate": 1.0600222449085294e-07, + "loss": 2.4526, + "step": 3794 + }, + { + "epoch": 8.59, + "learning_rate": 1.0534647495475857e-07, + "loss": 2.1667, + "step": 3796 + }, + { + "epoch": 8.59, + "learning_rate": 1.0469264721592197e-07, + "loss": 2.6476, + "step": 3798 + }, + { + "epoch": 8.59, + "learning_rate": 1.0404074267882502e-07, + "loss": 2.3072, + "step": 3800 + }, + { + "epoch": 8.6, + "learning_rate": 1.0339076274381787e-07, + "loss": 2.5948, + "step": 3802 + }, + { + "epoch": 8.6, + "learning_rate": 1.0274270880711666e-07, + "loss": 2.2217, + "step": 3804 + }, + { + "epoch": 8.61, + "learning_rate": 1.0209658226080032e-07, + "loss": 2.5243, + "step": 3806 + }, + { + "epoch": 8.61, + "learning_rate": 1.0145238449280724e-07, + "loss": 2.3269, + "step": 3808 + }, + { + "epoch": 8.62, + "learning_rate": 1.0081011688693341e-07, + "loss": 2.2734, + "step": 3810 + }, + { + "epoch": 8.62, + "learning_rate": 1.0016978082282779e-07, + "loss": 2.1973, + "step": 3812 + }, + { + "epoch": 8.63, + "learning_rate": 9.953137767599073e-08, + "loss": 2.1697, + "step": 3814 + }, + { + "epoch": 8.63, + "learning_rate": 9.889490881777018e-08, + "loss": 2.2464, + "step": 3816 + }, + { + "epoch": 8.64, + "learning_rate": 9.826037561535938e-08, + "loss": 2.4254, + "step": 3818 + }, + { + "epoch": 8.64, + "learning_rate": 9.762777943179312e-08, + "loss": 2.3955, + "step": 3820 + }, + { + "epoch": 8.64, + "learning_rate": 9.699712162594609e-08, + "loss": 2.1837, + "step": 3822 + }, + { + "epoch": 8.65, + "learning_rate": 9.63684035525284e-08, + "loss": 2.0977, + "step": 3824 + }, + { + "epoch": 8.65, + "learning_rate": 9.574162656208384e-08, + "loss": 2.4142, + "step": 3826 + }, + { + "epoch": 8.66, + "learning_rate": 9.511679200098632e-08, + "loss": 2.2772, + "step": 3828 + }, + { + "epoch": 8.66, + "learning_rate": 9.449390121143774e-08, + "loss": 2.24, + "step": 3830 + }, + { + "epoch": 8.67, + "learning_rate": 9.387295553146379e-08, + "loss": 2.444, + "step": 3832 + }, + { + "epoch": 8.67, + "learning_rate": 9.325395629491262e-08, + "loss": 2.5115, + "step": 3834 + }, + { + "epoch": 8.68, + "learning_rate": 9.263690483145103e-08, + "loss": 2.2708, + "step": 3836 + }, + { + "epoch": 8.68, + "learning_rate": 9.202180246656155e-08, + "loss": 2.465, + "step": 3838 + }, + { + "epoch": 8.69, + "learning_rate": 9.140865052154012e-08, + "loss": 2.9963, + "step": 3840 + }, + { + "epoch": 8.69, + "learning_rate": 9.079745031349317e-08, + "loss": 2.159, + "step": 3842 + }, + { + "epoch": 8.69, + "learning_rate": 9.018820315533426e-08, + "loss": 2.5147, + "step": 3844 + }, + { + "epoch": 8.7, + "learning_rate": 8.958091035578174e-08, + "loss": 2.3305, + "step": 3846 + }, + { + "epoch": 8.7, + "learning_rate": 8.89755732193559e-08, + "loss": 2.3214, + "step": 3848 + }, + { + "epoch": 8.71, + "learning_rate": 8.837219304637633e-08, + "loss": 2.3948, + "step": 3850 + }, + { + "epoch": 8.71, + "learning_rate": 8.777077113295794e-08, + "loss": 2.428, + "step": 3852 + }, + { + "epoch": 8.72, + "learning_rate": 8.717130877101031e-08, + "loss": 2.279, + "step": 3854 + }, + { + "epoch": 8.72, + "learning_rate": 8.657380724823294e-08, + "loss": 2.2065, + "step": 3856 + }, + { + "epoch": 8.73, + "learning_rate": 8.597826784811402e-08, + "loss": 2.5204, + "step": 3858 + }, + { + "epoch": 8.73, + "learning_rate": 8.538469184992559e-08, + "loss": 2.1397, + "step": 3860 + }, + { + "epoch": 8.74, + "learning_rate": 8.479308052872359e-08, + "loss": 2.4221, + "step": 3862 + }, + { + "epoch": 8.74, + "learning_rate": 8.42034351553429e-08, + "loss": 2.1741, + "step": 3864 + }, + { + "epoch": 8.74, + "learning_rate": 8.361575699639534e-08, + "loss": 2.199, + "step": 3866 + }, + { + "epoch": 8.75, + "learning_rate": 8.30300473142671e-08, + "loss": 2.5473, + "step": 3868 + }, + { + "epoch": 8.75, + "learning_rate": 8.244630736711621e-08, + "loss": 2.536, + "step": 3870 + }, + { + "epoch": 8.76, + "learning_rate": 8.186453840886897e-08, + "loss": 2.2939, + "step": 3872 + }, + { + "epoch": 8.76, + "learning_rate": 8.128474168921828e-08, + "loss": 2.2578, + "step": 3874 + }, + { + "epoch": 8.77, + "learning_rate": 8.070691845361999e-08, + "loss": 2.4713, + "step": 3876 + }, + { + "epoch": 8.77, + "learning_rate": 8.013106994329155e-08, + "loss": 2.2558, + "step": 3878 + }, + { + "epoch": 8.78, + "learning_rate": 7.95571973952075e-08, + "loss": 2.4457, + "step": 3880 + }, + { + "epoch": 8.78, + "learning_rate": 7.898530204209864e-08, + "loss": 2.419, + "step": 3882 + }, + { + "epoch": 8.78, + "learning_rate": 7.841538511244816e-08, + "loss": 2.0561, + "step": 3884 + }, + { + "epoch": 8.79, + "learning_rate": 7.784744783048958e-08, + "loss": 2.2682, + "step": 3886 + }, + { + "epoch": 8.79, + "learning_rate": 7.7281491416204e-08, + "loss": 2.4426, + "step": 3888 + }, + { + "epoch": 8.8, + "learning_rate": 7.671751708531692e-08, + "loss": 2.0675, + "step": 3890 + }, + { + "epoch": 8.8, + "learning_rate": 7.61555260492972e-08, + "loss": 2.0942, + "step": 3892 + }, + { + "epoch": 8.81, + "learning_rate": 7.559551951535237e-08, + "loss": 2.2835, + "step": 3894 + }, + { + "epoch": 8.81, + "learning_rate": 7.503749868642762e-08, + "loss": 2.2816, + "step": 3896 + }, + { + "epoch": 8.82, + "learning_rate": 7.448146476120232e-08, + "loss": 2.3509, + "step": 3898 + }, + { + "epoch": 8.82, + "learning_rate": 7.392741893408838e-08, + "loss": 2.3625, + "step": 3900 + }, + { + "epoch": 8.83, + "learning_rate": 7.337536239522623e-08, + "loss": 2.3323, + "step": 3902 + }, + { + "epoch": 8.83, + "learning_rate": 7.282529633048385e-08, + "loss": 2.3537, + "step": 3904 + }, + { + "epoch": 8.83, + "learning_rate": 7.227722192145325e-08, + "loss": 2.3887, + "step": 3906 + }, + { + "epoch": 8.84, + "learning_rate": 7.173114034544825e-08, + "loss": 2.1123, + "step": 3908 + }, + { + "epoch": 8.84, + "learning_rate": 7.118705277550142e-08, + "loss": 2.197, + "step": 3910 + }, + { + "epoch": 8.85, + "learning_rate": 7.064496038036293e-08, + "loss": 2.387, + "step": 3912 + }, + { + "epoch": 8.85, + "learning_rate": 7.010486432449636e-08, + "loss": 2.4442, + "step": 3914 + }, + { + "epoch": 8.86, + "learning_rate": 6.95667657680773e-08, + "loss": 2.4656, + "step": 3916 + }, + { + "epoch": 8.86, + "learning_rate": 6.903066586699014e-08, + "loss": 2.1219, + "step": 3918 + }, + { + "epoch": 8.87, + "learning_rate": 6.849656577282703e-08, + "loss": 2.4213, + "step": 3920 + }, + { + "epoch": 8.87, + "learning_rate": 6.79644666328828e-08, + "loss": 2.1535, + "step": 3922 + }, + { + "epoch": 8.88, + "learning_rate": 6.743436959015547e-08, + "loss": 2.6134, + "step": 3924 + }, + { + "epoch": 8.88, + "learning_rate": 6.69062757833414e-08, + "loss": 2.198, + "step": 3926 + }, + { + "epoch": 8.88, + "learning_rate": 6.63801863468344e-08, + "loss": 2.2652, + "step": 3928 + }, + { + "epoch": 8.89, + "learning_rate": 6.585610241072226e-08, + "loss": 2.3245, + "step": 3930 + }, + { + "epoch": 8.89, + "learning_rate": 6.533402510078533e-08, + "loss": 2.1564, + "step": 3932 + }, + { + "epoch": 8.9, + "learning_rate": 6.4813955538493e-08, + "loss": 2.2783, + "step": 3934 + }, + { + "epoch": 8.9, + "learning_rate": 6.429589484100217e-08, + "loss": 2.4365, + "step": 3936 + }, + { + "epoch": 8.91, + "learning_rate": 6.377984412115423e-08, + "loss": 2.4163, + "step": 3938 + }, + { + "epoch": 8.91, + "learning_rate": 6.326580448747343e-08, + "loss": 2.1423, + "step": 3940 + }, + { + "epoch": 8.92, + "learning_rate": 6.27537770441633e-08, + "loss": 2.2192, + "step": 3942 + }, + { + "epoch": 8.92, + "learning_rate": 6.224376289110589e-08, + "loss": 2.2585, + "step": 3944 + }, + { + "epoch": 8.93, + "learning_rate": 6.173576312385765e-08, + "loss": 2.2334, + "step": 3946 + }, + { + "epoch": 8.93, + "learning_rate": 6.122977883364877e-08, + "loss": 2.2312, + "step": 3948 + }, + { + "epoch": 8.93, + "learning_rate": 6.072581110737907e-08, + "loss": 2.5184, + "step": 3950 + }, + { + "epoch": 8.94, + "learning_rate": 6.022386102761756e-08, + "loss": 2.5329, + "step": 3952 + }, + { + "epoch": 8.94, + "learning_rate": 5.972392967259865e-08, + "loss": 2.418, + "step": 3954 + }, + { + "epoch": 8.95, + "learning_rate": 5.92260181162203e-08, + "loss": 2.2043, + "step": 3956 + }, + { + "epoch": 8.95, + "learning_rate": 5.873012742804173e-08, + "loss": 2.469, + "step": 3958 + }, + { + "epoch": 8.96, + "learning_rate": 5.823625867328175e-08, + "loss": 2.3175, + "step": 3960 + }, + { + "epoch": 8.96, + "learning_rate": 5.774441291281518e-08, + "loss": 2.5495, + "step": 3962 + }, + { + "epoch": 8.97, + "learning_rate": 5.725459120317156e-08, + "loss": 2.2427, + "step": 3964 + }, + { + "epoch": 8.97, + "learning_rate": 5.676679459653233e-08, + "loss": 2.5635, + "step": 3966 + }, + { + "epoch": 8.97, + "learning_rate": 5.628102414072933e-08, + "loss": 2.2622, + "step": 3968 + }, + { + "epoch": 8.98, + "learning_rate": 5.579728087924162e-08, + "loss": 2.3046, + "step": 3970 + }, + { + "epoch": 8.98, + "learning_rate": 5.531556585119357e-08, + "loss": 2.1263, + "step": 3972 + }, + { + "epoch": 8.99, + "learning_rate": 5.4835880091353314e-08, + "loss": 2.1675, + "step": 3974 + }, + { + "epoch": 8.99, + "learning_rate": 5.4358224630129404e-08, + "loss": 2.579, + "step": 3976 + }, + { + "epoch": 9.0, + "learning_rate": 5.388260049356919e-08, + "loss": 2.4221, + "step": 3978 + }, + { + "epoch": 9.0, + "learning_rate": 5.3409008703356626e-08, + "loss": 2.3077, + "step": 3980 + }, + { + "epoch": 9.01, + "learning_rate": 5.293745027681029e-08, + "loss": 2.4017, + "step": 3982 + }, + { + "epoch": 9.01, + "learning_rate": 5.246792622688023e-08, + "loss": 2.1001, + "step": 3984 + }, + { + "epoch": 9.02, + "learning_rate": 5.2000437562147225e-08, + "loss": 2.3728, + "step": 3986 + }, + { + "epoch": 9.02, + "learning_rate": 5.15349852868191e-08, + "loss": 2.3596, + "step": 3988 + }, + { + "epoch": 9.02, + "learning_rate": 5.1071570400730405e-08, + "loss": 2.3265, + "step": 3990 + }, + { + "epoch": 9.03, + "learning_rate": 5.061019389933774e-08, + "loss": 2.3075, + "step": 3992 + }, + { + "epoch": 9.03, + "learning_rate": 5.015085677372044e-08, + "loss": 2.2687, + "step": 3994 + }, + { + "epoch": 9.04, + "learning_rate": 4.969356001057612e-08, + "loss": 2.3342, + "step": 3996 + }, + { + "epoch": 9.04, + "learning_rate": 4.9238304592220117e-08, + "loss": 2.2943, + "step": 3998 + }, + { + "epoch": 9.05, + "learning_rate": 4.8785091496582385e-08, + "loss": 2.1758, + "step": 4000 + }, + { + "epoch": 9.05, + "learning_rate": 4.833392169720607e-08, + "loss": 2.2717, + "step": 4002 + }, + { + "epoch": 9.06, + "learning_rate": 4.788479616324481e-08, + "loss": 2.3099, + "step": 4004 + }, + { + "epoch": 9.06, + "learning_rate": 4.743771585946144e-08, + "loss": 2.584, + "step": 4006 + }, + { + "epoch": 9.07, + "learning_rate": 4.699268174622473e-08, + "loss": 2.16, + "step": 4008 + }, + { + "epoch": 9.07, + "learning_rate": 4.6549694779509094e-08, + "loss": 2.6906, + "step": 4010 + }, + { + "epoch": 9.07, + "learning_rate": 4.610875591089025e-08, + "loss": 2.1513, + "step": 4012 + }, + { + "epoch": 9.08, + "learning_rate": 4.566986608754553e-08, + "loss": 2.2294, + "step": 4014 + }, + { + "epoch": 9.08, + "learning_rate": 4.5233026252250005e-08, + "loss": 2.1508, + "step": 4016 + }, + { + "epoch": 9.09, + "learning_rate": 4.479823734337551e-08, + "loss": 2.2828, + "step": 4018 + }, + { + "epoch": 9.09, + "learning_rate": 4.4365500294888056e-08, + "loss": 2.3614, + "step": 4020 + }, + { + "epoch": 9.1, + "learning_rate": 4.3934816036346525e-08, + "loss": 2.3381, + "step": 4022 + }, + { + "epoch": 9.1, + "learning_rate": 4.350618549289986e-08, + "loss": 1.9958, + "step": 4024 + }, + { + "epoch": 9.11, + "learning_rate": 4.307960958528534e-08, + "loss": 2.2663, + "step": 4026 + }, + { + "epoch": 9.11, + "learning_rate": 4.265508922982686e-08, + "loss": 2.0485, + "step": 4028 + }, + { + "epoch": 9.12, + "learning_rate": 4.223262533843319e-08, + "loss": 2.4741, + "step": 4030 + }, + { + "epoch": 9.12, + "learning_rate": 4.1812218818594757e-08, + "loss": 2.2407, + "step": 4032 + }, + { + "epoch": 9.12, + "learning_rate": 4.1393870573383394e-08, + "loss": 2.2279, + "step": 4034 + }, + { + "epoch": 9.13, + "learning_rate": 4.097758150144903e-08, + "loss": 2.5935, + "step": 4036 + }, + { + "epoch": 9.13, + "learning_rate": 4.05633524970187e-08, + "loss": 2.4199, + "step": 4038 + }, + { + "epoch": 9.14, + "learning_rate": 4.015118444989374e-08, + "loss": 2.3565, + "step": 4040 + }, + { + "epoch": 9.14, + "learning_rate": 3.974107824544892e-08, + "loss": 2.7166, + "step": 4042 + }, + { + "epoch": 9.15, + "learning_rate": 3.9333034764629793e-08, + "loss": 2.3456, + "step": 4044 + }, + { + "epoch": 9.15, + "learning_rate": 3.892705488395065e-08, + "loss": 2.4363, + "step": 4046 + }, + { + "epoch": 9.16, + "learning_rate": 3.852313947549335e-08, + "loss": 2.2772, + "step": 4048 + }, + { + "epoch": 9.16, + "learning_rate": 3.812128940690496e-08, + "loss": 2.4964, + "step": 4050 + }, + { + "epoch": 9.16, + "learning_rate": 3.7721505541396305e-08, + "loss": 2.3199, + "step": 4052 + }, + { + "epoch": 9.17, + "learning_rate": 3.7323788737739005e-08, + "loss": 2.2969, + "step": 4054 + }, + { + "epoch": 9.17, + "learning_rate": 3.6928139850265436e-08, + "loss": 2.1398, + "step": 4056 + }, + { + "epoch": 9.18, + "learning_rate": 3.6534559728865324e-08, + "loss": 2.2636, + "step": 4058 + }, + { + "epoch": 9.18, + "learning_rate": 3.6143049218984586e-08, + "loss": 2.3242, + "step": 4060 + }, + { + "epoch": 9.19, + "learning_rate": 3.575360916162329e-08, + "loss": 2.4013, + "step": 4062 + }, + { + "epoch": 9.19, + "learning_rate": 3.536624039333447e-08, + "loss": 2.0722, + "step": 4064 + }, + { + "epoch": 9.2, + "learning_rate": 3.4980943746221295e-08, + "loss": 2.2253, + "step": 4066 + }, + { + "epoch": 9.2, + "learning_rate": 3.459772004793615e-08, + "loss": 2.5274, + "step": 4068 + }, + { + "epoch": 9.21, + "learning_rate": 3.421657012167834e-08, + "loss": 2.185, + "step": 4070 + }, + { + "epoch": 9.21, + "learning_rate": 3.383749478619291e-08, + "loss": 2.4211, + "step": 4072 + }, + { + "epoch": 9.21, + "learning_rate": 3.346049485576774e-08, + "loss": 2.2866, + "step": 4074 + }, + { + "epoch": 9.22, + "learning_rate": 3.308557114023347e-08, + "loss": 2.4034, + "step": 4076 + }, + { + "epoch": 9.22, + "learning_rate": 3.271272444495998e-08, + "loss": 2.2678, + "step": 4078 + }, + { + "epoch": 9.23, + "learning_rate": 3.2341955570856506e-08, + "loss": 2.1473, + "step": 4080 + }, + { + "epoch": 9.23, + "learning_rate": 3.197326531436773e-08, + "loss": 1.9607, + "step": 4082 + }, + { + "epoch": 9.24, + "learning_rate": 3.1606654467474236e-08, + "loss": 2.3081, + "step": 4084 + }, + { + "epoch": 9.24, + "learning_rate": 3.124212381768942e-08, + "loss": 2.3196, + "step": 4086 + }, + { + "epoch": 9.25, + "learning_rate": 3.087967414805848e-08, + "loss": 2.2704, + "step": 4088 + }, + { + "epoch": 9.25, + "learning_rate": 3.051930623715604e-08, + "loss": 2.2809, + "step": 4090 + }, + { + "epoch": 9.26, + "learning_rate": 3.016102085908534e-08, + "loss": 2.5148, + "step": 4092 + }, + { + "epoch": 9.26, + "learning_rate": 2.9804818783476184e-08, + "loss": 2.3634, + "step": 4094 + }, + { + "epoch": 9.26, + "learning_rate": 2.945070077548284e-08, + "loss": 2.2721, + "step": 4096 + }, + { + "epoch": 9.27, + "learning_rate": 2.9098667595782945e-08, + "loss": 2.5452, + "step": 4098 + }, + { + "epoch": 9.27, + "learning_rate": 2.8748720000576265e-08, + "loss": 2.4443, + "step": 4100 + }, + { + "epoch": 9.28, + "learning_rate": 2.8400858741581602e-08, + "loss": 2.5627, + "step": 4102 + }, + { + "epoch": 9.28, + "learning_rate": 2.805508456603689e-08, + "loss": 2.5705, + "step": 4104 + }, + { + "epoch": 9.29, + "learning_rate": 2.7711398216696658e-08, + "loss": 2.1941, + "step": 4106 + }, + { + "epoch": 9.29, + "learning_rate": 2.7369800431830236e-08, + "loss": 2.3704, + "step": 4108 + }, + { + "epoch": 9.3, + "learning_rate": 2.7030291945220885e-08, + "loss": 2.5346, + "step": 4110 + }, + { + "epoch": 9.3, + "learning_rate": 2.669287348616378e-08, + "loss": 2.0938, + "step": 4112 + }, + { + "epoch": 9.31, + "learning_rate": 2.6357545779464584e-08, + "loss": 2.6161, + "step": 4114 + }, + { + "epoch": 9.31, + "learning_rate": 2.602430954543755e-08, + "loss": 2.261, + "step": 4116 + }, + { + "epoch": 9.31, + "learning_rate": 2.5693165499904635e-08, + "loss": 2.2991, + "step": 4118 + }, + { + "epoch": 9.32, + "learning_rate": 2.5364114354193277e-08, + "loss": 1.9709, + "step": 4120 + }, + { + "epoch": 9.32, + "learning_rate": 2.5037156815135408e-08, + "loss": 2.7381, + "step": 4122 + }, + { + "epoch": 9.33, + "learning_rate": 2.4712293585065546e-08, + "loss": 2.0963, + "step": 4124 + }, + { + "epoch": 9.33, + "learning_rate": 2.4389525361819487e-08, + "loss": 2.1209, + "step": 4126 + }, + { + "epoch": 9.34, + "learning_rate": 2.4068852838732945e-08, + "loss": 2.2181, + "step": 4128 + }, + { + "epoch": 9.34, + "learning_rate": 2.3750276704639472e-08, + "loss": 2.2637, + "step": 4130 + }, + { + "epoch": 9.35, + "learning_rate": 2.3433797643869658e-08, + "loss": 2.1917, + "step": 4132 + }, + { + "epoch": 9.35, + "learning_rate": 2.3119416336249588e-08, + "loss": 2.1542, + "step": 4134 + }, + { + "epoch": 9.35, + "learning_rate": 2.2807133457098504e-08, + "loss": 2.492, + "step": 4136 + }, + { + "epoch": 9.36, + "learning_rate": 2.2496949677228927e-08, + "loss": 2.3315, + "step": 4138 + }, + { + "epoch": 9.36, + "learning_rate": 2.2188865662943536e-08, + "loss": 2.2493, + "step": 4140 + }, + { + "epoch": 9.37, + "learning_rate": 2.188288207603517e-08, + "loss": 2.5447, + "step": 4142 + }, + { + "epoch": 9.37, + "learning_rate": 2.1578999573784063e-08, + "loss": 2.3446, + "step": 4144 + }, + { + "epoch": 9.38, + "learning_rate": 2.127721880895783e-08, + "loss": 2.1998, + "step": 4146 + }, + { + "epoch": 9.38, + "learning_rate": 2.0977540429808926e-08, + "loss": 2.0618, + "step": 4148 + }, + { + "epoch": 9.39, + "learning_rate": 2.067996508007386e-08, + "loss": 2.1654, + "step": 4150 + }, + { + "epoch": 9.39, + "learning_rate": 2.0384493398971303e-08, + "loss": 2.1868, + "step": 4152 + }, + { + "epoch": 9.4, + "learning_rate": 2.0091126021201775e-08, + "loss": 2.3577, + "step": 4154 + }, + { + "epoch": 9.4, + "learning_rate": 1.9799863576944853e-08, + "loss": 2.1735, + "step": 4156 + }, + { + "epoch": 9.4, + "learning_rate": 1.9510706691858835e-08, + "loss": 2.4039, + "step": 4158 + }, + { + "epoch": 9.41, + "learning_rate": 1.922365598707909e-08, + "loss": 2.4402, + "step": 4160 + }, + { + "epoch": 9.41, + "learning_rate": 1.893871207921671e-08, + "loss": 2.2745, + "step": 4162 + }, + { + "epoch": 9.42, + "learning_rate": 1.8655875580356974e-08, + "loss": 2.5174, + "step": 4164 + }, + { + "epoch": 9.42, + "learning_rate": 1.8375147098058653e-08, + "loss": 2.7251, + "step": 4166 + }, + { + "epoch": 9.43, + "learning_rate": 1.8096527235351934e-08, + "loss": 2.0941, + "step": 4168 + }, + { + "epoch": 9.43, + "learning_rate": 1.7820016590737842e-08, + "loss": 2.4482, + "step": 4170 + }, + { + "epoch": 9.44, + "learning_rate": 1.754561575818625e-08, + "loss": 2.3424, + "step": 4172 + }, + { + "epoch": 9.44, + "learning_rate": 1.7273325327135215e-08, + "loss": 2.3049, + "step": 4174 + }, + { + "epoch": 9.45, + "learning_rate": 1.700314588248952e-08, + "loss": 2.5217, + "step": 4176 + }, + { + "epoch": 9.45, + "learning_rate": 1.673507800461893e-08, + "loss": 2.3833, + "step": 4178 + }, + { + "epoch": 9.45, + "learning_rate": 1.6469122269357817e-08, + "loss": 2.323, + "step": 4180 + }, + { + "epoch": 9.46, + "learning_rate": 1.6205279248003413e-08, + "loss": 2.3853, + "step": 4182 + }, + { + "epoch": 9.46, + "learning_rate": 1.5943549507314468e-08, + "loss": 2.3385, + "step": 4184 + }, + { + "epoch": 9.47, + "learning_rate": 1.568393360951026e-08, + "loss": 2.1541, + "step": 4186 + }, + { + "epoch": 9.47, + "learning_rate": 1.542643211226946e-08, + "loss": 2.5234, + "step": 4188 + }, + { + "epoch": 9.48, + "learning_rate": 1.5171045568728723e-08, + "loss": 1.9526, + "step": 4190 + }, + { + "epoch": 9.48, + "learning_rate": 1.4917774527481442e-08, + "loss": 2.4961, + "step": 4192 + }, + { + "epoch": 9.49, + "learning_rate": 1.4666619532577196e-08, + "loss": 2.3186, + "step": 4194 + }, + { + "epoch": 9.49, + "learning_rate": 1.441758112351954e-08, + "loss": 2.2356, + "step": 4196 + }, + { + "epoch": 9.5, + "learning_rate": 1.4170659835265774e-08, + "loss": 2.155, + "step": 4198 + }, + { + "epoch": 9.5, + "learning_rate": 1.3925856198225283e-08, + "loss": 2.2409, + "step": 4200 + }, + { + "epoch": 9.5, + "learning_rate": 1.3683170738258532e-08, + "loss": 2.4088, + "step": 4202 + }, + { + "epoch": 9.51, + "learning_rate": 1.344260397667607e-08, + "loss": 2.0696, + "step": 4204 + }, + { + "epoch": 9.51, + "learning_rate": 1.32041564302372e-08, + "loss": 2.1969, + "step": 4206 + }, + { + "epoch": 9.52, + "learning_rate": 1.2967828611148868e-08, + "loss": 2.1321, + "step": 4208 + }, + { + "epoch": 9.52, + "learning_rate": 1.2733621027064989e-08, + "loss": 2.1224, + "step": 4210 + }, + { + "epoch": 9.53, + "learning_rate": 1.2501534181084461e-08, + "loss": 2.6715, + "step": 4212 + }, + { + "epoch": 9.53, + "learning_rate": 1.2271568571751157e-08, + "loss": 2.4569, + "step": 4214 + }, + { + "epoch": 9.54, + "learning_rate": 1.204372469305226e-08, + "loss": 2.4008, + "step": 4216 + }, + { + "epoch": 9.54, + "learning_rate": 1.1818003034416935e-08, + "loss": 2.3672, + "step": 4218 + }, + { + "epoch": 9.54, + "learning_rate": 1.1594404080716103e-08, + "loss": 2.4229, + "step": 4220 + }, + { + "epoch": 9.55, + "learning_rate": 1.1372928312260333e-08, + "loss": 2.4505, + "step": 4222 + }, + { + "epoch": 9.55, + "learning_rate": 1.1153576204800285e-08, + "loss": 2.4657, + "step": 4224 + }, + { + "epoch": 9.56, + "learning_rate": 1.0936348229523717e-08, + "loss": 2.6675, + "step": 4226 + }, + { + "epoch": 9.56, + "learning_rate": 1.0721244853056366e-08, + "loss": 2.1615, + "step": 4228 + }, + { + "epoch": 9.57, + "learning_rate": 1.0508266537459843e-08, + "loss": 2.3642, + "step": 4230 + }, + { + "epoch": 9.57, + "learning_rate": 1.0297413740231076e-08, + "loss": 2.2433, + "step": 4232 + }, + { + "epoch": 9.58, + "learning_rate": 1.008868691430087e-08, + "loss": 2.5839, + "step": 4234 + }, + { + "epoch": 9.58, + "learning_rate": 9.882086508033571e-09, + "loss": 2.6095, + "step": 4236 + }, + { + "epoch": 9.59, + "learning_rate": 9.677612965225734e-09, + "loss": 2.4396, + "step": 4238 + }, + { + "epoch": 9.59, + "learning_rate": 9.475266725105014e-09, + "loss": 2.5627, + "step": 4240 + }, + { + "epoch": 9.59, + "learning_rate": 9.275048222329607e-09, + "loss": 2.188, + "step": 4242 + }, + { + "epoch": 9.6, + "learning_rate": 9.076957886987146e-09, + "loss": 2.2413, + "step": 4244 + }, + { + "epoch": 9.6, + "learning_rate": 8.88099614459381e-09, + "loss": 2.3452, + "step": 4246 + }, + { + "epoch": 9.61, + "learning_rate": 8.68716341609299e-09, + "loss": 2.3178, + "step": 4248 + }, + { + "epoch": 9.61, + "learning_rate": 8.49546011785518e-09, + "loss": 2.4892, + "step": 4250 + }, + { + "epoch": 9.62, + "learning_rate": 8.305886661676752e-09, + "loss": 2.4523, + "step": 4252 + }, + { + "epoch": 9.62, + "learning_rate": 8.118443454778302e-09, + "loss": 2.4388, + "step": 4254 + }, + { + "epoch": 9.63, + "learning_rate": 7.933130899805296e-09, + "loss": 2.1198, + "step": 4256 + }, + { + "epoch": 9.63, + "learning_rate": 7.74994939482576e-09, + "loss": 2.5573, + "step": 4258 + }, + { + "epoch": 9.64, + "learning_rate": 7.56889933333038e-09, + "loss": 2.2659, + "step": 4260 + }, + { + "epoch": 9.64, + "learning_rate": 7.389981104231169e-09, + "loss": 2.1934, + "step": 4262 + }, + { + "epoch": 9.64, + "learning_rate": 7.213195091860691e-09, + "loss": 2.2468, + "step": 4264 + }, + { + "epoch": 9.65, + "learning_rate": 7.038541675971399e-09, + "loss": 2.3865, + "step": 4266 + }, + { + "epoch": 9.65, + "learning_rate": 6.866021231734853e-09, + "loss": 2.3968, + "step": 4268 + }, + { + "epoch": 9.66, + "learning_rate": 6.695634129740613e-09, + "loss": 2.3712, + "step": 4270 + }, + { + "epoch": 9.66, + "learning_rate": 6.52738073599568e-09, + "loss": 2.2353, + "step": 4272 + }, + { + "epoch": 9.67, + "learning_rate": 6.361261411923724e-09, + "loss": 2.5092, + "step": 4274 + }, + { + "epoch": 9.67, + "learning_rate": 6.19727651436408e-09, + "loss": 2.2469, + "step": 4276 + }, + { + "epoch": 9.68, + "learning_rate": 6.035426395571419e-09, + "loss": 2.1285, + "step": 4278 + }, + { + "epoch": 9.68, + "learning_rate": 5.875711403214412e-09, + "loss": 2.2678, + "step": 4280 + }, + { + "epoch": 9.69, + "learning_rate": 5.718131880375398e-09, + "loss": 2.3838, + "step": 4282 + }, + { + "epoch": 9.69, + "learning_rate": 5.5626881655495006e-09, + "loss": 2.2587, + "step": 4284 + }, + { + "epoch": 9.69, + "learning_rate": 5.4093805926440635e-09, + "loss": 2.3756, + "step": 4286 + }, + { + "epoch": 9.7, + "learning_rate": 5.258209490977772e-09, + "loss": 2.2056, + "step": 4288 + }, + { + "epoch": 9.7, + "learning_rate": 5.109175185279757e-09, + "loss": 2.1925, + "step": 4290 + }, + { + "epoch": 9.71, + "learning_rate": 4.96227799568949e-09, + "loss": 2.3737, + "step": 4292 + }, + { + "epoch": 9.71, + "learning_rate": 4.8175182377553355e-09, + "loss": 2.2732, + "step": 4294 + }, + { + "epoch": 9.72, + "learning_rate": 4.674896222434555e-09, + "loss": 2.326, + "step": 4296 + }, + { + "epoch": 9.72, + "learning_rate": 4.5344122560920795e-09, + "loss": 2.5249, + "step": 4298 + }, + { + "epoch": 9.73, + "learning_rate": 4.396066640500406e-09, + "loss": 2.2555, + "step": 4300 + }, + { + "epoch": 9.73, + "learning_rate": 4.259859672838484e-09, + "loss": 2.5123, + "step": 4302 + }, + { + "epoch": 9.73, + "learning_rate": 4.125791645691268e-09, + "loss": 2.1902, + "step": 4304 + }, + { + "epoch": 9.74, + "learning_rate": 3.993862847049167e-09, + "loss": 2.2976, + "step": 4306 + }, + { + "epoch": 9.74, + "learning_rate": 3.864073560307046e-09, + "loss": 2.2722, + "step": 4308 + }, + { + "epoch": 9.75, + "learning_rate": 3.736424064264443e-09, + "loss": 2.1837, + "step": 4310 + }, + { + "epoch": 9.75, + "learning_rate": 3.610914633123685e-09, + "loss": 2.5105, + "step": 4312 + }, + { + "epoch": 9.76, + "learning_rate": 3.487545536490888e-09, + "loss": 2.0634, + "step": 4314 + }, + { + "epoch": 9.76, + "learning_rate": 3.366317039373845e-09, + "loss": 2.4091, + "step": 4316 + }, + { + "epoch": 9.77, + "learning_rate": 3.247229402182472e-09, + "loss": 2.4849, + "step": 4318 + }, + { + "epoch": 9.77, + "learning_rate": 3.130282880727808e-09, + "loss": 2.7144, + "step": 4320 + }, + { + "epoch": 9.78, + "learning_rate": 3.0154777262217934e-09, + "loss": 2.5959, + "step": 4322 + }, + { + "epoch": 9.78, + "learning_rate": 2.902814185276159e-09, + "loss": 2.1911, + "step": 4324 + }, + { + "epoch": 9.78, + "learning_rate": 2.7922924999025375e-09, + "loss": 2.4163, + "step": 4326 + }, + { + "epoch": 9.79, + "learning_rate": 2.6839129075116873e-09, + "loss": 2.195, + "step": 4328 + }, + { + "epoch": 9.79, + "learning_rate": 2.577675640912602e-09, + "loss": 2.1792, + "step": 4330 + }, + { + "epoch": 9.8, + "learning_rate": 2.4735809283128463e-09, + "loss": 2.3718, + "step": 4332 + }, + { + "epoch": 9.8, + "learning_rate": 2.3716289933172208e-09, + "loss": 2.4066, + "step": 4334 + }, + { + "epoch": 9.81, + "learning_rate": 2.2718200549277644e-09, + "loss": 2.2239, + "step": 4336 + }, + { + "epoch": 9.81, + "learning_rate": 2.1741543275431983e-09, + "loss": 2.3306, + "step": 4338 + }, + { + "epoch": 9.82, + "learning_rate": 2.0786320209584817e-09, + "loss": 2.4882, + "step": 4340 + }, + { + "epoch": 9.82, + "learning_rate": 1.9852533403640347e-09, + "loss": 2.5039, + "step": 4342 + }, + { + "epoch": 9.83, + "learning_rate": 1.894018486345961e-09, + "loss": 2.3346, + "step": 4344 + }, + { + "epoch": 9.83, + "learning_rate": 1.8049276548848247e-09, + "loss": 2.5675, + "step": 4346 + }, + { + "epoch": 9.83, + "learning_rate": 1.7179810373560977e-09, + "loss": 2.2032, + "step": 4348 + }, + { + "epoch": 9.84, + "learning_rate": 1.6331788205288245e-09, + "loss": 1.9619, + "step": 4350 + }, + { + "epoch": 9.84, + "learning_rate": 1.5505211865660673e-09, + "loss": 2.184, + "step": 4352 + }, + { + "epoch": 9.85, + "learning_rate": 1.470008313023796e-09, + "loss": 2.4945, + "step": 4354 + }, + { + "epoch": 9.85, + "learning_rate": 1.3916403728509995e-09, + "loss": 2.3055, + "step": 4356 + }, + { + "epoch": 9.86, + "learning_rate": 1.3154175343893514e-09, + "loss": 2.3081, + "step": 4358 + }, + { + "epoch": 9.86, + "learning_rate": 1.241339961372212e-09, + "loss": 2.671, + "step": 4360 + }, + { + "epoch": 9.87, + "learning_rate": 1.1694078129250717e-09, + "loss": 2.052, + "step": 4362 + }, + { + "epoch": 9.87, + "learning_rate": 1.0996212435647745e-09, + "loss": 2.3833, + "step": 4364 + }, + { + "epoch": 9.88, + "learning_rate": 1.0319804031990732e-09, + "loss": 2.4489, + "step": 4366 + }, + { + "epoch": 9.88, + "learning_rate": 9.664854371268516e-10, + "loss": 2.2591, + "step": 4368 + }, + { + "epoch": 9.88, + "learning_rate": 9.03136486037126e-10, + "loss": 2.4332, + "step": 4370 + }, + { + "epoch": 9.89, + "learning_rate": 8.419336860092663e-10, + "loss": 2.1944, + "step": 4372 + }, + { + "epoch": 9.89, + "learning_rate": 7.828771685124413e-10, + "loss": 2.1891, + "step": 4374 + }, + { + "epoch": 9.9, + "learning_rate": 7.259670604053969e-10, + "loss": 2.4247, + "step": 4376 + }, + { + "epoch": 9.9, + "learning_rate": 6.712034839363445e-10, + "loss": 2.172, + "step": 4378 + }, + { + "epoch": 9.91, + "learning_rate": 6.185865567422955e-10, + "loss": 2.3444, + "step": 4380 + }, + { + "epoch": 9.91, + "learning_rate": 5.681163918492827e-10, + "loss": 2.3733, + "step": 4382 + }, + { + "epoch": 9.92, + "learning_rate": 5.197930976718056e-10, + "loss": 2.1766, + "step": 4384 + }, + { + "epoch": 9.92, + "learning_rate": 4.736167780127199e-10, + "loss": 2.3506, + "step": 4386 + }, + { + "epoch": 9.92, + "learning_rate": 4.2958753206290275e-10, + "loss": 2.2205, + "step": 4388 + }, + { + "epoch": 9.93, + "learning_rate": 3.877054544011438e-10, + "loss": 2.1202, + "step": 4390 + }, + { + "epoch": 9.93, + "learning_rate": 3.479706349941436e-10, + "loss": 2.5056, + "step": 4392 + }, + { + "epoch": 9.94, + "learning_rate": 3.1038315919584834e-10, + "loss": 2.2159, + "step": 4394 + }, + { + "epoch": 9.94, + "learning_rate": 2.7494310774744955e-10, + "loss": 2.1418, + "step": 4396 + }, + { + "epoch": 9.95, + "learning_rate": 2.4165055677749515e-10, + "loss": 2.5672, + "step": 4398 + }, + { + "epoch": 9.95, + "learning_rate": 2.105055778014453e-10, + "loss": 2.844, + "step": 4400 + }, + { + "epoch": 9.96, + "learning_rate": 1.8150823772156155e-10, + "loss": 2.2968, + "step": 4402 + }, + { + "epoch": 9.96, + "learning_rate": 1.5465859882668463e-10, + "loss": 2.2367, + "step": 4404 + }, + { + "epoch": 9.97, + "learning_rate": 1.299567187923456e-10, + "loss": 2.4825, + "step": 4406 + }, + { + "epoch": 9.97, + "learning_rate": 1.0740265068043264e-10, + "loss": 2.3133, + "step": 4408 + }, + { + "epoch": 9.97, + "learning_rate": 8.699644293908016e-11, + "loss": 2.3099, + "step": 4410 + }, + { + "epoch": 9.98, + "learning_rate": 6.873813940277973e-11, + "loss": 2.3648, + "step": 4412 + }, + { + "epoch": 9.98, + "learning_rate": 5.262777929193607e-11, + "loss": 2.439, + "step": 4414 + }, + { + "epoch": 9.99, + "learning_rate": 3.8665397213089034e-11, + "loss": 2.4944, + "step": 4416 + }, + { + "epoch": 9.99, + "learning_rate": 2.685102315880261e-11, + "loss": 2.1788, + "step": 4418 + }, + { + "epoch": 10.0, + "learning_rate": 1.718468250733185e-11, + "loss": 2.394, + "step": 4420 + }, + { + "epoch": 10.0, + "step": 4420, + "total_flos": 1.0214099064245453e+17, + "train_loss": 2.5490156790250027, + "train_runtime": 26958.9225, + "train_samples_per_second": 10.496, + "train_steps_per_second": 0.164 + } + ], + "logging_steps": 2, + "max_steps": 4420, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 4000, + "total_flos": 1.0214099064245453e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}