diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,67471 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 11241, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.917159763313609e-06, + "loss": 9.9531, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.1834319526627219e-05, + "loss": 9.7188, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.7751479289940828e-05, + "loss": 9.5938, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.3668639053254438e-05, + "loss": 9.7344, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.9585798816568047e-05, + "loss": 9.4062, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 3.5502958579881656e-05, + "loss": 9.2812, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 4.142011834319527e-05, + "loss": 8.8125, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 4.7337278106508875e-05, + "loss": 8.625, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 5.325443786982249e-05, + "loss": 8.2812, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 5.9171597633136094e-05, + "loss": 7.9297, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 6.50887573964497e-05, + "loss": 7.8438, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 7.100591715976331e-05, + "loss": 7.4688, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 7.692307692307693e-05, + "loss": 7.3359, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 8.284023668639054e-05, + "loss": 6.8828, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 8.875739644970414e-05, + "loss": 6.7031, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 9.467455621301775e-05, + "loss": 6.375, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010059171597633136, + "loss": 6.25, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010650887573964498, + "loss": 6.125, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011242603550295859, + "loss": 5.9453, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011834319526627219, + "loss": 6.0078, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001242603550295858, + "loss": 5.6328, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001301775147928994, + "loss": 5.4922, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 0.000136094674556213, + "loss": 5.1797, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014201183431952663, + "loss": 5.2969, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014792899408284024, + "loss": 5.0078, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015384615384615385, + "loss": 4.9219, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015976331360946746, + "loss": 4.6797, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016568047337278108, + "loss": 4.6094, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017159763313609466, + "loss": 4.5312, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017751479289940828, + "loss": 4.2422, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001834319526627219, + "loss": 4.1875, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001893491124260355, + "loss": 3.8438, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019526627218934911, + "loss": 3.8789, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 0.00020118343195266273, + "loss": 3.5508, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 0.00020710059171597634, + "loss": 3.5117, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 0.00021301775147928995, + "loss": 3.3242, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 0.00021893491124260357, + "loss": 3.4062, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 0.00022485207100591718, + "loss": 3.0391, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002307692307692308, + "loss": 3.0469, + "step": 39 + }, + { + "epoch": 0.0, + "learning_rate": 0.00023668639053254438, + "loss": 2.8516, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 0.000242603550295858, + "loss": 2.7031, + "step": 41 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002485207100591716, + "loss": 2.8438, + "step": 42 + }, + { + "epoch": 0.0, + "learning_rate": 0.00025443786982248524, + "loss": 2.7812, + "step": 43 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002603550295857988, + "loss": 2.8828, + "step": 44 + }, + { + "epoch": 0.0, + "learning_rate": 0.00026627218934911247, + "loss": 2.6211, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 0.000272189349112426, + "loss": 2.6367, + "step": 46 + }, + { + "epoch": 0.0, + "learning_rate": 0.00027810650887573964, + "loss": 2.543, + "step": 47 + }, + { + "epoch": 0.0, + "learning_rate": 0.00028402366863905325, + "loss": 2.5742, + "step": 48 + }, + { + "epoch": 0.0, + "learning_rate": 0.00028994082840236686, + "loss": 2.4688, + "step": 49 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002958579881656805, + "loss": 2.5273, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003017751479289941, + "loss": 2.4531, + "step": 51 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003076923076923077, + "loss": 2.6367, + "step": 52 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003136094674556213, + "loss": 2.5664, + "step": 53 + }, + { + "epoch": 0.0, + "learning_rate": 0.00031952662721893493, + "loss": 2.3535, + "step": 54 + }, + { + "epoch": 0.0, + "learning_rate": 0.00032544378698224854, + "loss": 2.5273, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 0.00033136094674556215, + "loss": 2.5195, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 0.00033727810650887577, + "loss": 2.4609, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003431952662721893, + "loss": 2.3906, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 0.000349112426035503, + "loss": 2.3672, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035502958579881655, + "loss": 2.3906, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003609467455621302, + "loss": 2.3867, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003668639053254438, + "loss": 2.2695, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037278106508875744, + "loss": 2.2773, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 0.000378698224852071, + "loss": 2.2227, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038461538461538467, + "loss": 2.3008, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039053254437869823, + "loss": 2.1484, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003964497041420119, + "loss": 2.3398, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040236686390532545, + "loss": 2.2266, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 0.000408284023668639, + "loss": 2.1328, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004142011834319527, + "loss": 2.2031, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042011834319526624, + "loss": 2.2344, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004260355029585799, + "loss": 2.0977, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043195266272189346, + "loss": 2.2188, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043786982248520713, + "loss": 2.1094, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004437869822485207, + "loss": 2.0645, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044970414201183436, + "loss": 2.1992, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004556213017751479, + "loss": 2.2539, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004615384615384616, + "loss": 2.0488, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 0.00046745562130177514, + "loss": 2.0801, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047337278106508875, + "loss": 2.2578, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047928994082840237, + "loss": 2.2266, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 0.000485207100591716, + "loss": 2.0859, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004911242603550296, + "loss": 1.9297, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004970414201183431, + "loss": 2.0586, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005029585798816569, + "loss": 2.0098, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005088757396449705, + "loss": 2.0508, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 0.000514792899408284, + "loss": 2.1016, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005207100591715976, + "loss": 1.9707, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005266272189349113, + "loss": 2.0078, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005325443786982249, + "loss": 2.0801, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005384615384615384, + "loss": 1.9902, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 0.000544378698224852, + "loss": 2.0371, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005502958579881658, + "loss": 1.9785, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005562130177514793, + "loss": 2.0664, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005621301775147929, + "loss": 1.9941, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005680473372781065, + "loss": 2.0625, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005739644970414202, + "loss": 2.1504, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005798816568047337, + "loss": 1.9473, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005857988165680473, + "loss": 2.0, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 0.000591715976331361, + "loss": 2.0781, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005976331360946747, + "loss": 1.8945, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006035502958579882, + "loss": 1.9219, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006094674556213018, + "loss": 2.0371, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006153846153846154, + "loss": 1.9512, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006213017751479289, + "loss": 1.9844, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006272189349112426, + "loss": 1.9258, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006331360946745562, + "loss": 1.9277, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006390532544378699, + "loss": 1.8848, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006449704142011834, + "loss": 1.9395, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006508875739644971, + "loss": 1.8809, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006568047337278107, + "loss": 2.0469, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006627218934911243, + "loss": 2.0137, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006686390532544378, + "loss": 1.7812, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006745562130177515, + "loss": 1.9668, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006804733727810651, + "loss": 1.8066, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006863905325443787, + "loss": 1.8887, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 0.0006923076923076923, + "loss": 1.8926, + "step": 117 + }, + { + "epoch": 0.01, + "learning_rate": 0.000698224852071006, + "loss": 1.9531, + "step": 118 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007041420118343196, + "loss": 2.0039, + "step": 119 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007100591715976331, + "loss": 2.0, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007159763313609467, + "loss": 2.0391, + "step": 121 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007218934911242604, + "loss": 1.875, + "step": 122 + }, + { + "epoch": 0.01, + "learning_rate": 0.000727810650887574, + "loss": 1.9707, + "step": 123 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007337278106508876, + "loss": 1.7715, + "step": 124 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007396449704142012, + "loss": 1.9121, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007455621301775149, + "loss": 1.9688, + "step": 126 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007514792899408284, + "loss": 2.0977, + "step": 127 + }, + { + "epoch": 0.01, + "learning_rate": 0.000757396449704142, + "loss": 2.002, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007633136094674556, + "loss": 1.9375, + "step": 129 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007692307692307693, + "loss": 1.8633, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007751479289940828, + "loss": 1.9336, + "step": 131 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007810650887573965, + "loss": 1.8184, + "step": 132 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007869822485207101, + "loss": 1.8672, + "step": 133 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007928994082840238, + "loss": 1.877, + "step": 134 + }, + { + "epoch": 0.01, + "learning_rate": 0.0007988165680473373, + "loss": 1.8281, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008047337278106509, + "loss": 1.9043, + "step": 136 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008106508875739645, + "loss": 1.9297, + "step": 137 + }, + { + "epoch": 0.01, + "learning_rate": 0.000816568047337278, + "loss": 1.832, + "step": 138 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008224852071005917, + "loss": 1.9395, + "step": 139 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008284023668639054, + "loss": 1.9434, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 0.000834319526627219, + "loss": 1.9043, + "step": 141 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008402366863905325, + "loss": 1.8027, + "step": 142 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008461538461538462, + "loss": 1.7734, + "step": 143 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008520710059171598, + "loss": 1.9688, + "step": 144 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008579881656804734, + "loss": 1.957, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008639053254437869, + "loss": 1.8926, + "step": 146 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008698224852071006, + "loss": 2.0215, + "step": 147 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008757396449704143, + "loss": 1.877, + "step": 148 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008816568047337278, + "loss": 1.832, + "step": 149 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008875739644970414, + "loss": 1.8672, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008934911242603551, + "loss": 1.9238, + "step": 151 + }, + { + "epoch": 0.01, + "learning_rate": 0.0008994082840236687, + "loss": 1.8848, + "step": 152 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009053254437869822, + "loss": 1.8848, + "step": 153 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009112426035502958, + "loss": 1.9551, + "step": 154 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009171597633136096, + "loss": 1.8457, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009230769230769232, + "loss": 1.8652, + "step": 156 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009289940828402367, + "loss": 1.8223, + "step": 157 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009349112426035503, + "loss": 1.7852, + "step": 158 + }, + { + "epoch": 0.01, + "learning_rate": 0.000940828402366864, + "loss": 1.8828, + "step": 159 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009467455621301775, + "loss": 1.8301, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009526627218934911, + "loss": 1.959, + "step": 161 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009585798816568047, + "loss": 1.8145, + "step": 162 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009644970414201185, + "loss": 1.8301, + "step": 163 + }, + { + "epoch": 0.01, + "learning_rate": 0.000970414201183432, + "loss": 1.8613, + "step": 164 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009763313609467456, + "loss": 1.8008, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009822485207100593, + "loss": 1.8379, + "step": 166 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009881656804733728, + "loss": 1.8008, + "step": 167 + }, + { + "epoch": 0.01, + "learning_rate": 0.0009940828402366863, + "loss": 1.8438, + "step": 168 + }, + { + "epoch": 0.02, + "learning_rate": 0.001, + "loss": 1.9199, + "step": 169 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010059171597633137, + "loss": 1.7637, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010118343195266272, + "loss": 1.9297, + "step": 171 + }, + { + "epoch": 0.02, + "learning_rate": 0.001017751479289941, + "loss": 2.0293, + "step": 172 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010236686390532543, + "loss": 1.8398, + "step": 173 + }, + { + "epoch": 0.02, + "learning_rate": 0.001029585798816568, + "loss": 1.8438, + "step": 174 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010355029585798817, + "loss": 1.7617, + "step": 175 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010414201183431952, + "loss": 1.8945, + "step": 176 + }, + { + "epoch": 0.02, + "learning_rate": 0.001047337278106509, + "loss": 1.9238, + "step": 177 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010532544378698226, + "loss": 1.7637, + "step": 178 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010591715976331361, + "loss": 1.9199, + "step": 179 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010650887573964499, + "loss": 1.7949, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010710059171597632, + "loss": 1.7852, + "step": 181 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010769230769230769, + "loss": 1.8203, + "step": 182 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010828402366863906, + "loss": 1.8027, + "step": 183 + }, + { + "epoch": 0.02, + "learning_rate": 0.001088757396449704, + "loss": 1.8203, + "step": 184 + }, + { + "epoch": 0.02, + "learning_rate": 0.0010946745562130178, + "loss": 1.9082, + "step": 185 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011005917159763315, + "loss": 1.9902, + "step": 186 + }, + { + "epoch": 0.02, + "learning_rate": 0.001106508875739645, + "loss": 1.8945, + "step": 187 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011124260355029586, + "loss": 1.7168, + "step": 188 + }, + { + "epoch": 0.02, + "learning_rate": 0.001118343195266272, + "loss": 1.7695, + "step": 189 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011242603550295858, + "loss": 1.7793, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011301775147928995, + "loss": 1.9023, + "step": 191 + }, + { + "epoch": 0.02, + "learning_rate": 0.001136094674556213, + "loss": 1.793, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011420118343195267, + "loss": 1.8457, + "step": 193 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011479289940828404, + "loss": 1.8984, + "step": 194 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011538461538461537, + "loss": 1.8301, + "step": 195 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011597633136094675, + "loss": 1.8223, + "step": 196 + }, + { + "epoch": 0.02, + "learning_rate": 0.001165680473372781, + "loss": 1.8711, + "step": 197 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011715976331360947, + "loss": 1.8105, + "step": 198 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011775147928994084, + "loss": 1.9648, + "step": 199 + }, + { + "epoch": 0.02, + "learning_rate": 0.001183431952662722, + "loss": 1.7695, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011893491124260356, + "loss": 1.8809, + "step": 201 + }, + { + "epoch": 0.02, + "learning_rate": 0.0011952662721893494, + "loss": 1.7676, + "step": 202 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012011834319526626, + "loss": 1.959, + "step": 203 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012071005917159764, + "loss": 1.7734, + "step": 204 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012130177514792899, + "loss": 1.7676, + "step": 205 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012189349112426036, + "loss": 1.8047, + "step": 206 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012248520710059173, + "loss": 1.7715, + "step": 207 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012307692307692308, + "loss": 1.875, + "step": 208 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012366863905325445, + "loss": 1.8008, + "step": 209 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012426035502958578, + "loss": 1.8281, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012485207100591715, + "loss": 1.8613, + "step": 211 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012544378698224853, + "loss": 1.8496, + "step": 212 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012603550295857988, + "loss": 1.8027, + "step": 213 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012662721893491125, + "loss": 1.8828, + "step": 214 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012721893491124262, + "loss": 1.9023, + "step": 215 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012781065088757397, + "loss": 1.8711, + "step": 216 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012840236686390532, + "loss": 1.8984, + "step": 217 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012899408284023667, + "loss": 1.8711, + "step": 218 + }, + { + "epoch": 0.02, + "learning_rate": 0.0012958579881656804, + "loss": 1.8789, + "step": 219 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013017751479289942, + "loss": 1.8262, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013076923076923077, + "loss": 1.8262, + "step": 221 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013136094674556214, + "loss": 1.748, + "step": 222 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013195266272189351, + "loss": 1.8906, + "step": 223 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013254437869822486, + "loss": 1.8477, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013313609467455621, + "loss": 1.918, + "step": 225 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013372781065088756, + "loss": 1.8691, + "step": 226 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013431952662721893, + "loss": 1.9434, + "step": 227 + }, + { + "epoch": 0.02, + "learning_rate": 0.001349112426035503, + "loss": 1.7695, + "step": 228 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013550295857988166, + "loss": 1.8477, + "step": 229 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013609467455621303, + "loss": 1.8125, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 0.001366863905325444, + "loss": 1.9004, + "step": 231 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013727810650887573, + "loss": 1.8477, + "step": 232 + }, + { + "epoch": 0.02, + "learning_rate": 0.001378698224852071, + "loss": 1.877, + "step": 233 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013846153846153845, + "loss": 1.7969, + "step": 234 + }, + { + "epoch": 0.02, + "learning_rate": 0.0013905325443786982, + "loss": 1.8496, + "step": 235 + }, + { + "epoch": 0.02, + "learning_rate": 0.001396449704142012, + "loss": 1.8887, + "step": 236 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014023668639053255, + "loss": 1.9023, + "step": 237 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014082840236686392, + "loss": 1.7559, + "step": 238 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014142011834319525, + "loss": 1.6758, + "step": 239 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014201183431952662, + "loss": 1.8828, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 0.00142603550295858, + "loss": 1.7461, + "step": 241 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014319526627218934, + "loss": 1.7773, + "step": 242 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014378698224852072, + "loss": 1.8086, + "step": 243 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014437869822485209, + "loss": 2.002, + "step": 244 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014497041420118344, + "loss": 1.7988, + "step": 245 + }, + { + "epoch": 0.02, + "learning_rate": 0.001455621301775148, + "loss": 1.8828, + "step": 246 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014615384615384614, + "loss": 1.7305, + "step": 247 + }, + { + "epoch": 0.02, + "learning_rate": 0.001467455621301775, + "loss": 1.7578, + "step": 248 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014733727810650888, + "loss": 1.8789, + "step": 249 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014792899408284023, + "loss": 1.7246, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 0.001485207100591716, + "loss": 1.8027, + "step": 251 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014911242603550298, + "loss": 1.7441, + "step": 252 + }, + { + "epoch": 0.02, + "learning_rate": 0.0014970414201183433, + "loss": 1.6816, + "step": 253 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015029585798816568, + "loss": 1.8633, + "step": 254 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015088757396449703, + "loss": 1.8711, + "step": 255 + }, + { + "epoch": 0.02, + "learning_rate": 0.001514792899408284, + "loss": 1.8164, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015207100591715977, + "loss": 1.8242, + "step": 257 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015266272189349112, + "loss": 1.7559, + "step": 258 + }, + { + "epoch": 0.02, + "learning_rate": 0.001532544378698225, + "loss": 1.8984, + "step": 259 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015384615384615387, + "loss": 1.7012, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 0.001544378698224852, + "loss": 1.7344, + "step": 261 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015502958579881657, + "loss": 1.7363, + "step": 262 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015562130177514792, + "loss": 1.7109, + "step": 263 + }, + { + "epoch": 0.02, + "learning_rate": 0.001562130177514793, + "loss": 1.8008, + "step": 264 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015680473372781066, + "loss": 1.8633, + "step": 265 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015739644970414201, + "loss": 1.7773, + "step": 266 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015798816568047339, + "loss": 1.9023, + "step": 267 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015857988165680476, + "loss": 1.8711, + "step": 268 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015917159763313609, + "loss": 1.7285, + "step": 269 + }, + { + "epoch": 0.02, + "learning_rate": 0.0015976331360946746, + "loss": 1.8555, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 0.001603550295857988, + "loss": 1.8867, + "step": 271 + }, + { + "epoch": 0.02, + "learning_rate": 0.0016094674556213018, + "loss": 1.8418, + "step": 272 + }, + { + "epoch": 0.02, + "learning_rate": 0.0016153846153846155, + "loss": 1.8379, + "step": 273 + }, + { + "epoch": 0.02, + "learning_rate": 0.001621301775147929, + "loss": 1.8789, + "step": 274 + }, + { + "epoch": 0.02, + "learning_rate": 0.0016272189349112428, + "loss": 1.7383, + "step": 275 + }, + { + "epoch": 0.02, + "learning_rate": 0.001633136094674556, + "loss": 1.7598, + "step": 276 + }, + { + "epoch": 0.02, + "learning_rate": 0.0016390532544378698, + "loss": 1.7949, + "step": 277 + }, + { + "epoch": 0.02, + "learning_rate": 0.0016449704142011835, + "loss": 1.9961, + "step": 278 + }, + { + "epoch": 0.02, + "learning_rate": 0.001650887573964497, + "loss": 1.8105, + "step": 279 + }, + { + "epoch": 0.02, + "learning_rate": 0.0016568047337278107, + "loss": 1.9062, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 0.0016627218934911244, + "loss": 1.7773, + "step": 281 + }, + { + "epoch": 0.03, + "learning_rate": 0.001668639053254438, + "loss": 1.7188, + "step": 282 + }, + { + "epoch": 0.03, + "learning_rate": 0.0016745562130177514, + "loss": 1.8555, + "step": 283 + }, + { + "epoch": 0.03, + "learning_rate": 0.001680473372781065, + "loss": 1.8691, + "step": 284 + }, + { + "epoch": 0.03, + "learning_rate": 0.0016863905325443787, + "loss": 1.832, + "step": 285 + }, + { + "epoch": 0.03, + "learning_rate": 0.0016923076923076924, + "loss": 1.8477, + "step": 286 + }, + { + "epoch": 0.03, + "learning_rate": 0.001698224852071006, + "loss": 1.8984, + "step": 287 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017041420118343196, + "loss": 1.8555, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017100591715976333, + "loss": 1.832, + "step": 289 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017159763313609468, + "loss": 1.8691, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017218934911242603, + "loss": 1.7168, + "step": 291 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017278106508875739, + "loss": 1.7441, + "step": 292 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017337278106508876, + "loss": 1.709, + "step": 293 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017396449704142013, + "loss": 1.7793, + "step": 294 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017455621301775148, + "loss": 1.8633, + "step": 295 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017514792899408285, + "loss": 1.7305, + "step": 296 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017573964497041422, + "loss": 1.8164, + "step": 297 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017633136094674555, + "loss": 1.8164, + "step": 298 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017692307692307693, + "loss": 1.8359, + "step": 299 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017751479289940828, + "loss": 1.8496, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017810650887573965, + "loss": 1.8613, + "step": 301 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017869822485207102, + "loss": 1.7598, + "step": 302 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017928994082840237, + "loss": 1.8633, + "step": 303 + }, + { + "epoch": 0.03, + "learning_rate": 0.0017988165680473374, + "loss": 1.8867, + "step": 304 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018047337278106507, + "loss": 1.8047, + "step": 305 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018106508875739644, + "loss": 1.8086, + "step": 306 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018165680473372782, + "loss": 1.7617, + "step": 307 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018224852071005917, + "loss": 1.8008, + "step": 308 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018284023668639054, + "loss": 1.9238, + "step": 309 + }, + { + "epoch": 0.03, + "learning_rate": 0.001834319526627219, + "loss": 1.8242, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018402366863905326, + "loss": 1.9336, + "step": 311 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018461538461538463, + "loss": 1.8535, + "step": 312 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018520710059171596, + "loss": 1.7441, + "step": 313 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018579881656804733, + "loss": 1.8691, + "step": 314 + }, + { + "epoch": 0.03, + "learning_rate": 0.001863905325443787, + "loss": 1.8828, + "step": 315 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018698224852071006, + "loss": 1.7598, + "step": 316 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018757396449704143, + "loss": 1.7695, + "step": 317 + }, + { + "epoch": 0.03, + "learning_rate": 0.001881656804733728, + "loss": 1.8184, + "step": 318 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018875739644970415, + "loss": 1.8477, + "step": 319 + }, + { + "epoch": 0.03, + "learning_rate": 0.001893491124260355, + "loss": 1.8301, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 0.0018994082840236685, + "loss": 1.8125, + "step": 321 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019053254437869822, + "loss": 1.9004, + "step": 322 + }, + { + "epoch": 0.03, + "learning_rate": 0.001911242603550296, + "loss": 1.8672, + "step": 323 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019171597633136095, + "loss": 1.8047, + "step": 324 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019230769230769232, + "loss": 1.6836, + "step": 325 + }, + { + "epoch": 0.03, + "learning_rate": 0.001928994082840237, + "loss": 1.8789, + "step": 326 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019349112426035502, + "loss": 1.8125, + "step": 327 + }, + { + "epoch": 0.03, + "learning_rate": 0.001940828402366864, + "loss": 1.7109, + "step": 328 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019467455621301774, + "loss": 1.7441, + "step": 329 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019526627218934911, + "loss": 1.998, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019585798816568046, + "loss": 1.8613, + "step": 331 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019644970414201186, + "loss": 1.8691, + "step": 332 + }, + { + "epoch": 0.03, + "learning_rate": 0.001970414201183432, + "loss": 1.7637, + "step": 333 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019763313609467456, + "loss": 1.7266, + "step": 334 + }, + { + "epoch": 0.03, + "learning_rate": 0.001982248520710059, + "loss": 1.7871, + "step": 335 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019881656804733726, + "loss": 1.8301, + "step": 336 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019940828402366865, + "loss": 1.8828, + "step": 337 + }, + { + "epoch": 0.03, + "learning_rate": 0.002, + "loss": 1.832, + "step": 338 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999999584876115, + "loss": 1.791, + "step": 339 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999998339504496, + "loss": 1.7656, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999996263885236, + "loss": 1.8145, + "step": 341 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999993358018526, + "loss": 1.8633, + "step": 342 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999998962190459, + "loss": 1.8906, + "step": 343 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999985055543746, + "loss": 1.8398, + "step": 344 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999997965893637, + "loss": 1.8926, + "step": 345 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999973432082915, + "loss": 1.8105, + "step": 346 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999996637498389, + "loss": 1.8242, + "step": 347 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999958487639893, + "loss": 1.8145, + "step": 348 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999994977005157, + "loss": 1.8926, + "step": 349 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999940222219645, + "loss": 1.8027, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999929844144912, + "loss": 1.7754, + "step": 351 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999918635828233, + "loss": 1.8809, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999990659727054, + "loss": 1.6914, + "step": 353 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999893728472828, + "loss": 1.8145, + "step": 354 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999988002943617, + "loss": 1.8574, + "step": 355 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999865500161702, + "loss": 1.9062, + "step": 356 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999985014065063, + "loss": 1.7656, + "step": 357 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999833950904225, + "loss": 1.8027, + "step": 358 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999981693092384, + "loss": 1.9082, + "step": 359 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999979908071088, + "loss": 1.834, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999780400266837, + "loss": 1.7715, + "step": 361 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999760889593252, + "loss": 1.9512, + "step": 362 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999740548691746, + "loss": 1.6934, + "step": 363 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999719377564013, + "loss": 1.9688, + "step": 364 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999697376211806, + "loss": 1.7852, + "step": 365 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999674544636955, + "loss": 1.7949, + "step": 366 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999965088284135, + "loss": 1.7578, + "step": 367 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999626390826966, + "loss": 1.8867, + "step": 368 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999601068595827, + "loss": 1.8262, + "step": 369 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999574916150036, + "loss": 1.8574, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999547933491767, + "loss": 1.8516, + "step": 371 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999952012062326, + "loss": 1.793, + "step": 372 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999491477546826, + "loss": 1.8867, + "step": 373 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999462004264843, + "loss": 1.6699, + "step": 374 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999431700779753, + "loss": 1.7324, + "step": 375 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999400567094075, + "loss": 1.8242, + "step": 376 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999368603210396, + "loss": 1.8691, + "step": 377 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999933580913137, + "loss": 1.8027, + "step": 378 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999930218485971, + "loss": 1.7188, + "step": 379 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999267730398222, + "loss": 1.7715, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999923244574976, + "loss": 1.8594, + "step": 381 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999196330917246, + "loss": 1.8203, + "step": 382 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999915938590369, + "loss": 1.791, + "step": 383 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999121610712154, + "loss": 1.7246, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999083005345776, + "loss": 1.8477, + "step": 385 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999904356980776, + "loss": 1.9648, + "step": 386 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019999003304101383, + "loss": 1.7988, + "step": 387 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019998962208229985, + "loss": 1.8672, + "step": 388 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019998920282196975, + "loss": 1.7578, + "step": 389 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999887752600584, + "loss": 1.8105, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019998833939660126, + "loss": 1.8477, + "step": 391 + }, + { + "epoch": 0.03, + "learning_rate": 0.0019998789523163456, + "loss": 1.7539, + "step": 392 + }, + { + "epoch": 0.03, + "learning_rate": 0.001999874427651951, + "loss": 1.8262, + "step": 393 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998698199732053, + "loss": 1.8027, + "step": 394 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998651292804904, + "loss": 1.7188, + "step": 395 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998603555741964, + "loss": 1.7109, + "step": 396 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998554988547193, + "loss": 1.8789, + "step": 397 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999850559122462, + "loss": 1.9414, + "step": 398 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999845536377835, + "loss": 1.8828, + "step": 399 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998404306212554, + "loss": 1.7051, + "step": 400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998352418531466, + "loss": 1.7031, + "step": 401 + }, + { + "epoch": 0.04, + "learning_rate": 0.00199982997007394, + "loss": 1.7051, + "step": 402 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998246152840727, + "loss": 1.7441, + "step": 403 + }, + { + "epoch": 0.04, + "learning_rate": 0.00199981917748399, + "loss": 1.7734, + "step": 404 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998136566741426, + "loss": 1.9043, + "step": 405 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998080528549895, + "loss": 1.9492, + "step": 406 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019998023660269958, + "loss": 1.9219, + "step": 407 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999796596190633, + "loss": 1.8906, + "step": 408 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999790743346381, + "loss": 1.8691, + "step": 409 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019997848074947257, + "loss": 1.7305, + "step": 410 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999778788636159, + "loss": 1.6387, + "step": 411 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999772686771182, + "loss": 1.7773, + "step": 412 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019997665019002996, + "loss": 1.7812, + "step": 413 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999760234024027, + "loss": 1.9238, + "step": 414 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019997538831428834, + "loss": 1.748, + "step": 415 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999747449257397, + "loss": 1.8438, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999740932368101, + "loss": 1.7949, + "step": 417 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999734332475537, + "loss": 1.8262, + "step": 418 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999727649580253, + "loss": 1.7871, + "step": 419 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999720883682804, + "loss": 1.8379, + "step": 420 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019997140347837515, + "loss": 1.7617, + "step": 421 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019997071028836637, + "loss": 1.8281, + "step": 422 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019997000879831165, + "loss": 1.6836, + "step": 423 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999692990082693, + "loss": 1.7871, + "step": 424 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999685809182981, + "loss": 1.6562, + "step": 425 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019996785452845783, + "loss": 1.7305, + "step": 426 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999671198388087, + "loss": 1.8125, + "step": 427 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999663768494117, + "loss": 1.6777, + "step": 428 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019996562556032858, + "loss": 1.8223, + "step": 429 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019996486597162163, + "loss": 1.7188, + "step": 430 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019996409808335403, + "loss": 1.6855, + "step": 431 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019996332189558944, + "loss": 1.8613, + "step": 432 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019996253740839232, + "loss": 1.7051, + "step": 433 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019996174462182784, + "loss": 1.8359, + "step": 434 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999609435359618, + "loss": 1.8438, + "step": 435 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999601341508607, + "loss": 1.8555, + "step": 436 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999593164665917, + "loss": 1.7734, + "step": 437 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019995849048322282, + "loss": 1.8477, + "step": 438 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999576562008225, + "loss": 1.7402, + "step": 439 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019995681361946005, + "loss": 1.8535, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019995596273920543, + "loss": 1.8516, + "step": 441 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999551035601293, + "loss": 1.8301, + "step": 442 + }, + { + "epoch": 0.04, + "learning_rate": 0.00199954236082303, + "loss": 1.7812, + "step": 443 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999533603057985, + "loss": 1.8672, + "step": 444 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019995247623068855, + "loss": 1.7949, + "step": 445 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999515838570465, + "loss": 1.7441, + "step": 446 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019995068318494655, + "loss": 1.7773, + "step": 447 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019994977421446338, + "loss": 1.707, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999488569456725, + "loss": 1.7617, + "step": 449 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019994793137865004, + "loss": 1.7129, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019994699751347284, + "loss": 1.7324, + "step": 451 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999460553502185, + "loss": 1.8203, + "step": 452 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999451048889651, + "loss": 1.7227, + "step": 453 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019994414612979176, + "loss": 1.8574, + "step": 454 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999431790727779, + "loss": 1.8223, + "step": 455 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999422037180039, + "loss": 1.8281, + "step": 456 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999412200655507, + "loss": 1.8398, + "step": 457 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999402281155, + "loss": 1.748, + "step": 458 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993922786793413, + "loss": 1.7891, + "step": 459 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993821932293614, + "loss": 1.8691, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993720248058975, + "loss": 1.8223, + "step": 461 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993617734097944, + "loss": 1.7031, + "step": 462 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993514390419026, + "loss": 1.7109, + "step": 463 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993410217030807, + "loss": 1.7734, + "step": 464 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993305213941927, + "loss": 1.8164, + "step": 465 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993199381161115, + "loss": 1.8574, + "step": 466 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019993092718697146, + "loss": 1.8027, + "step": 467 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019992985226558882, + "loss": 1.75, + "step": 468 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019992876904755255, + "loss": 1.8105, + "step": 469 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999276775329524, + "loss": 1.7695, + "step": 470 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999265777218792, + "loss": 1.7246, + "step": 471 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999254696144241, + "loss": 1.7422, + "step": 472 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019992435321067917, + "loss": 1.9316, + "step": 473 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999232285107371, + "loss": 1.6855, + "step": 474 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019992209551469124, + "loss": 1.748, + "step": 475 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999209542226357, + "loss": 1.8281, + "step": 476 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999198046346652, + "loss": 1.7578, + "step": 477 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019991864675087518, + "loss": 1.7988, + "step": 478 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999174805713618, + "loss": 1.752, + "step": 479 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019991630609622188, + "loss": 1.7637, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999151233255529, + "loss": 1.7305, + "step": 481 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019991393225945306, + "loss": 1.7383, + "step": 482 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999127328980213, + "loss": 1.8477, + "step": 483 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019991152524135715, + "loss": 1.7109, + "step": 484 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999103092895609, + "loss": 1.623, + "step": 485 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019990908504273347, + "loss": 1.7031, + "step": 486 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019990785250097653, + "loss": 1.752, + "step": 487 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019990661166439244, + "loss": 1.7207, + "step": 488 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019990536253308416, + "loss": 1.6895, + "step": 489 + }, + { + "epoch": 0.04, + "learning_rate": 0.001999041051071554, + "loss": 1.7129, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019990283938671064, + "loss": 1.8516, + "step": 491 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019990156537185487, + "loss": 1.7578, + "step": 492 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019990028306269393, + "loss": 1.6152, + "step": 493 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998989924593343, + "loss": 1.7539, + "step": 494 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019989769356188298, + "loss": 1.752, + "step": 495 + }, + { + "epoch": 0.04, + "learning_rate": 0.00199896386370448, + "loss": 1.6875, + "step": 496 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998950708851378, + "loss": 1.7617, + "step": 497 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998937471060616, + "loss": 1.8047, + "step": 498 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998924150333293, + "loss": 1.7109, + "step": 499 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019989107466705154, + "loss": 1.8203, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998897260073395, + "loss": 1.7227, + "step": 501 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998883690543053, + "loss": 1.6875, + "step": 502 + }, + { + "epoch": 0.04, + "learning_rate": 0.0019988700380806153, + "loss": 1.6836, + "step": 503 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998856302687215, + "loss": 1.8027, + "step": 504 + }, + { + "epoch": 0.04, + "learning_rate": 0.001998842484363993, + "loss": 1.7012, + "step": 505 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019988285831120966, + "loss": 1.8359, + "step": 506 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019988145989326793, + "loss": 1.8145, + "step": 507 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019988005318269026, + "loss": 1.8203, + "step": 508 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998786381795935, + "loss": 1.8008, + "step": 509 + }, + { + "epoch": 0.05, + "learning_rate": 0.00199877214884095, + "loss": 1.8203, + "step": 510 + }, + { + "epoch": 0.05, + "learning_rate": 0.00199875783296313, + "loss": 1.6836, + "step": 511 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998743434163664, + "loss": 1.9004, + "step": 512 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019987289524437467, + "loss": 1.7148, + "step": 513 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998714387804581, + "loss": 1.9492, + "step": 514 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998699740247376, + "loss": 1.6465, + "step": 515 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019986850097733475, + "loss": 1.748, + "step": 516 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019986701963837184, + "loss": 1.7617, + "step": 517 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998655300079719, + "loss": 1.7227, + "step": 518 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019986403208625864, + "loss": 1.75, + "step": 519 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998625258733563, + "loss": 1.6562, + "step": 520 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019986101136939007, + "loss": 1.8242, + "step": 521 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998594885744856, + "loss": 1.7324, + "step": 522 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019985795748876938, + "loss": 1.8105, + "step": 523 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998564181123685, + "loss": 1.7812, + "step": 524 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019985487044541076, + "loss": 1.7773, + "step": 525 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019985331448802465, + "loss": 1.793, + "step": 526 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019985175024033936, + "loss": 1.8164, + "step": 527 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998501777024848, + "loss": 1.8828, + "step": 528 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998485968745914, + "loss": 1.6973, + "step": 529 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998470077567906, + "loss": 1.7441, + "step": 530 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998454103492142, + "loss": 1.7734, + "step": 531 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998438046519949, + "loss": 1.7402, + "step": 532 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019984219066526594, + "loss": 1.9395, + "step": 533 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019984056838916135, + "loss": 1.6875, + "step": 534 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019983893782381584, + "loss": 1.6621, + "step": 535 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019983729896936478, + "loss": 1.7988, + "step": 536 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998356518259442, + "loss": 1.8223, + "step": 537 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998339963936909, + "loss": 1.6914, + "step": 538 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998323326727423, + "loss": 1.6172, + "step": 539 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019983066066323655, + "loss": 1.8262, + "step": 540 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019982898036531244, + "loss": 1.75, + "step": 541 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998272917791095, + "loss": 1.7891, + "step": 542 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998255949047679, + "loss": 1.6699, + "step": 543 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019982388974242858, + "loss": 1.9043, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 0.00199822176292233, + "loss": 1.7656, + "step": 545 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998204545543235, + "loss": 1.6621, + "step": 546 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019981872452884304, + "loss": 1.6875, + "step": 547 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019981698621593523, + "loss": 1.6875, + "step": 548 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019981523961574437, + "loss": 1.7305, + "step": 549 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019981348472841547, + "loss": 1.7832, + "step": 550 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998117215540943, + "loss": 1.7637, + "step": 551 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019980995009292713, + "loss": 1.7578, + "step": 552 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019980817034506116, + "loss": 1.793, + "step": 553 + }, + { + "epoch": 0.05, + "learning_rate": 0.001998063823106441, + "loss": 1.6934, + "step": 554 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019980458598982437, + "loss": 1.6387, + "step": 555 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019980278138275113, + "loss": 1.7129, + "step": 556 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019980096848957424, + "loss": 1.9668, + "step": 557 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019979914731044417, + "loss": 1.7012, + "step": 558 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019979731784551214, + "loss": 1.8125, + "step": 559 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019979548009493005, + "loss": 1.8457, + "step": 560 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997936340588505, + "loss": 1.7324, + "step": 561 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997917797374267, + "loss": 1.8301, + "step": 562 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997899171308126, + "loss": 1.9395, + "step": 563 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019978804623916294, + "loss": 1.8047, + "step": 564 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019978616706263297, + "loss": 1.7461, + "step": 565 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019978427960137867, + "loss": 1.7734, + "step": 566 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019978238385555684, + "loss": 1.7773, + "step": 567 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019978047982532484, + "loss": 1.7324, + "step": 568 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019977856751084073, + "loss": 1.8008, + "step": 569 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019977664691226333, + "loss": 1.6719, + "step": 570 + }, + { + "epoch": 0.05, + "learning_rate": 0.00199774718029752, + "loss": 1.8457, + "step": 571 + }, + { + "epoch": 0.05, + "learning_rate": 0.00199772780863467, + "loss": 1.7188, + "step": 572 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019977083541356908, + "loss": 1.7656, + "step": 573 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997688816802198, + "loss": 1.7031, + "step": 574 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019976691966358136, + "loss": 1.7598, + "step": 575 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997649493638167, + "loss": 1.8164, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997629707810893, + "loss": 1.7988, + "step": 577 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997609839155635, + "loss": 1.7148, + "step": 578 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019975898876740425, + "loss": 1.7324, + "step": 579 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997569853367772, + "loss": 1.8105, + "step": 580 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019975497362384863, + "loss": 1.7871, + "step": 581 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997529536287857, + "loss": 1.7598, + "step": 582 + }, + { + "epoch": 0.05, + "learning_rate": 0.00199750925351756, + "loss": 1.8516, + "step": 583 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997488887929279, + "loss": 1.668, + "step": 584 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997468439524706, + "loss": 1.8086, + "step": 585 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019974479083055383, + "loss": 1.6914, + "step": 586 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019974272942734806, + "loss": 1.7305, + "step": 587 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019974065974302435, + "loss": 1.6855, + "step": 588 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997385817777546, + "loss": 1.6895, + "step": 589 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019973649553171138, + "loss": 1.8418, + "step": 590 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997344010050678, + "loss": 1.8379, + "step": 591 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019973229819799784, + "loss": 1.7422, + "step": 592 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019973018711067604, + "loss": 1.8008, + "step": 593 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019972806774327767, + "loss": 1.7168, + "step": 594 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019972594009597874, + "loss": 1.6992, + "step": 595 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997238041689558, + "loss": 1.7656, + "step": 596 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997216599623863, + "loss": 1.8418, + "step": 597 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019971950747644826, + "loss": 1.7168, + "step": 598 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997173467113203, + "loss": 1.8262, + "step": 599 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019971517766718184, + "loss": 1.793, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00199713000344213, + "loss": 1.668, + "step": 601 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997108147425945, + "loss": 1.748, + "step": 602 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997086208625079, + "loss": 1.8281, + "step": 603 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019970641870413522, + "loss": 1.6992, + "step": 604 + }, + { + "epoch": 0.05, + "learning_rate": 0.001997042082676594, + "loss": 1.7656, + "step": 605 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019970198955326385, + "loss": 1.7715, + "step": 606 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019969976256113287, + "loss": 1.8105, + "step": 607 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019969752729145137, + "loss": 1.7559, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019969528374440488, + "loss": 1.5664, + "step": 609 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019969303192017966, + "loss": 1.8008, + "step": 610 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019969077181896267, + "loss": 1.7988, + "step": 611 + }, + { + "epoch": 0.05, + "learning_rate": 0.001996885034409416, + "loss": 1.9199, + "step": 612 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019968622678630476, + "loss": 1.7207, + "step": 613 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019968394185524113, + "loss": 1.6094, + "step": 614 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019968164864794047, + "loss": 1.7148, + "step": 615 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019967934716459317, + "loss": 1.8418, + "step": 616 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019967703740539026, + "loss": 1.6602, + "step": 617 + }, + { + "epoch": 0.05, + "learning_rate": 0.0019967471937052356, + "loss": 1.7812, + "step": 618 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996723930601855, + "loss": 1.752, + "step": 619 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996700584745692, + "loss": 1.748, + "step": 620 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019966771561386854, + "loss": 1.752, + "step": 621 + }, + { + "epoch": 0.06, + "learning_rate": 0.00199665364478278, + "loss": 1.8242, + "step": 622 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996630050679928, + "loss": 1.6992, + "step": 623 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019966063738320883, + "loss": 1.7871, + "step": 624 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019965826142412262, + "loss": 1.7891, + "step": 625 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996558771909315, + "loss": 1.6797, + "step": 626 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019965348468383337, + "loss": 1.7188, + "step": 627 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019965108390302693, + "loss": 1.8848, + "step": 628 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996486748487114, + "loss": 1.7871, + "step": 629 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996462575210869, + "loss": 1.6836, + "step": 630 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996438319203541, + "loss": 1.7324, + "step": 631 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996413980467143, + "loss": 1.6738, + "step": 632 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996389559003697, + "loss": 1.8379, + "step": 633 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019963650548152296, + "loss": 1.7441, + "step": 634 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019963404679037757, + "loss": 1.8203, + "step": 635 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019963157982713768, + "loss": 1.7676, + "step": 636 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019962910459200805, + "loss": 1.7715, + "step": 637 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019962662108519427, + "loss": 1.752, + "step": 638 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019962412930690245, + "loss": 1.8535, + "step": 639 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996216292573395, + "loss": 1.7773, + "step": 640 + }, + { + "epoch": 0.06, + "learning_rate": 0.00199619120936713, + "loss": 1.627, + "step": 641 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996166043452312, + "loss": 1.6992, + "step": 642 + }, + { + "epoch": 0.06, + "learning_rate": 0.00199614079483103, + "loss": 1.7617, + "step": 643 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019961154635053805, + "loss": 1.8262, + "step": 644 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996090049477467, + "loss": 1.7246, + "step": 645 + }, + { + "epoch": 0.06, + "learning_rate": 0.001996064552749399, + "loss": 1.7402, + "step": 646 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019960389733232934, + "loss": 1.7383, + "step": 647 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019960133112012744, + "loss": 1.9062, + "step": 648 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019959875663854722, + "loss": 1.7676, + "step": 649 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019959617388780244, + "loss": 1.7773, + "step": 650 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995935828681075, + "loss": 1.7383, + "step": 651 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019959098357967757, + "loss": 1.5938, + "step": 652 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019958837602272843, + "loss": 1.7559, + "step": 653 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995857601974766, + "loss": 1.7441, + "step": 654 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019958313610413912, + "loss": 1.8594, + "step": 655 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019958050374293402, + "loss": 1.8242, + "step": 656 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995778631140798, + "loss": 1.748, + "step": 657 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019957521421779573, + "loss": 1.6602, + "step": 658 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019957255705430168, + "loss": 1.707, + "step": 659 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019956989162381827, + "loss": 1.8027, + "step": 660 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995672179265668, + "loss": 1.7461, + "step": 661 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995645359627692, + "loss": 1.7051, + "step": 662 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995618457326483, + "loss": 1.7539, + "step": 663 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995591472364273, + "loss": 1.7305, + "step": 664 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019955644047433027, + "loss": 1.707, + "step": 665 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019955372544658203, + "loss": 1.7793, + "step": 666 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019955100215340786, + "loss": 1.7148, + "step": 667 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019954827059503397, + "loss": 1.7129, + "step": 668 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995455307716871, + "loss": 1.7539, + "step": 669 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019954278268359466, + "loss": 1.7207, + "step": 670 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019954002633098497, + "loss": 1.6855, + "step": 671 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995372617140867, + "loss": 1.6855, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019953448883312953, + "loss": 1.8105, + "step": 673 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995317076883436, + "loss": 1.7715, + "step": 674 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019952891827995983, + "loss": 1.7656, + "step": 675 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995261206082098, + "loss": 1.7285, + "step": 676 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995233146733258, + "loss": 1.7695, + "step": 677 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019952050047554078, + "loss": 1.8105, + "step": 678 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995176780150884, + "loss": 1.75, + "step": 679 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019951484729220296, + "loss": 1.6777, + "step": 680 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995120083071195, + "loss": 1.7188, + "step": 681 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019950916106007376, + "loss": 1.6406, + "step": 682 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995063055513021, + "loss": 1.7812, + "step": 683 + }, + { + "epoch": 0.06, + "learning_rate": 0.001995034417810416, + "loss": 1.793, + "step": 684 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019950056974953004, + "loss": 1.6992, + "step": 685 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019949768945700585, + "loss": 1.8535, + "step": 686 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019949480090370817, + "loss": 1.8828, + "step": 687 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019949190408987683, + "loss": 1.7305, + "step": 688 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994889990157523, + "loss": 1.8379, + "step": 689 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019948608568157583, + "loss": 1.8613, + "step": 690 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994831640875893, + "loss": 1.7773, + "step": 691 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994802342340352, + "loss": 1.7266, + "step": 692 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994772961211569, + "loss": 1.5684, + "step": 693 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019947434974919814, + "loss": 1.7324, + "step": 694 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019947139511840377, + "loss": 1.8711, + "step": 695 + }, + { + "epoch": 0.06, + "learning_rate": 0.00199468432229019, + "loss": 1.6855, + "step": 696 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019946546108128974, + "loss": 1.7266, + "step": 697 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994624816754628, + "loss": 1.752, + "step": 698 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994594940117855, + "loss": 1.6953, + "step": 699 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019945649809050585, + "loss": 1.7109, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994534939118726, + "loss": 1.873, + "step": 701 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019945048147613524, + "loss": 1.8125, + "step": 702 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994474607835438, + "loss": 1.7891, + "step": 703 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994444318343491, + "loss": 1.7402, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019944139462880262, + "loss": 1.7266, + "step": 705 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994383491671565, + "loss": 1.7285, + "step": 706 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019943529544966363, + "loss": 1.7871, + "step": 707 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994322334765775, + "loss": 1.8535, + "step": 708 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994291632481524, + "loss": 1.6973, + "step": 709 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994260847646431, + "loss": 1.877, + "step": 710 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994229980263053, + "loss": 1.7344, + "step": 711 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994199030333953, + "loss": 1.6836, + "step": 712 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019941679978617, + "loss": 1.7012, + "step": 713 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019941368828488705, + "loss": 1.623, + "step": 714 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994105685298048, + "loss": 1.834, + "step": 715 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994074405211822, + "loss": 1.6953, + "step": 716 + }, + { + "epoch": 0.06, + "learning_rate": 0.001994043042592791, + "loss": 1.7402, + "step": 717 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019940115974435573, + "loss": 1.7734, + "step": 718 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019939800697667326, + "loss": 1.7305, + "step": 719 + }, + { + "epoch": 0.06, + "learning_rate": 0.001993948459564934, + "loss": 1.8672, + "step": 720 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019939167668407863, + "loss": 1.5957, + "step": 721 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019938849915969205, + "loss": 1.6738, + "step": 722 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019938531338359747, + "loss": 1.7188, + "step": 723 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019938211935605944, + "loss": 1.6914, + "step": 724 + }, + { + "epoch": 0.06, + "learning_rate": 0.001993789170773431, + "loss": 1.7422, + "step": 725 + }, + { + "epoch": 0.06, + "learning_rate": 0.001993757065477143, + "loss": 1.7637, + "step": 726 + }, + { + "epoch": 0.06, + "learning_rate": 0.001993724877674396, + "loss": 1.752, + "step": 727 + }, + { + "epoch": 0.06, + "learning_rate": 0.001993692607367863, + "loss": 1.7246, + "step": 728 + }, + { + "epoch": 0.06, + "learning_rate": 0.0019936602545602227, + "loss": 1.7461, + "step": 729 + }, + { + "epoch": 0.06, + "learning_rate": 0.001993627819254161, + "loss": 1.7441, + "step": 730 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019935953014523712, + "loss": 1.7363, + "step": 731 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993562701157553, + "loss": 1.7598, + "step": 732 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993530018372413, + "loss": 1.6504, + "step": 733 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993497253099665, + "loss": 1.6113, + "step": 734 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019934644053420285, + "loss": 1.791, + "step": 735 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019934314751022314, + "loss": 1.7969, + "step": 736 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019933984623830077, + "loss": 1.8516, + "step": 737 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993365367187098, + "loss": 1.6367, + "step": 738 + }, + { + "epoch": 0.07, + "learning_rate": 0.00199333218951725, + "loss": 1.7305, + "step": 739 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019932989293762185, + "loss": 1.8203, + "step": 740 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993265586766765, + "loss": 1.6445, + "step": 741 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993232161691657, + "loss": 1.6895, + "step": 742 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019931986541536707, + "loss": 1.6543, + "step": 743 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993165064155588, + "loss": 1.6738, + "step": 744 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993131391700196, + "loss": 1.7715, + "step": 745 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019930976367902925, + "loss": 1.7344, + "step": 746 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019930637994286793, + "loss": 1.7617, + "step": 747 + }, + { + "epoch": 0.07, + "learning_rate": 0.001993029879618165, + "loss": 1.8223, + "step": 748 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019929958773615662, + "loss": 1.7422, + "step": 749 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019929617926617064, + "loss": 1.7754, + "step": 750 + }, + { + "epoch": 0.07, + "learning_rate": 0.001992927625521415, + "loss": 1.6777, + "step": 751 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019928933759435284, + "loss": 1.6523, + "step": 752 + }, + { + "epoch": 0.07, + "learning_rate": 0.001992859043930891, + "loss": 1.7559, + "step": 753 + }, + { + "epoch": 0.07, + "learning_rate": 0.001992824629486353, + "loss": 1.8965, + "step": 754 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019927901326127714, + "loss": 1.791, + "step": 755 + }, + { + "epoch": 0.07, + "learning_rate": 0.00199275555331301, + "loss": 1.75, + "step": 756 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019927208915899402, + "loss": 1.8516, + "step": 757 + }, + { + "epoch": 0.07, + "learning_rate": 0.00199268614744644, + "loss": 1.8086, + "step": 758 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019926513208853936, + "loss": 1.7363, + "step": 759 + }, + { + "epoch": 0.07, + "learning_rate": 0.001992616411909692, + "loss": 1.7031, + "step": 760 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019925814205222344, + "loss": 1.7441, + "step": 761 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019925463467259257, + "loss": 1.6602, + "step": 762 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019925111905236778, + "loss": 1.9531, + "step": 763 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019924759519184096, + "loss": 1.7363, + "step": 764 + }, + { + "epoch": 0.07, + "learning_rate": 0.001992440630913047, + "loss": 1.6699, + "step": 765 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019924052275105218, + "loss": 1.6621, + "step": 766 + }, + { + "epoch": 0.07, + "learning_rate": 0.001992369741713774, + "loss": 1.6816, + "step": 767 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019923341735257494, + "loss": 1.832, + "step": 768 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019922985229494016, + "loss": 1.6074, + "step": 769 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019922627899876897, + "loss": 1.7246, + "step": 770 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019922269746435813, + "loss": 1.8887, + "step": 771 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019921910769200493, + "loss": 1.6016, + "step": 772 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019921550968200743, + "loss": 1.8828, + "step": 773 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019921190343466437, + "loss": 1.7051, + "step": 774 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019920828895027515, + "loss": 1.6387, + "step": 775 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019920466622913983, + "loss": 1.7441, + "step": 776 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019920103527155918, + "loss": 1.6758, + "step": 777 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019919739607783477, + "loss": 1.7402, + "step": 778 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991937486482686, + "loss": 1.7285, + "step": 779 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019919009298316357, + "loss": 1.7285, + "step": 780 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019918642908282325, + "loss": 1.6035, + "step": 781 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991827569475517, + "loss": 1.6699, + "step": 782 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019917907657765388, + "loss": 1.7773, + "step": 783 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019917538797343533, + "loss": 1.6582, + "step": 784 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019917169113520226, + "loss": 1.6836, + "step": 785 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019916798606326174, + "loss": 1.7578, + "step": 786 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991642727579212, + "loss": 1.6445, + "step": 787 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019916055121948903, + "loss": 1.6914, + "step": 788 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019915682144827423, + "loss": 1.6875, + "step": 789 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991530834445864, + "loss": 1.6953, + "step": 790 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991493372087359, + "loss": 1.7363, + "step": 791 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019914558274103386, + "loss": 1.791, + "step": 792 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991418200417919, + "loss": 1.6445, + "step": 793 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991380491113224, + "loss": 1.6465, + "step": 794 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019913426994993847, + "loss": 1.7148, + "step": 795 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991304825579539, + "loss": 1.6836, + "step": 796 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019912668693568307, + "loss": 1.752, + "step": 797 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991228830834412, + "loss": 1.7188, + "step": 798 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019911907100154404, + "loss": 1.7148, + "step": 799 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991152506903081, + "loss": 1.6777, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019911142215005054, + "loss": 1.6953, + "step": 801 + }, + { + "epoch": 0.07, + "learning_rate": 0.001991075853810893, + "loss": 1.6289, + "step": 802 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019910374038374285, + "loss": 1.7715, + "step": 803 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019909988715833046, + "loss": 1.8438, + "step": 804 + }, + { + "epoch": 0.07, + "learning_rate": 0.00199096025705172, + "loss": 1.6895, + "step": 805 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019909215602458815, + "loss": 1.5449, + "step": 806 + }, + { + "epoch": 0.07, + "learning_rate": 0.001990882781169001, + "loss": 1.7148, + "step": 807 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019908439198242983, + "loss": 1.6914, + "step": 808 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019908049762150004, + "loss": 1.918, + "step": 809 + }, + { + "epoch": 0.07, + "learning_rate": 0.00199076595034434, + "loss": 1.8242, + "step": 810 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019907268422155577, + "loss": 1.6836, + "step": 811 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019906876518318998, + "loss": 1.7656, + "step": 812 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019906483791966205, + "loss": 1.6738, + "step": 813 + }, + { + "epoch": 0.07, + "learning_rate": 0.001990609024312981, + "loss": 1.6855, + "step": 814 + }, + { + "epoch": 0.07, + "learning_rate": 0.001990569587184247, + "loss": 1.7441, + "step": 815 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019905300678136947, + "loss": 1.6777, + "step": 816 + }, + { + "epoch": 0.07, + "learning_rate": 0.001990490466204604, + "loss": 1.8379, + "step": 817 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019904507823602633, + "loss": 1.7617, + "step": 818 + }, + { + "epoch": 0.07, + "learning_rate": 0.001990411016283967, + "loss": 1.6934, + "step": 819 + }, + { + "epoch": 0.07, + "learning_rate": 0.001990371167979017, + "loss": 1.7773, + "step": 820 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019903312374487214, + "loss": 1.7383, + "step": 821 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019902912246963955, + "loss": 1.707, + "step": 822 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019902511297253617, + "loss": 1.7402, + "step": 823 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019902109525389484, + "loss": 1.666, + "step": 824 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019901706931404915, + "loss": 1.709, + "step": 825 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019901303515333334, + "loss": 1.7109, + "step": 826 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019900899277208238, + "loss": 1.7129, + "step": 827 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019900494217063185, + "loss": 1.6934, + "step": 828 + }, + { + "epoch": 0.07, + "learning_rate": 0.001990008833493181, + "loss": 1.7266, + "step": 829 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019899681630847804, + "loss": 1.8066, + "step": 830 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019899274104844943, + "loss": 1.6309, + "step": 831 + }, + { + "epoch": 0.07, + "learning_rate": 0.001989886575695705, + "loss": 1.6387, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019898456587218037, + "loss": 1.7734, + "step": 833 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019898046595661872, + "loss": 1.7188, + "step": 834 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019897635782322592, + "loss": 1.7402, + "step": 835 + }, + { + "epoch": 0.07, + "learning_rate": 0.001989722414723431, + "loss": 1.707, + "step": 836 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019896811690431203, + "loss": 1.7734, + "step": 837 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019896398411947507, + "loss": 1.7148, + "step": 838 + }, + { + "epoch": 0.07, + "learning_rate": 0.001989598431181754, + "loss": 1.6758, + "step": 839 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019895569390075683, + "loss": 1.7988, + "step": 840 + }, + { + "epoch": 0.07, + "learning_rate": 0.001989515364675638, + "loss": 1.7852, + "step": 841 + }, + { + "epoch": 0.07, + "learning_rate": 0.0019894737081894154, + "loss": 1.7266, + "step": 842 + }, + { + "epoch": 0.07, + "learning_rate": 0.001989431969552359, + "loss": 1.8281, + "step": 843 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019893901487679335, + "loss": 1.7715, + "step": 844 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019893482458396114, + "loss": 1.6914, + "step": 845 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019893062607708716, + "loss": 1.6094, + "step": 846 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019892641935652005, + "loss": 1.7246, + "step": 847 + }, + { + "epoch": 0.08, + "learning_rate": 0.00198922204422609, + "loss": 1.6641, + "step": 848 + }, + { + "epoch": 0.08, + "learning_rate": 0.00198917981275704, + "loss": 1.7578, + "step": 849 + }, + { + "epoch": 0.08, + "learning_rate": 0.001989137499161556, + "loss": 1.6348, + "step": 850 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019890951034431522, + "loss": 1.6914, + "step": 851 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019890526256053477, + "loss": 1.6699, + "step": 852 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019890100656516693, + "loss": 1.707, + "step": 853 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988967423585651, + "loss": 1.6855, + "step": 854 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019889246994108326, + "loss": 1.7031, + "step": 855 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019888818931307617, + "loss": 1.75, + "step": 856 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019888390047489916, + "loss": 1.6855, + "step": 857 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988796034269084, + "loss": 1.7891, + "step": 858 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988752981694606, + "loss": 1.7637, + "step": 859 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019887098470291317, + "loss": 1.8262, + "step": 860 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988666630276243, + "loss": 1.7305, + "step": 861 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019886233314395283, + "loss": 1.8301, + "step": 862 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988579950522581, + "loss": 1.7715, + "step": 863 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988536487529004, + "loss": 1.6914, + "step": 864 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988492942462406, + "loss": 1.7422, + "step": 865 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988449315326401, + "loss": 1.7188, + "step": 866 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019884056061246127, + "loss": 1.7051, + "step": 867 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019883618148606686, + "loss": 1.6816, + "step": 868 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019883179415382058, + "loss": 1.791, + "step": 869 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019882739861608652, + "loss": 1.8066, + "step": 870 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019882299487322982, + "loss": 1.7012, + "step": 871 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019881858292561594, + "loss": 1.75, + "step": 872 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988141627736113, + "loss": 1.6699, + "step": 873 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019880973441758275, + "loss": 1.6348, + "step": 874 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019880529785789806, + "loss": 1.7852, + "step": 875 + }, + { + "epoch": 0.08, + "learning_rate": 0.001988008530949256, + "loss": 1.7441, + "step": 876 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019879640012903424, + "loss": 1.6738, + "step": 877 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019879193896059385, + "loss": 1.7129, + "step": 878 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019878746958997473, + "loss": 1.7227, + "step": 879 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019878299201754796, + "loss": 1.7949, + "step": 880 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019877850624368527, + "loss": 1.5879, + "step": 881 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987740122687592, + "loss": 1.7285, + "step": 882 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019876951009314267, + "loss": 1.7031, + "step": 883 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987649997172097, + "loss": 1.75, + "step": 884 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019876048114133454, + "loss": 1.8691, + "step": 885 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987559543658925, + "loss": 1.6855, + "step": 886 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019875141939125936, + "loss": 1.6543, + "step": 887 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019874687621781162, + "loss": 1.7949, + "step": 888 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987423248459265, + "loss": 1.8301, + "step": 889 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987377652759819, + "loss": 1.793, + "step": 890 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019873319750835635, + "loss": 1.7988, + "step": 891 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019872862154342906, + "loss": 1.6816, + "step": 892 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019872403738158, + "loss": 1.8105, + "step": 893 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987194450231897, + "loss": 1.6523, + "step": 894 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987148444686395, + "loss": 1.7207, + "step": 895 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019871023571831135, + "loss": 1.6035, + "step": 896 + }, + { + "epoch": 0.08, + "learning_rate": 0.001987056187725879, + "loss": 1.6426, + "step": 897 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019870099363185245, + "loss": 1.7266, + "step": 898 + }, + { + "epoch": 0.08, + "learning_rate": 0.00198696360296489, + "loss": 1.6992, + "step": 899 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019869171876688223, + "loss": 1.791, + "step": 900 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986870690434175, + "loss": 1.6406, + "step": 901 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986824111264809, + "loss": 1.7422, + "step": 902 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986777450164591, + "loss": 1.5859, + "step": 903 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986730707137395, + "loss": 1.7363, + "step": 904 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986683882187102, + "loss": 1.7012, + "step": 905 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019866369753175996, + "loss": 1.6621, + "step": 906 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019865899865327825, + "loss": 1.6816, + "step": 907 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019865429158365516, + "loss": 1.8027, + "step": 908 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986495763232815, + "loss": 1.5918, + "step": 909 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019864485287254875, + "loss": 1.8086, + "step": 910 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019864012123184905, + "loss": 1.6602, + "step": 911 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019863538140157534, + "loss": 1.6406, + "step": 912 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019863063338212104, + "loss": 1.582, + "step": 913 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986258771738804, + "loss": 1.6445, + "step": 914 + }, + { + "epoch": 0.08, + "learning_rate": 0.001986211127772483, + "loss": 1.6543, + "step": 915 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019861634019262024, + "loss": 1.7363, + "step": 916 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019861155942039257, + "loss": 1.7754, + "step": 917 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019860677046096217, + "loss": 1.7227, + "step": 918 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019860197331472663, + "loss": 1.5625, + "step": 919 + }, + { + "epoch": 0.08, + "learning_rate": 0.001985971679820842, + "loss": 1.8379, + "step": 920 + }, + { + "epoch": 0.08, + "learning_rate": 0.001985923544634339, + "loss": 1.7852, + "step": 921 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019858753275917534, + "loss": 1.6719, + "step": 922 + }, + { + "epoch": 0.08, + "learning_rate": 0.001985827028697089, + "loss": 1.6074, + "step": 923 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019857786479543548, + "loss": 1.7891, + "step": 924 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019857301853675683, + "loss": 1.7578, + "step": 925 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019856816409407527, + "loss": 1.7852, + "step": 926 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019856330146779387, + "loss": 1.6328, + "step": 927 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019855843065831632, + "loss": 1.7402, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019855355166604706, + "loss": 1.7188, + "step": 929 + }, + { + "epoch": 0.08, + "learning_rate": 0.001985486644913911, + "loss": 1.8281, + "step": 930 + }, + { + "epoch": 0.08, + "learning_rate": 0.001985437691347543, + "loss": 1.6973, + "step": 931 + }, + { + "epoch": 0.08, + "learning_rate": 0.00198538865596543, + "loss": 1.7461, + "step": 932 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019853395387716436, + "loss": 1.7578, + "step": 933 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019852903397702616, + "loss": 1.6367, + "step": 934 + }, + { + "epoch": 0.08, + "learning_rate": 0.001985241058965369, + "loss": 1.7051, + "step": 935 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019851916963610564, + "loss": 1.7441, + "step": 936 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019851422519614236, + "loss": 1.8184, + "step": 937 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019850927257705746, + "loss": 1.7695, + "step": 938 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019850431177926217, + "loss": 1.6855, + "step": 939 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019849934280316834, + "loss": 1.7422, + "step": 940 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019849436564918855, + "loss": 1.6641, + "step": 941 + }, + { + "epoch": 0.08, + "learning_rate": 0.00198489380317736, + "loss": 1.8047, + "step": 942 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984843868092246, + "loss": 1.584, + "step": 943 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019847938512406894, + "loss": 1.6816, + "step": 944 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984743752626843, + "loss": 1.8281, + "step": 945 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984693572254866, + "loss": 1.7617, + "step": 946 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019846433101289246, + "loss": 1.6797, + "step": 947 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984592966253192, + "loss": 1.7344, + "step": 948 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984542540631847, + "loss": 1.5918, + "step": 949 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984492033269078, + "loss": 1.7793, + "step": 950 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019844414441690774, + "loss": 1.6211, + "step": 951 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984390773336045, + "loss": 1.6367, + "step": 952 + }, + { + "epoch": 0.08, + "learning_rate": 0.001984340020774188, + "loss": 1.7266, + "step": 953 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019842891864877204, + "loss": 1.6777, + "step": 954 + }, + { + "epoch": 0.08, + "learning_rate": 0.0019842382704808627, + "loss": 1.7266, + "step": 955 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019841872727578416, + "loss": 1.7637, + "step": 956 + }, + { + "epoch": 0.09, + "learning_rate": 0.001984136193322892, + "loss": 1.6543, + "step": 957 + }, + { + "epoch": 0.09, + "learning_rate": 0.001984085032180254, + "loss": 1.6992, + "step": 958 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019840337893341753, + "loss": 1.7617, + "step": 959 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019839824647889114, + "loss": 1.7578, + "step": 960 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019839310585487223, + "loss": 1.5879, + "step": 961 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019838795706178763, + "loss": 1.7129, + "step": 962 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019838280010006483, + "loss": 1.6152, + "step": 963 + }, + { + "epoch": 0.09, + "learning_rate": 0.00198377634970132, + "loss": 1.8027, + "step": 964 + }, + { + "epoch": 0.09, + "learning_rate": 0.001983724616724179, + "loss": 1.7363, + "step": 965 + }, + { + "epoch": 0.09, + "learning_rate": 0.001983672802073522, + "loss": 1.7832, + "step": 966 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019836209057536493, + "loss": 1.8223, + "step": 967 + }, + { + "epoch": 0.09, + "learning_rate": 0.00198356892776887, + "loss": 1.6777, + "step": 968 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019835168681235, + "loss": 1.6738, + "step": 969 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019834647268218613, + "loss": 1.6387, + "step": 970 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019834125038682825, + "loss": 1.7012, + "step": 971 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019833601992671, + "loss": 1.6934, + "step": 972 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019833078130226563, + "loss": 1.6836, + "step": 973 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019832553451393006, + "loss": 1.6699, + "step": 974 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019832027956213885, + "loss": 1.6504, + "step": 975 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019831501644732843, + "loss": 1.7012, + "step": 976 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019830974516993564, + "loss": 1.8672, + "step": 977 + }, + { + "epoch": 0.09, + "learning_rate": 0.001983044657303982, + "loss": 1.7031, + "step": 978 + }, + { + "epoch": 0.09, + "learning_rate": 0.001982991781291544, + "loss": 1.7012, + "step": 979 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019829388236664324, + "loss": 1.834, + "step": 980 + }, + { + "epoch": 0.09, + "learning_rate": 0.001982885784433044, + "loss": 1.5859, + "step": 981 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019828326635957824, + "loss": 1.7344, + "step": 982 + }, + { + "epoch": 0.09, + "learning_rate": 0.001982779461159058, + "loss": 1.6934, + "step": 983 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019827261771272883, + "loss": 1.7207, + "step": 984 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019826728115048963, + "loss": 1.8027, + "step": 985 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019826193642963133, + "loss": 1.7949, + "step": 986 + }, + { + "epoch": 0.09, + "learning_rate": 0.001982565835505977, + "loss": 1.6953, + "step": 987 + }, + { + "epoch": 0.09, + "learning_rate": 0.001982512225138331, + "loss": 1.6836, + "step": 988 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019824585331978265, + "loss": 1.6074, + "step": 989 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019824047596889213, + "loss": 1.7383, + "step": 990 + }, + { + "epoch": 0.09, + "learning_rate": 0.00198235090461608, + "loss": 1.7031, + "step": 991 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019822969679837736, + "loss": 1.9316, + "step": 992 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019822429497964806, + "loss": 1.7578, + "step": 993 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019821888500586855, + "loss": 1.7168, + "step": 994 + }, + { + "epoch": 0.09, + "learning_rate": 0.00198213466877488, + "loss": 1.6992, + "step": 995 + }, + { + "epoch": 0.09, + "learning_rate": 0.001982080405949563, + "loss": 1.6914, + "step": 996 + }, + { + "epoch": 0.09, + "learning_rate": 0.001982026061587239, + "loss": 1.6211, + "step": 997 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019819716356924196, + "loss": 1.6348, + "step": 998 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981917128269625, + "loss": 1.7832, + "step": 999 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981862539323379, + "loss": 1.7773, + "step": 1000 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981807868858215, + "loss": 1.7734, + "step": 1001 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981753116878671, + "loss": 1.7617, + "step": 1002 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981698283389294, + "loss": 1.7539, + "step": 1003 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019816433683946355, + "loss": 1.6484, + "step": 1004 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981588371899255, + "loss": 1.6211, + "step": 1005 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981533293907719, + "loss": 1.6719, + "step": 1006 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019814781344246, + "loss": 1.6562, + "step": 1007 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981422893454478, + "loss": 1.6211, + "step": 1008 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019813675710019384, + "loss": 1.7344, + "step": 1009 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981312167071575, + "loss": 1.6426, + "step": 1010 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019812566816679884, + "loss": 1.668, + "step": 1011 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981201114795784, + "loss": 1.7637, + "step": 1012 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981145466459576, + "loss": 1.6816, + "step": 1013 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019810897366639843, + "loss": 1.7148, + "step": 1014 + }, + { + "epoch": 0.09, + "learning_rate": 0.001981033925413636, + "loss": 1.6934, + "step": 1015 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019809780327131646, + "loss": 1.7773, + "step": 1016 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980922058567211, + "loss": 1.7266, + "step": 1017 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019808660029804214, + "loss": 1.6777, + "step": 1018 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980809865957451, + "loss": 1.7109, + "step": 1019 + }, + { + "epoch": 0.09, + "learning_rate": 0.00198075364750296, + "loss": 1.709, + "step": 1020 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019806973476216164, + "loss": 1.7539, + "step": 1021 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019806409663180938, + "loss": 1.6953, + "step": 1022 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980584503597074, + "loss": 1.6211, + "step": 1023 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980527959463244, + "loss": 1.6797, + "step": 1024 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019804713339212987, + "loss": 1.6094, + "step": 1025 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019804146269759397, + "loss": 1.6875, + "step": 1026 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019803578386318747, + "loss": 1.6504, + "step": 1027 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980300968893819, + "loss": 1.7305, + "step": 1028 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019802440177664935, + "loss": 1.7246, + "step": 1029 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980186985254627, + "loss": 1.75, + "step": 1030 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019801298713629546, + "loss": 1.7715, + "step": 1031 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980072676096218, + "loss": 1.7344, + "step": 1032 + }, + { + "epoch": 0.09, + "learning_rate": 0.001980015399459166, + "loss": 1.7461, + "step": 1033 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019799580414565543, + "loss": 1.7891, + "step": 1034 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979900602093144, + "loss": 1.666, + "step": 1035 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979843081373705, + "loss": 1.6758, + "step": 1036 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019797854793030126, + "loss": 1.7871, + "step": 1037 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979727795885849, + "loss": 1.5508, + "step": 1038 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979670031127004, + "loss": 1.8125, + "step": 1039 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979612185031273, + "loss": 1.7344, + "step": 1040 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019795542576034582, + "loss": 1.6348, + "step": 1041 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019794962488483697, + "loss": 1.6289, + "step": 1042 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019794381587708238, + "loss": 1.7598, + "step": 1043 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979379987375643, + "loss": 1.8125, + "step": 1044 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019793217346676566, + "loss": 1.7266, + "step": 1045 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979263400651702, + "loss": 1.6348, + "step": 1046 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019792049853326215, + "loss": 1.6895, + "step": 1047 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019791464887152655, + "loss": 1.7207, + "step": 1048 + }, + { + "epoch": 0.09, + "learning_rate": 0.001979087910804491, + "loss": 1.6836, + "step": 1049 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019790292516051605, + "loss": 1.709, + "step": 1050 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019789705111221446, + "loss": 1.7461, + "step": 1051 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019789116893603204, + "loss": 1.6074, + "step": 1052 + }, + { + "epoch": 0.09, + "learning_rate": 0.001978852786324571, + "loss": 1.7031, + "step": 1053 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019787938020197878, + "loss": 1.793, + "step": 1054 + }, + { + "epoch": 0.09, + "learning_rate": 0.001978734736450867, + "loss": 1.5547, + "step": 1055 + }, + { + "epoch": 0.09, + "learning_rate": 0.001978675589622713, + "loss": 1.7383, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019786163615402364, + "loss": 1.5176, + "step": 1057 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019785570522083545, + "loss": 1.8633, + "step": 1058 + }, + { + "epoch": 0.09, + "learning_rate": 0.001978497661631992, + "loss": 1.6895, + "step": 1059 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019784381898160786, + "loss": 1.7441, + "step": 1060 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019783786367655524, + "loss": 1.8242, + "step": 1061 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019783190024853584, + "loss": 1.6328, + "step": 1062 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019782592869804472, + "loss": 1.7598, + "step": 1063 + }, + { + "epoch": 0.09, + "learning_rate": 0.001978199490255777, + "loss": 1.6094, + "step": 1064 + }, + { + "epoch": 0.09, + "learning_rate": 0.001978139612316312, + "loss": 1.6953, + "step": 1065 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019780796531670237, + "loss": 1.7461, + "step": 1066 + }, + { + "epoch": 0.09, + "learning_rate": 0.0019780196128128904, + "loss": 1.6289, + "step": 1067 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019779594912588965, + "loss": 1.7109, + "step": 1068 + }, + { + "epoch": 0.1, + "learning_rate": 0.001977899288510034, + "loss": 1.7129, + "step": 1069 + }, + { + "epoch": 0.1, + "learning_rate": 0.001977839004571301, + "loss": 1.6953, + "step": 1070 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019777786394477026, + "loss": 1.6816, + "step": 1071 + }, + { + "epoch": 0.1, + "learning_rate": 0.001977718193144251, + "loss": 1.7012, + "step": 1072 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019776576656659636, + "loss": 1.5195, + "step": 1073 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019775970570178668, + "loss": 1.6465, + "step": 1074 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019775363672049923, + "loss": 1.6973, + "step": 1075 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019774755962323792, + "loss": 1.7715, + "step": 1076 + }, + { + "epoch": 0.1, + "learning_rate": 0.001977414744105072, + "loss": 1.8105, + "step": 1077 + }, + { + "epoch": 0.1, + "learning_rate": 0.001977353810828124, + "loss": 1.5996, + "step": 1078 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019772927964065933, + "loss": 1.6484, + "step": 1079 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019772317008455464, + "loss": 1.7637, + "step": 1080 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019771705241500553, + "loss": 1.8125, + "step": 1081 + }, + { + "epoch": 0.1, + "learning_rate": 0.001977109266325199, + "loss": 1.7773, + "step": 1082 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019770479273760644, + "loss": 1.7832, + "step": 1083 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019769865073077427, + "loss": 1.7578, + "step": 1084 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019769250061253342, + "loss": 1.5469, + "step": 1085 + }, + { + "epoch": 0.1, + "learning_rate": 0.001976863423833945, + "loss": 1.7578, + "step": 1086 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019768017604386875, + "loss": 1.7031, + "step": 1087 + }, + { + "epoch": 0.1, + "learning_rate": 0.001976740015944682, + "loss": 1.6465, + "step": 1088 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019766781903570543, + "loss": 1.7012, + "step": 1089 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019766162836809373, + "loss": 1.6836, + "step": 1090 + }, + { + "epoch": 0.1, + "learning_rate": 0.001976554295921471, + "loss": 1.6719, + "step": 1091 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019764922270838025, + "loss": 1.7676, + "step": 1092 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019764300771730843, + "loss": 1.7461, + "step": 1093 + }, + { + "epoch": 0.1, + "learning_rate": 0.001976367846194476, + "loss": 1.7598, + "step": 1094 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019763055341531456, + "loss": 1.6797, + "step": 1095 + }, + { + "epoch": 0.1, + "learning_rate": 0.001976243141054266, + "loss": 1.707, + "step": 1096 + }, + { + "epoch": 0.1, + "learning_rate": 0.001976180666903017, + "loss": 1.7695, + "step": 1097 + }, + { + "epoch": 0.1, + "learning_rate": 0.001976118111704585, + "loss": 1.6934, + "step": 1098 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019760554754641654, + "loss": 1.6973, + "step": 1099 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975992758186957, + "loss": 1.5938, + "step": 1100 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975929959878168, + "loss": 1.6875, + "step": 1101 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975867080543011, + "loss": 1.7715, + "step": 1102 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975804120186707, + "loss": 1.6758, + "step": 1103 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975741078814484, + "loss": 1.7461, + "step": 1104 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019756779564315753, + "loss": 1.6953, + "step": 1105 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019756147530432216, + "loss": 1.6211, + "step": 1106 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975551468654671, + "loss": 1.6348, + "step": 1107 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975488103271177, + "loss": 1.7891, + "step": 1108 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019754246568980003, + "loss": 1.6699, + "step": 1109 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975361129540409, + "loss": 1.6953, + "step": 1110 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019752975212036774, + "loss": 1.6328, + "step": 1111 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975233831893087, + "loss": 1.75, + "step": 1112 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019751700616139245, + "loss": 1.7422, + "step": 1113 + }, + { + "epoch": 0.1, + "learning_rate": 0.001975106210371485, + "loss": 1.7207, + "step": 1114 + }, + { + "epoch": 0.1, + "learning_rate": 0.00197504227817107, + "loss": 1.6543, + "step": 1115 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019749782650179872, + "loss": 1.7109, + "step": 1116 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019749141709175513, + "loss": 1.6406, + "step": 1117 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019748499958750837, + "loss": 1.6777, + "step": 1118 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019747857398959125, + "loss": 1.7988, + "step": 1119 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019747214029853725, + "loss": 1.752, + "step": 1120 + }, + { + "epoch": 0.1, + "learning_rate": 0.001974656985148805, + "loss": 1.7051, + "step": 1121 + }, + { + "epoch": 0.1, + "learning_rate": 0.001974592486391559, + "loss": 1.6602, + "step": 1122 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019745279067189887, + "loss": 1.6719, + "step": 1123 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019744632461364564, + "loss": 1.7344, + "step": 1124 + }, + { + "epoch": 0.1, + "learning_rate": 0.00197439850464933, + "loss": 1.7207, + "step": 1125 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019743336822629857, + "loss": 1.748, + "step": 1126 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019742687789828044, + "loss": 1.7578, + "step": 1127 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019742037948141744, + "loss": 1.6348, + "step": 1128 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019741387297624916, + "loss": 1.6328, + "step": 1129 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019740735838331587, + "loss": 1.7578, + "step": 1130 + }, + { + "epoch": 0.1, + "learning_rate": 0.001974008357031583, + "loss": 1.7461, + "step": 1131 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019739430493631803, + "loss": 1.7871, + "step": 1132 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019738776608333734, + "loss": 1.752, + "step": 1133 + }, + { + "epoch": 0.1, + "learning_rate": 0.001973812191447591, + "loss": 1.6211, + "step": 1134 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019737466412112683, + "loss": 1.8281, + "step": 1135 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019736810101298476, + "loss": 1.6934, + "step": 1136 + }, + { + "epoch": 0.1, + "learning_rate": 0.001973615298208778, + "loss": 1.6562, + "step": 1137 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019735495054535156, + "loss": 1.6562, + "step": 1138 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019734836318695227, + "loss": 1.6055, + "step": 1139 + }, + { + "epoch": 0.1, + "learning_rate": 0.001973417677462268, + "loss": 1.7305, + "step": 1140 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019733516422372277, + "loss": 1.7754, + "step": 1141 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019732855261998844, + "loss": 1.8477, + "step": 1142 + }, + { + "epoch": 0.1, + "learning_rate": 0.001973219329355727, + "loss": 1.666, + "step": 1143 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019731530517102516, + "loss": 1.5293, + "step": 1144 + }, + { + "epoch": 0.1, + "learning_rate": 0.001973086693268961, + "loss": 1.6855, + "step": 1145 + }, + { + "epoch": 0.1, + "learning_rate": 0.001973020254037365, + "loss": 1.7031, + "step": 1146 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019729537340209794, + "loss": 1.75, + "step": 1147 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019728871332253266, + "loss": 1.6797, + "step": 1148 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019728204516559367, + "loss": 1.6562, + "step": 1149 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019727536893183453, + "loss": 1.7207, + "step": 1150 + }, + { + "epoch": 0.1, + "learning_rate": 0.001972686846218096, + "loss": 1.832, + "step": 1151 + }, + { + "epoch": 0.1, + "learning_rate": 0.001972619922360738, + "loss": 1.7891, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 0.001972552917751828, + "loss": 1.5117, + "step": 1153 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019724858323969282, + "loss": 1.7246, + "step": 1154 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019724186663016095, + "loss": 1.7559, + "step": 1155 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019723514194714474, + "loss": 1.7734, + "step": 1156 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019722840919120257, + "loss": 1.7637, + "step": 1157 + }, + { + "epoch": 0.1, + "learning_rate": 0.001972216683628934, + "loss": 1.7715, + "step": 1158 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019721491946277686, + "loss": 1.6953, + "step": 1159 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019720816249141336, + "loss": 1.625, + "step": 1160 + }, + { + "epoch": 0.1, + "learning_rate": 0.001972013974493638, + "loss": 1.7383, + "step": 1161 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019719462433718986, + "loss": 1.6211, + "step": 1162 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019718784315545393, + "loss": 1.6367, + "step": 1163 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019718105390471898, + "loss": 1.8809, + "step": 1164 + }, + { + "epoch": 0.1, + "learning_rate": 0.001971742565855487, + "loss": 1.5742, + "step": 1165 + }, + { + "epoch": 0.1, + "learning_rate": 0.001971674511985074, + "loss": 1.7227, + "step": 1166 + }, + { + "epoch": 0.1, + "learning_rate": 0.001971606377441602, + "loss": 1.5957, + "step": 1167 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019715381622307264, + "loss": 1.6367, + "step": 1168 + }, + { + "epoch": 0.1, + "learning_rate": 0.001971469866358112, + "loss": 1.75, + "step": 1169 + }, + { + "epoch": 0.1, + "learning_rate": 0.001971401489829428, + "loss": 1.7285, + "step": 1170 + }, + { + "epoch": 0.1, + "learning_rate": 0.001971333032650352, + "loss": 1.7129, + "step": 1171 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019712644948265677, + "loss": 1.7852, + "step": 1172 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019711958763637648, + "loss": 1.6504, + "step": 1173 + }, + { + "epoch": 0.1, + "learning_rate": 0.001971127177267641, + "loss": 1.6426, + "step": 1174 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019710583975438996, + "loss": 1.8242, + "step": 1175 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019709895371982514, + "loss": 1.6348, + "step": 1176 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019709205962364137, + "loss": 1.6738, + "step": 1177 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019708515746641093, + "loss": 1.7188, + "step": 1178 + }, + { + "epoch": 0.1, + "learning_rate": 0.00197078247248707, + "loss": 1.7324, + "step": 1179 + }, + { + "epoch": 0.1, + "learning_rate": 0.0019707132897110314, + "loss": 1.7441, + "step": 1180 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019706440263417392, + "loss": 1.5859, + "step": 1181 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019705746823849425, + "loss": 1.6895, + "step": 1182 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019705052578463996, + "loss": 1.7227, + "step": 1183 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019704357527318738, + "loss": 1.6504, + "step": 1184 + }, + { + "epoch": 0.11, + "learning_rate": 0.001970366167047136, + "loss": 1.6445, + "step": 1185 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019702965007979633, + "loss": 1.627, + "step": 1186 + }, + { + "epoch": 0.11, + "learning_rate": 0.00197022675399014, + "loss": 1.7188, + "step": 1187 + }, + { + "epoch": 0.11, + "learning_rate": 0.001970156926629457, + "loss": 1.8477, + "step": 1188 + }, + { + "epoch": 0.11, + "learning_rate": 0.001970087018721711, + "loss": 1.8516, + "step": 1189 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019700170302727068, + "loss": 1.7715, + "step": 1190 + }, + { + "epoch": 0.11, + "learning_rate": 0.001969946961288255, + "loss": 1.6973, + "step": 1191 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019698768117741724, + "loss": 1.6934, + "step": 1192 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019698065817362843, + "loss": 1.6719, + "step": 1193 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019697362711804207, + "loss": 1.7168, + "step": 1194 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019696658801124194, + "loss": 1.7598, + "step": 1195 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019695954085381245, + "loss": 1.5449, + "step": 1196 + }, + { + "epoch": 0.11, + "learning_rate": 0.001969524856463387, + "loss": 1.7773, + "step": 1197 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019694542238940646, + "loss": 1.7168, + "step": 1198 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019693835108360214, + "loss": 1.6582, + "step": 1199 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019693127172951278, + "loss": 1.6797, + "step": 1200 + }, + { + "epoch": 0.11, + "learning_rate": 0.001969241843277262, + "loss": 1.6484, + "step": 1201 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019691708887883083, + "loss": 1.6797, + "step": 1202 + }, + { + "epoch": 0.11, + "learning_rate": 0.001969099853834158, + "loss": 1.5957, + "step": 1203 + }, + { + "epoch": 0.11, + "learning_rate": 0.001969028738420708, + "loss": 1.6641, + "step": 1204 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019689575425538633, + "loss": 1.6758, + "step": 1205 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019688862662395343, + "loss": 1.6934, + "step": 1206 + }, + { + "epoch": 0.11, + "learning_rate": 0.001968814909483639, + "loss": 1.6133, + "step": 1207 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019687434722921024, + "loss": 1.7578, + "step": 1208 + }, + { + "epoch": 0.11, + "learning_rate": 0.001968671954670854, + "loss": 1.666, + "step": 1209 + }, + { + "epoch": 0.11, + "learning_rate": 0.001968600356625833, + "loss": 1.7539, + "step": 1210 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019685286781629834, + "loss": 1.6309, + "step": 1211 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019684569192882566, + "loss": 1.6387, + "step": 1212 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019683850800076095, + "loss": 1.6797, + "step": 1213 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019683131603270066, + "loss": 1.7461, + "step": 1214 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019682411602524198, + "loss": 1.6953, + "step": 1215 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019681690797898267, + "loss": 1.6465, + "step": 1216 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019680969189452115, + "loss": 1.6816, + "step": 1217 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019680246777245653, + "loss": 1.707, + "step": 1218 + }, + { + "epoch": 0.11, + "learning_rate": 0.001967952356133886, + "loss": 1.7578, + "step": 1219 + }, + { + "epoch": 0.11, + "learning_rate": 0.001967879954179178, + "loss": 1.8828, + "step": 1220 + }, + { + "epoch": 0.11, + "learning_rate": 0.001967807471866453, + "loss": 1.6016, + "step": 1221 + }, + { + "epoch": 0.11, + "learning_rate": 0.001967734909201728, + "loss": 1.7305, + "step": 1222 + }, + { + "epoch": 0.11, + "learning_rate": 0.001967662266191028, + "loss": 1.6797, + "step": 1223 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019675895428403842, + "loss": 1.7305, + "step": 1224 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019675167391558336, + "loss": 1.6621, + "step": 1225 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019674438551434224, + "loss": 1.7441, + "step": 1226 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019673708908092002, + "loss": 1.7754, + "step": 1227 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019672978461592254, + "loss": 1.7109, + "step": 1228 + }, + { + "epoch": 0.11, + "learning_rate": 0.001967224721199563, + "loss": 1.6562, + "step": 1229 + }, + { + "epoch": 0.11, + "learning_rate": 0.001967151515936283, + "loss": 1.5645, + "step": 1230 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019670782303754645, + "loss": 1.5996, + "step": 1231 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019670048645231914, + "loss": 1.6973, + "step": 1232 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019669314183855553, + "loss": 1.6367, + "step": 1233 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019668578919686533, + "loss": 1.7969, + "step": 1234 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019667842852785907, + "loss": 1.6738, + "step": 1235 + }, + { + "epoch": 0.11, + "learning_rate": 0.001966710598321478, + "loss": 1.6855, + "step": 1236 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019666368311034334, + "loss": 1.7324, + "step": 1237 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019665629836305818, + "loss": 1.625, + "step": 1238 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019664890559090535, + "loss": 1.8184, + "step": 1239 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019664150479449865, + "loss": 1.5449, + "step": 1240 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019663409597445265, + "loss": 1.5801, + "step": 1241 + }, + { + "epoch": 0.11, + "learning_rate": 0.001966266791313823, + "loss": 1.7188, + "step": 1242 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019661925426590347, + "loss": 1.6523, + "step": 1243 + }, + { + "epoch": 0.11, + "learning_rate": 0.001966118213786326, + "loss": 1.6621, + "step": 1244 + }, + { + "epoch": 0.11, + "learning_rate": 0.001966043804701868, + "loss": 1.627, + "step": 1245 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019659693154118383, + "loss": 1.7129, + "step": 1246 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019658947459224216, + "loss": 1.7988, + "step": 1247 + }, + { + "epoch": 0.11, + "learning_rate": 0.001965820096239809, + "loss": 1.7344, + "step": 1248 + }, + { + "epoch": 0.11, + "learning_rate": 0.001965745366370198, + "loss": 1.6426, + "step": 1249 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019656705563197937, + "loss": 1.7754, + "step": 1250 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019655956660948066, + "loss": 1.7539, + "step": 1251 + }, + { + "epoch": 0.11, + "learning_rate": 0.001965520695701454, + "loss": 1.7695, + "step": 1252 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019654456451459615, + "loss": 1.7695, + "step": 1253 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019653705144345593, + "loss": 1.7578, + "step": 1254 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019652953035734855, + "loss": 1.8418, + "step": 1255 + }, + { + "epoch": 0.11, + "learning_rate": 0.001965220012568984, + "loss": 1.7344, + "step": 1256 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019651446414273065, + "loss": 1.7832, + "step": 1257 + }, + { + "epoch": 0.11, + "learning_rate": 0.00196506919015471, + "loss": 1.6621, + "step": 1258 + }, + { + "epoch": 0.11, + "learning_rate": 0.001964993658757459, + "loss": 1.666, + "step": 1259 + }, + { + "epoch": 0.11, + "learning_rate": 0.001964918047241825, + "loss": 1.8926, + "step": 1260 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019648423556140847, + "loss": 1.7168, + "step": 1261 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019647665838805235, + "loss": 1.7715, + "step": 1262 + }, + { + "epoch": 0.11, + "learning_rate": 0.001964690732047431, + "loss": 1.6367, + "step": 1263 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019646148001211065, + "loss": 1.7227, + "step": 1264 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019645387881078525, + "loss": 1.6758, + "step": 1265 + }, + { + "epoch": 0.11, + "learning_rate": 0.001964462696013981, + "loss": 1.6465, + "step": 1266 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019643865238458086, + "loss": 1.5703, + "step": 1267 + }, + { + "epoch": 0.11, + "learning_rate": 0.001964310271609661, + "loss": 1.7598, + "step": 1268 + }, + { + "epoch": 0.11, + "learning_rate": 0.001964233939311867, + "loss": 1.7266, + "step": 1269 + }, + { + "epoch": 0.11, + "learning_rate": 0.001964157526958766, + "loss": 1.6797, + "step": 1270 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019640810345567007, + "loss": 1.6133, + "step": 1271 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019640044621120224, + "loss": 1.6465, + "step": 1272 + }, + { + "epoch": 0.11, + "learning_rate": 0.001963927809631089, + "loss": 1.625, + "step": 1273 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019638510771202637, + "loss": 1.5566, + "step": 1274 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019637742645859176, + "loss": 1.6777, + "step": 1275 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019636973720344284, + "loss": 1.5898, + "step": 1276 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019636203994721794, + "loss": 1.6797, + "step": 1277 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019635433469055615, + "loss": 1.6875, + "step": 1278 + }, + { + "epoch": 0.11, + "learning_rate": 0.001963466214340972, + "loss": 1.7266, + "step": 1279 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019633890017848148, + "loss": 1.6758, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 0.001963311709243501, + "loss": 1.7949, + "step": 1281 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019632343367234468, + "loss": 1.793, + "step": 1282 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019631568842310766, + "loss": 1.623, + "step": 1283 + }, + { + "epoch": 0.11, + "learning_rate": 0.001963079351772821, + "loss": 1.5859, + "step": 1284 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019630017393551164, + "loss": 1.7812, + "step": 1285 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019629240469844073, + "loss": 1.6562, + "step": 1286 + }, + { + "epoch": 0.11, + "learning_rate": 0.001962846274667144, + "loss": 1.6914, + "step": 1287 + }, + { + "epoch": 0.11, + "learning_rate": 0.001962768422409783, + "loss": 1.6016, + "step": 1288 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019626904902187887, + "loss": 1.7266, + "step": 1289 + }, + { + "epoch": 0.11, + "learning_rate": 0.001962612478100631, + "loss": 1.6602, + "step": 1290 + }, + { + "epoch": 0.11, + "learning_rate": 0.001962534386061787, + "loss": 1.7031, + "step": 1291 + }, + { + "epoch": 0.11, + "learning_rate": 0.0019624562141087403, + "loss": 1.7031, + "step": 1292 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019623779622479805, + "loss": 1.6836, + "step": 1293 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019622996304860052, + "loss": 1.7031, + "step": 1294 + }, + { + "epoch": 0.12, + "learning_rate": 0.001962221218829317, + "loss": 1.623, + "step": 1295 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019621427272844276, + "loss": 1.7227, + "step": 1296 + }, + { + "epoch": 0.12, + "learning_rate": 0.001962064155857852, + "loss": 1.6289, + "step": 1297 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019619855045561146, + "loss": 1.6074, + "step": 1298 + }, + { + "epoch": 0.12, + "learning_rate": 0.001961906773385745, + "loss": 1.6953, + "step": 1299 + }, + { + "epoch": 0.12, + "learning_rate": 0.00196182796235328, + "loss": 1.6523, + "step": 1300 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019617490714652634, + "loss": 1.6719, + "step": 1301 + }, + { + "epoch": 0.12, + "learning_rate": 0.001961670100728244, + "loss": 1.7344, + "step": 1302 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019615910501487787, + "loss": 1.6719, + "step": 1303 + }, + { + "epoch": 0.12, + "learning_rate": 0.001961511919733431, + "loss": 1.7793, + "step": 1304 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019614327094887706, + "loss": 1.6992, + "step": 1305 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019613534194213734, + "loss": 1.6758, + "step": 1306 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019612740495378233, + "loss": 1.707, + "step": 1307 + }, + { + "epoch": 0.12, + "learning_rate": 0.00196119459984471, + "loss": 1.7441, + "step": 1308 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019611150703486282, + "loss": 1.6074, + "step": 1309 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019610354610561827, + "loss": 1.6211, + "step": 1310 + }, + { + "epoch": 0.12, + "learning_rate": 0.001960955771973982, + "loss": 1.7734, + "step": 1311 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019608760031086425, + "loss": 1.7227, + "step": 1312 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019607961544667873, + "loss": 1.5996, + "step": 1313 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019607162260550453, + "loss": 1.6738, + "step": 1314 + }, + { + "epoch": 0.12, + "learning_rate": 0.001960636217880053, + "loss": 1.5801, + "step": 1315 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019605561299484526, + "loss": 1.8379, + "step": 1316 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019604759622668935, + "loss": 1.6914, + "step": 1317 + }, + { + "epoch": 0.12, + "learning_rate": 0.001960395714842032, + "loss": 1.6797, + "step": 1318 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019603153876805303, + "loss": 1.793, + "step": 1319 + }, + { + "epoch": 0.12, + "learning_rate": 0.001960234980789058, + "loss": 1.6602, + "step": 1320 + }, + { + "epoch": 0.12, + "learning_rate": 0.00196015449417429, + "loss": 1.7734, + "step": 1321 + }, + { + "epoch": 0.12, + "learning_rate": 0.001960073927842909, + "loss": 1.6523, + "step": 1322 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019599932818016048, + "loss": 1.5859, + "step": 1323 + }, + { + "epoch": 0.12, + "learning_rate": 0.001959912556057072, + "loss": 1.6953, + "step": 1324 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019598317506160133, + "loss": 1.6289, + "step": 1325 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019597508654851377, + "loss": 1.6543, + "step": 1326 + }, + { + "epoch": 0.12, + "learning_rate": 0.00195966990067116, + "loss": 1.5977, + "step": 1327 + }, + { + "epoch": 0.12, + "learning_rate": 0.001959588856180803, + "loss": 1.707, + "step": 1328 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019595077320207948, + "loss": 1.6309, + "step": 1329 + }, + { + "epoch": 0.12, + "learning_rate": 0.001959426528197871, + "loss": 1.7656, + "step": 1330 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019593452447187742, + "loss": 1.6055, + "step": 1331 + }, + { + "epoch": 0.12, + "learning_rate": 0.001959263881590252, + "loss": 1.748, + "step": 1332 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019591824388190595, + "loss": 1.666, + "step": 1333 + }, + { + "epoch": 0.12, + "learning_rate": 0.001959100916411959, + "loss": 1.5996, + "step": 1334 + }, + { + "epoch": 0.12, + "learning_rate": 0.001959019314375719, + "loss": 1.6973, + "step": 1335 + }, + { + "epoch": 0.12, + "learning_rate": 0.001958937632717114, + "loss": 1.7109, + "step": 1336 + }, + { + "epoch": 0.12, + "learning_rate": 0.001958855871442926, + "loss": 1.709, + "step": 1337 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019587740305599425, + "loss": 1.6289, + "step": 1338 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019586921100749597, + "loss": 1.7695, + "step": 1339 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019586101099947776, + "loss": 1.6738, + "step": 1340 + }, + { + "epoch": 0.12, + "learning_rate": 0.001958528030326205, + "loss": 1.7129, + "step": 1341 + }, + { + "epoch": 0.12, + "learning_rate": 0.001958445871076056, + "loss": 1.6309, + "step": 1342 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019583636322511528, + "loss": 1.6758, + "step": 1343 + }, + { + "epoch": 0.12, + "learning_rate": 0.001958281313858322, + "loss": 1.7129, + "step": 1344 + }, + { + "epoch": 0.12, + "learning_rate": 0.001958198915904399, + "loss": 1.623, + "step": 1345 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019581164383962245, + "loss": 1.6875, + "step": 1346 + }, + { + "epoch": 0.12, + "learning_rate": 0.001958033881340647, + "loss": 1.7578, + "step": 1347 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019579512447445192, + "loss": 1.7969, + "step": 1348 + }, + { + "epoch": 0.12, + "learning_rate": 0.001957868528614703, + "loss": 1.7754, + "step": 1349 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019577857329580663, + "loss": 1.541, + "step": 1350 + }, + { + "epoch": 0.12, + "learning_rate": 0.001957702857781482, + "loss": 1.7773, + "step": 1351 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019576199030918316, + "loss": 1.8203, + "step": 1352 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019575368688960026, + "loss": 1.7734, + "step": 1353 + }, + { + "epoch": 0.12, + "learning_rate": 0.001957453755200888, + "loss": 1.6211, + "step": 1354 + }, + { + "epoch": 0.12, + "learning_rate": 0.001957370562013389, + "loss": 1.7148, + "step": 1355 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019572872893404126, + "loss": 1.6465, + "step": 1356 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019572039371888723, + "loss": 1.7676, + "step": 1357 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019571205055656884, + "loss": 1.6426, + "step": 1358 + }, + { + "epoch": 0.12, + "learning_rate": 0.001957036994477788, + "loss": 1.5352, + "step": 1359 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019569534039321043, + "loss": 1.7148, + "step": 1360 + }, + { + "epoch": 0.12, + "learning_rate": 0.001956869733935578, + "loss": 1.7148, + "step": 1361 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019567859844951546, + "loss": 1.6602, + "step": 1362 + }, + { + "epoch": 0.12, + "learning_rate": 0.001956702155617789, + "loss": 1.6191, + "step": 1363 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019566182473104393, + "loss": 1.8203, + "step": 1364 + }, + { + "epoch": 0.12, + "learning_rate": 0.001956534259580074, + "loss": 1.709, + "step": 1365 + }, + { + "epoch": 0.12, + "learning_rate": 0.001956450192433664, + "loss": 1.6523, + "step": 1366 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019563660458781903, + "loss": 1.6074, + "step": 1367 + }, + { + "epoch": 0.12, + "learning_rate": 0.001956281819920639, + "loss": 1.7793, + "step": 1368 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019561975145680026, + "loss": 1.7109, + "step": 1369 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019561131298272806, + "loss": 1.6113, + "step": 1370 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019560286657054797, + "loss": 1.668, + "step": 1371 + }, + { + "epoch": 0.12, + "learning_rate": 0.001955944122209611, + "loss": 1.7246, + "step": 1372 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019558594993466954, + "loss": 1.668, + "step": 1373 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019557747971237576, + "loss": 1.7988, + "step": 1374 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019556900155478308, + "loss": 1.6367, + "step": 1375 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019556051546259527, + "loss": 1.5684, + "step": 1376 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019555202143651707, + "loss": 1.6523, + "step": 1377 + }, + { + "epoch": 0.12, + "learning_rate": 0.001955435194772535, + "loss": 1.6934, + "step": 1378 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019553500958551053, + "loss": 1.7305, + "step": 1379 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019552649176199474, + "loss": 1.7031, + "step": 1380 + }, + { + "epoch": 0.12, + "learning_rate": 0.001955179660074132, + "loss": 1.7012, + "step": 1381 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019550943232247387, + "loss": 1.7188, + "step": 1382 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019550089070788524, + "loss": 1.6621, + "step": 1383 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019549234116435638, + "loss": 1.7344, + "step": 1384 + }, + { + "epoch": 0.12, + "learning_rate": 0.001954837836925972, + "loss": 1.7051, + "step": 1385 + }, + { + "epoch": 0.12, + "learning_rate": 0.001954752182933182, + "loss": 1.666, + "step": 1386 + }, + { + "epoch": 0.12, + "learning_rate": 0.001954666449672305, + "loss": 1.707, + "step": 1387 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019545806371504583, + "loss": 1.7852, + "step": 1388 + }, + { + "epoch": 0.12, + "learning_rate": 0.001954494745374767, + "loss": 1.7324, + "step": 1389 + }, + { + "epoch": 0.12, + "learning_rate": 0.001954408774352362, + "loss": 1.8223, + "step": 1390 + }, + { + "epoch": 0.12, + "learning_rate": 0.001954322724090382, + "loss": 1.7227, + "step": 1391 + }, + { + "epoch": 0.12, + "learning_rate": 0.00195423659459597, + "loss": 1.7129, + "step": 1392 + }, + { + "epoch": 0.12, + "learning_rate": 0.001954150385876278, + "loss": 1.7344, + "step": 1393 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019540640979384626, + "loss": 1.5195, + "step": 1394 + }, + { + "epoch": 0.12, + "learning_rate": 0.001953977730789688, + "loss": 1.6836, + "step": 1395 + }, + { + "epoch": 0.12, + "learning_rate": 0.001953891284437125, + "loss": 1.6816, + "step": 1396 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019538047588879507, + "loss": 1.6172, + "step": 1397 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019537181541493492, + "loss": 1.6914, + "step": 1398 + }, + { + "epoch": 0.12, + "learning_rate": 0.00195363147022851, + "loss": 1.7363, + "step": 1399 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019535447071326313, + "loss": 1.627, + "step": 1400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019534578648689152, + "loss": 1.7461, + "step": 1401 + }, + { + "epoch": 0.12, + "learning_rate": 0.001953370943444573, + "loss": 1.5645, + "step": 1402 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019532839428668207, + "loss": 1.7324, + "step": 1403 + }, + { + "epoch": 0.12, + "learning_rate": 0.001953196863142881, + "loss": 1.5293, + "step": 1404 + }, + { + "epoch": 0.12, + "learning_rate": 0.0019531097042799846, + "loss": 1.6992, + "step": 1405 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019530224662853675, + "loss": 1.625, + "step": 1406 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019529351491662723, + "loss": 1.6758, + "step": 1407 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019528477529299488, + "loss": 1.7031, + "step": 1408 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019527602775836535, + "loss": 1.7871, + "step": 1409 + }, + { + "epoch": 0.13, + "learning_rate": 0.001952672723134648, + "loss": 1.832, + "step": 1410 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019525850895902024, + "loss": 1.6113, + "step": 1411 + }, + { + "epoch": 0.13, + "learning_rate": 0.001952497376957592, + "loss": 1.8066, + "step": 1412 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019524095852440988, + "loss": 1.5859, + "step": 1413 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019523217144570128, + "loss": 1.6895, + "step": 1414 + }, + { + "epoch": 0.13, + "learning_rate": 0.001952233764603628, + "loss": 1.6895, + "step": 1415 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019521457356912477, + "loss": 1.7109, + "step": 1416 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019520576277271798, + "loss": 1.6055, + "step": 1417 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019519694407187393, + "loss": 1.6973, + "step": 1418 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019518811746732485, + "loss": 1.7578, + "step": 1419 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019517928295980354, + "loss": 1.6836, + "step": 1420 + }, + { + "epoch": 0.13, + "learning_rate": 0.001951704405500435, + "loss": 1.6445, + "step": 1421 + }, + { + "epoch": 0.13, + "learning_rate": 0.001951615902387788, + "loss": 1.6387, + "step": 1422 + }, + { + "epoch": 0.13, + "learning_rate": 0.001951527320267443, + "loss": 1.7441, + "step": 1423 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019514386591467545, + "loss": 1.7012, + "step": 1424 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019513499190330832, + "loss": 1.6953, + "step": 1425 + }, + { + "epoch": 0.13, + "learning_rate": 0.001951261099933797, + "loss": 1.6348, + "step": 1426 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019511722018562701, + "loss": 1.6719, + "step": 1427 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019510832248078832, + "loss": 1.8047, + "step": 1428 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019509941687960235, + "loss": 1.6523, + "step": 1429 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019509050338280848, + "loss": 1.6992, + "step": 1430 + }, + { + "epoch": 0.13, + "learning_rate": 0.001950815819911468, + "loss": 1.6738, + "step": 1431 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019507265270535794, + "loss": 1.666, + "step": 1432 + }, + { + "epoch": 0.13, + "learning_rate": 0.001950637155261833, + "loss": 1.8105, + "step": 1433 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019505477045436486, + "loss": 1.5684, + "step": 1434 + }, + { + "epoch": 0.13, + "learning_rate": 0.001950458174906453, + "loss": 1.6641, + "step": 1435 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019503685663576791, + "loss": 1.5898, + "step": 1436 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019502788789047673, + "loss": 1.6602, + "step": 1437 + }, + { + "epoch": 0.13, + "learning_rate": 0.001950189112555163, + "loss": 1.6504, + "step": 1438 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019500992673163193, + "loss": 1.6816, + "step": 1439 + }, + { + "epoch": 0.13, + "learning_rate": 0.001950009343195696, + "loss": 1.7871, + "step": 1440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019499193402007587, + "loss": 1.7168, + "step": 1441 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019498292583389801, + "loss": 1.6523, + "step": 1442 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019497390976178388, + "loss": 1.7812, + "step": 1443 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019496488580448207, + "loss": 1.6016, + "step": 1444 + }, + { + "epoch": 0.13, + "learning_rate": 0.001949558539627418, + "loss": 1.627, + "step": 1445 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019494681423731291, + "loss": 1.6699, + "step": 1446 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019493776662894593, + "loss": 1.6816, + "step": 1447 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019492871113839204, + "loss": 1.666, + "step": 1448 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019491964776640308, + "loss": 1.625, + "step": 1449 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019491057651373153, + "loss": 1.7324, + "step": 1450 + }, + { + "epoch": 0.13, + "learning_rate": 0.001949014973811305, + "loss": 1.6719, + "step": 1451 + }, + { + "epoch": 0.13, + "learning_rate": 0.001948924103693538, + "loss": 1.6816, + "step": 1452 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019488331547915592, + "loss": 1.7031, + "step": 1453 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019487421271129188, + "loss": 1.6523, + "step": 1454 + }, + { + "epoch": 0.13, + "learning_rate": 0.001948651020665175, + "loss": 1.4961, + "step": 1455 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019485598354558916, + "loss": 1.6113, + "step": 1456 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019484685714926395, + "loss": 1.5957, + "step": 1457 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019483772287829954, + "loss": 1.6641, + "step": 1458 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019482858073345435, + "loss": 1.7422, + "step": 1459 + }, + { + "epoch": 0.13, + "learning_rate": 0.001948194307154874, + "loss": 1.6777, + "step": 1460 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019481027282515831, + "loss": 1.7402, + "step": 1461 + }, + { + "epoch": 0.13, + "learning_rate": 0.001948011070632275, + "loss": 1.7832, + "step": 1462 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019479193343045584, + "loss": 1.7266, + "step": 1463 + }, + { + "epoch": 0.13, + "learning_rate": 0.001947827519276051, + "loss": 1.7812, + "step": 1464 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019477356255543747, + "loss": 1.6367, + "step": 1465 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019476436531471598, + "loss": 1.7031, + "step": 1466 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019475516020620416, + "loss": 1.7656, + "step": 1467 + }, + { + "epoch": 0.13, + "learning_rate": 0.001947459472306663, + "loss": 1.7031, + "step": 1468 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019473672638886726, + "loss": 1.6172, + "step": 1469 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019472749768157265, + "loss": 1.627, + "step": 1470 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019471826110954867, + "loss": 1.6211, + "step": 1471 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019470901667356215, + "loss": 1.6387, + "step": 1472 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019469976437438063, + "loss": 1.6602, + "step": 1473 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019469050421277233, + "loss": 1.6855, + "step": 1474 + }, + { + "epoch": 0.13, + "learning_rate": 0.00194681236189506, + "loss": 1.7344, + "step": 1475 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019467196030535114, + "loss": 1.6445, + "step": 1476 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019466267656107787, + "loss": 1.6836, + "step": 1477 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019465338495745698, + "loss": 1.7324, + "step": 1478 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019464408549525994, + "loss": 1.7012, + "step": 1479 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019463477817525873, + "loss": 1.7109, + "step": 1480 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019462546299822622, + "loss": 1.7285, + "step": 1481 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019461613996493573, + "loss": 1.6387, + "step": 1482 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019460680907616129, + "loss": 1.7227, + "step": 1483 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019459747033267762, + "loss": 1.6934, + "step": 1484 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019458812373526007, + "loss": 1.6113, + "step": 1485 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019457876928468463, + "loss": 1.6875, + "step": 1486 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019456940698172795, + "loss": 1.7363, + "step": 1487 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019456003682716734, + "loss": 1.7168, + "step": 1488 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019455065882178079, + "loss": 1.7324, + "step": 1489 + }, + { + "epoch": 0.13, + "learning_rate": 0.001945412729663468, + "loss": 1.7129, + "step": 1490 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019453187926164474, + "loss": 1.6836, + "step": 1491 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019452247770845447, + "loss": 1.5508, + "step": 1492 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019451306830755655, + "loss": 1.6738, + "step": 1493 + }, + { + "epoch": 0.13, + "learning_rate": 0.001945036510597322, + "loss": 1.6035, + "step": 1494 + }, + { + "epoch": 0.13, + "learning_rate": 0.001944942259657633, + "loss": 1.8027, + "step": 1495 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019448479302643237, + "loss": 1.5293, + "step": 1496 + }, + { + "epoch": 0.13, + "learning_rate": 0.001944753522425225, + "loss": 1.6543, + "step": 1497 + }, + { + "epoch": 0.13, + "learning_rate": 0.001944659036148176, + "loss": 1.6758, + "step": 1498 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019445644714410216, + "loss": 1.6309, + "step": 1499 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019444698283116121, + "loss": 1.5605, + "step": 1500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019443751067678055, + "loss": 1.7344, + "step": 1501 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019442803068174662, + "loss": 1.6309, + "step": 1502 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019441854284684653, + "loss": 1.6035, + "step": 1503 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019440904717286794, + "loss": 1.6992, + "step": 1504 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019439954366059925, + "loss": 1.8281, + "step": 1505 + }, + { + "epoch": 0.13, + "learning_rate": 0.001943900323108295, + "loss": 1.5645, + "step": 1506 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019438051312434834, + "loss": 1.5898, + "step": 1507 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019437098610194615, + "loss": 1.8359, + "step": 1508 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019436145124441385, + "loss": 1.7617, + "step": 1509 + }, + { + "epoch": 0.13, + "learning_rate": 0.001943519085525431, + "loss": 1.7188, + "step": 1510 + }, + { + "epoch": 0.13, + "learning_rate": 0.001943423580271262, + "loss": 1.6367, + "step": 1511 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019433279966895602, + "loss": 1.6738, + "step": 1512 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019432323347882618, + "loss": 1.6465, + "step": 1513 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019431365945753094, + "loss": 1.6191, + "step": 1514 + }, + { + "epoch": 0.13, + "learning_rate": 0.001943040776058651, + "loss": 1.6855, + "step": 1515 + }, + { + "epoch": 0.13, + "learning_rate": 0.0019429448792462427, + "loss": 1.793, + "step": 1516 + }, + { + "epoch": 0.13, + "learning_rate": 0.001942848904146046, + "loss": 1.5898, + "step": 1517 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019427528507660293, + "loss": 1.668, + "step": 1518 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019426567191141672, + "loss": 1.6934, + "step": 1519 + }, + { + "epoch": 0.14, + "learning_rate": 0.001942560509198441, + "loss": 1.6875, + "step": 1520 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019424642210268388, + "loss": 1.6465, + "step": 1521 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019423678546073547, + "loss": 1.7344, + "step": 1522 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019422714099479894, + "loss": 1.7715, + "step": 1523 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019421748870567504, + "loss": 1.7051, + "step": 1524 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019420782859416517, + "loss": 1.6797, + "step": 1525 + }, + { + "epoch": 0.14, + "learning_rate": 0.001941981606610713, + "loss": 1.5957, + "step": 1526 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019418848490719617, + "loss": 1.7012, + "step": 1527 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019417880133334303, + "loss": 1.6875, + "step": 1528 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019416910994031593, + "loss": 1.6426, + "step": 1529 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019415941072891947, + "loss": 1.6543, + "step": 1530 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019414970369995893, + "loss": 1.6855, + "step": 1531 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019413998885424023, + "loss": 1.6738, + "step": 1532 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019413026619256994, + "loss": 1.793, + "step": 1533 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019412053571575525, + "loss": 1.6133, + "step": 1534 + }, + { + "epoch": 0.14, + "learning_rate": 0.001941107974246041, + "loss": 1.6758, + "step": 1535 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019410105131992498, + "loss": 1.6562, + "step": 1536 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019409129740252703, + "loss": 1.6465, + "step": 1537 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019408153567322007, + "loss": 1.6289, + "step": 1538 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019407176613281461, + "loss": 1.7344, + "step": 1539 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019406198878212173, + "loss": 1.6523, + "step": 1540 + }, + { + "epoch": 0.14, + "learning_rate": 0.001940522036219532, + "loss": 1.6953, + "step": 1541 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019404241065312142, + "loss": 1.623, + "step": 1542 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019403260987643947, + "loss": 1.6758, + "step": 1543 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019402280129272105, + "loss": 1.6328, + "step": 1544 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019401298490278052, + "loss": 1.6152, + "step": 1545 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019400316070743286, + "loss": 1.584, + "step": 1546 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019399332870749374, + "loss": 1.6152, + "step": 1547 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019398348890377944, + "loss": 1.6641, + "step": 1548 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019397364129710695, + "loss": 1.6289, + "step": 1549 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019396378588829384, + "loss": 1.5391, + "step": 1550 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019395392267815832, + "loss": 1.502, + "step": 1551 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019394405166751937, + "loss": 1.6465, + "step": 1552 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019393417285719643, + "loss": 1.7363, + "step": 1553 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019392428624800972, + "loss": 1.627, + "step": 1554 + }, + { + "epoch": 0.14, + "learning_rate": 0.001939143918407801, + "loss": 1.623, + "step": 1555 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019390448963632902, + "loss": 1.5645, + "step": 1556 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019389457963547864, + "loss": 1.6797, + "step": 1557 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019388466183905173, + "loss": 1.6094, + "step": 1558 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019387473624787167, + "loss": 1.623, + "step": 1559 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019386480286276257, + "loss": 1.6445, + "step": 1560 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019385486168454914, + "loss": 1.7637, + "step": 1561 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019384491271405672, + "loss": 1.7285, + "step": 1562 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019383495595211135, + "loss": 1.7402, + "step": 1563 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019382499139953971, + "loss": 1.6797, + "step": 1564 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019381501905716904, + "loss": 1.707, + "step": 1565 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019380503892582732, + "loss": 1.6699, + "step": 1566 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019379505100634317, + "loss": 1.6504, + "step": 1567 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019378505529954585, + "loss": 1.7637, + "step": 1568 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019377505180626518, + "loss": 1.6465, + "step": 1569 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019376504052733175, + "loss": 1.7266, + "step": 1570 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019375502146357674, + "loss": 1.707, + "step": 1571 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019374499461583194, + "loss": 1.6465, + "step": 1572 + }, + { + "epoch": 0.14, + "learning_rate": 0.001937349599849299, + "loss": 1.6113, + "step": 1573 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019372491757170367, + "loss": 1.707, + "step": 1574 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019371486737698707, + "loss": 1.6191, + "step": 1575 + }, + { + "epoch": 0.14, + "learning_rate": 0.001937048094016145, + "loss": 1.6309, + "step": 1576 + }, + { + "epoch": 0.14, + "learning_rate": 0.00193694743646421, + "loss": 1.6543, + "step": 1577 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019368467011224228, + "loss": 1.709, + "step": 1578 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019367458879991477, + "loss": 1.6797, + "step": 1579 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019366449971027536, + "loss": 1.6523, + "step": 1580 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019365440284416177, + "loss": 1.7676, + "step": 1581 + }, + { + "epoch": 0.14, + "learning_rate": 0.001936442982024122, + "loss": 1.7051, + "step": 1582 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019363418578586572, + "loss": 1.7383, + "step": 1583 + }, + { + "epoch": 0.14, + "learning_rate": 0.001936240655953618, + "loss": 1.7207, + "step": 1584 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019361393763174068, + "loss": 1.6816, + "step": 1585 + }, + { + "epoch": 0.14, + "learning_rate": 0.001936038018958433, + "loss": 1.5742, + "step": 1586 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019359365838851113, + "loss": 1.6719, + "step": 1587 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019358350711058633, + "loss": 1.6777, + "step": 1588 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019357334806291168, + "loss": 1.7422, + "step": 1589 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019356318124633073, + "loss": 1.9023, + "step": 1590 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019355300666168748, + "loss": 1.6074, + "step": 1591 + }, + { + "epoch": 0.14, + "learning_rate": 0.001935428243098267, + "loss": 1.7305, + "step": 1592 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019353263419159379, + "loss": 1.7344, + "step": 1593 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019352243630783477, + "loss": 1.8516, + "step": 1594 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019351223065939634, + "loss": 1.7344, + "step": 1595 + }, + { + "epoch": 0.14, + "learning_rate": 0.001935020172471258, + "loss": 1.6406, + "step": 1596 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019349179607187112, + "loss": 1.666, + "step": 1597 + }, + { + "epoch": 0.14, + "learning_rate": 0.001934815671344809, + "loss": 1.707, + "step": 1598 + }, + { + "epoch": 0.14, + "learning_rate": 0.001934713304358044, + "loss": 1.5449, + "step": 1599 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019346108597669157, + "loss": 1.6445, + "step": 1600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019345083375799286, + "loss": 1.7031, + "step": 1601 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019344057378055954, + "loss": 1.5938, + "step": 1602 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019343030604524341, + "loss": 1.6055, + "step": 1603 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019342003055289694, + "loss": 1.6367, + "step": 1604 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019340974730437327, + "loss": 1.6621, + "step": 1605 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019339945630052614, + "loss": 1.6895, + "step": 1606 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019338915754220998, + "loss": 1.748, + "step": 1607 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019337885103027982, + "loss": 1.5957, + "step": 1608 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019336853676559138, + "loss": 1.6602, + "step": 1609 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019335821474900097, + "loss": 1.7754, + "step": 1610 + }, + { + "epoch": 0.14, + "learning_rate": 0.001933478849813656, + "loss": 1.6289, + "step": 1611 + }, + { + "epoch": 0.14, + "learning_rate": 0.001933375474635429, + "loss": 1.6387, + "step": 1612 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019332720219639114, + "loss": 1.5645, + "step": 1613 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019331684918076919, + "loss": 1.6523, + "step": 1614 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019330648841753666, + "loss": 1.582, + "step": 1615 + }, + { + "epoch": 0.14, + "learning_rate": 0.001932961199075537, + "loss": 1.7168, + "step": 1616 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019328574365168123, + "loss": 1.7344, + "step": 1617 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019327535965078066, + "loss": 1.6816, + "step": 1618 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019326496790571415, + "loss": 1.7051, + "step": 1619 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019325456841734448, + "loss": 1.7617, + "step": 1620 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019324416118653503, + "loss": 1.7012, + "step": 1621 + }, + { + "epoch": 0.14, + "learning_rate": 0.001932337462141499, + "loss": 1.5938, + "step": 1622 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019322332350105377, + "loss": 1.7266, + "step": 1623 + }, + { + "epoch": 0.14, + "learning_rate": 0.00193212893048112, + "loss": 1.6836, + "step": 1624 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019320245485619054, + "loss": 1.7266, + "step": 1625 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019319200892615608, + "loss": 1.5859, + "step": 1626 + }, + { + "epoch": 0.14, + "learning_rate": 0.001931815552588758, + "loss": 1.6582, + "step": 1627 + }, + { + "epoch": 0.14, + "learning_rate": 0.001931710938552177, + "loss": 1.6152, + "step": 1628 + }, + { + "epoch": 0.14, + "learning_rate": 0.0019316062471605032, + "loss": 1.623, + "step": 1629 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019315014784224282, + "loss": 1.5391, + "step": 1630 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019313966323466505, + "loss": 1.6641, + "step": 1631 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019312917089418752, + "loss": 1.623, + "step": 1632 + }, + { + "epoch": 0.15, + "learning_rate": 0.001931186708216813, + "loss": 1.7266, + "step": 1633 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019310816301801824, + "loss": 1.6719, + "step": 1634 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019309764748407067, + "loss": 1.5762, + "step": 1635 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019308712422071168, + "loss": 1.6992, + "step": 1636 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019307659322881498, + "loss": 1.623, + "step": 1637 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019306605450925482, + "loss": 1.7422, + "step": 1638 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019305550806290627, + "loss": 1.6582, + "step": 1639 + }, + { + "epoch": 0.15, + "learning_rate": 0.001930449538906449, + "loss": 1.6055, + "step": 1640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019303439199334696, + "loss": 1.7031, + "step": 1641 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019302382237188938, + "loss": 1.5547, + "step": 1642 + }, + { + "epoch": 0.15, + "learning_rate": 0.001930132450271497, + "loss": 1.75, + "step": 1643 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019300265996000606, + "loss": 1.7402, + "step": 1644 + }, + { + "epoch": 0.15, + "learning_rate": 0.001929920671713373, + "loss": 1.6562, + "step": 1645 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019298146666202293, + "loss": 1.6641, + "step": 1646 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019297085843294297, + "loss": 1.6602, + "step": 1647 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019296024248497826, + "loss": 1.7422, + "step": 1648 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019294961881901013, + "loss": 1.7402, + "step": 1649 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019293898743592058, + "loss": 1.7793, + "step": 1650 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019292834833659234, + "loss": 1.5938, + "step": 1651 + }, + { + "epoch": 0.15, + "learning_rate": 0.001929177015219087, + "loss": 1.752, + "step": 1652 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019290704699275359, + "loss": 1.7363, + "step": 1653 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019289638475001164, + "loss": 1.7363, + "step": 1654 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019288571479456804, + "loss": 1.8223, + "step": 1655 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019287503712730864, + "loss": 1.6895, + "step": 1656 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019286435174912006, + "loss": 1.75, + "step": 1657 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019285365866088934, + "loss": 1.7383, + "step": 1658 + }, + { + "epoch": 0.15, + "learning_rate": 0.001928429578635043, + "loss": 1.6504, + "step": 1659 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019283224935785338, + "loss": 1.623, + "step": 1660 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019282153314482565, + "loss": 1.6953, + "step": 1661 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019281080922531081, + "loss": 1.6094, + "step": 1662 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019280007760019925, + "loss": 1.6328, + "step": 1663 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019278933827038193, + "loss": 1.6504, + "step": 1664 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019277859123675047, + "loss": 1.6875, + "step": 1665 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019276783650019717, + "loss": 1.5781, + "step": 1666 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019275707406161491, + "loss": 1.6914, + "step": 1667 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019274630392189728, + "loss": 1.6582, + "step": 1668 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019273552608193843, + "loss": 1.6699, + "step": 1669 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019272474054263318, + "loss": 1.7012, + "step": 1670 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019271394730487703, + "loss": 1.7383, + "step": 1671 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019270314636956607, + "loss": 1.7305, + "step": 1672 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019269233773759704, + "loss": 1.6641, + "step": 1673 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019268152140986734, + "loss": 1.6953, + "step": 1674 + }, + { + "epoch": 0.15, + "learning_rate": 0.00192670697387275, + "loss": 1.8652, + "step": 1675 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019265986567071867, + "loss": 1.5918, + "step": 1676 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019264902626109763, + "loss": 1.5918, + "step": 1677 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019263817915931183, + "loss": 1.6973, + "step": 1678 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019262732436626188, + "loss": 1.6055, + "step": 1679 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019261646188284893, + "loss": 1.6738, + "step": 1680 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019260559170997493, + "loss": 1.5273, + "step": 1681 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019259471384854231, + "loss": 1.6445, + "step": 1682 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019258382829945417, + "loss": 1.625, + "step": 1683 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019257293506361438, + "loss": 1.627, + "step": 1684 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019256203414192725, + "loss": 1.7363, + "step": 1685 + }, + { + "epoch": 0.15, + "learning_rate": 0.001925511255352979, + "loss": 1.707, + "step": 1686 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019254020924463196, + "loss": 1.8086, + "step": 1687 + }, + { + "epoch": 0.15, + "learning_rate": 0.001925292852708358, + "loss": 1.5938, + "step": 1688 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019251835361481634, + "loss": 1.8379, + "step": 1689 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019250741427748119, + "loss": 1.6172, + "step": 1690 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019249646725973861, + "loss": 1.6895, + "step": 1691 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019248551256249745, + "loss": 1.6855, + "step": 1692 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019247455018666722, + "loss": 1.668, + "step": 1693 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019246358013315808, + "loss": 1.6602, + "step": 1694 + }, + { + "epoch": 0.15, + "learning_rate": 0.001924526024028808, + "loss": 1.627, + "step": 1695 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019244161699674685, + "loss": 1.7383, + "step": 1696 + }, + { + "epoch": 0.15, + "learning_rate": 0.001924306239156682, + "loss": 1.6934, + "step": 1697 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019241962316055763, + "loss": 1.625, + "step": 1698 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019240861473232846, + "loss": 1.5723, + "step": 1699 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019239759863189465, + "loss": 1.6152, + "step": 1700 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019238657486017078, + "loss": 1.4922, + "step": 1701 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019237554341807218, + "loss": 1.7031, + "step": 1702 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019236450430651464, + "loss": 1.6309, + "step": 1703 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019235345752641473, + "loss": 1.6504, + "step": 1704 + }, + { + "epoch": 0.15, + "learning_rate": 0.001923424030786896, + "loss": 1.6777, + "step": 1705 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019233134096425704, + "loss": 1.7539, + "step": 1706 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019232027118403546, + "loss": 1.7168, + "step": 1707 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019230919373894398, + "loss": 1.6289, + "step": 1708 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019229810862990222, + "loss": 1.5977, + "step": 1709 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019228701585783062, + "loss": 1.625, + "step": 1710 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019227591542365005, + "loss": 1.668, + "step": 1711 + }, + { + "epoch": 0.15, + "learning_rate": 0.001922648073282822, + "loss": 1.6562, + "step": 1712 + }, + { + "epoch": 0.15, + "learning_rate": 0.001922536915726493, + "loss": 1.6348, + "step": 1713 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019224256815767418, + "loss": 1.668, + "step": 1714 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019223143708428044, + "loss": 1.6836, + "step": 1715 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019222029835339219, + "loss": 1.6289, + "step": 1716 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019220915196593422, + "loss": 1.6816, + "step": 1717 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019219799792283196, + "loss": 1.6953, + "step": 1718 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019218683622501146, + "loss": 1.6426, + "step": 1719 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019217566687339948, + "loss": 1.7422, + "step": 1720 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019216448986892325, + "loss": 1.6895, + "step": 1721 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019215330521251085, + "loss": 1.7109, + "step": 1722 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019214211290509077, + "loss": 1.6934, + "step": 1723 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019213091294759232, + "loss": 1.498, + "step": 1724 + }, + { + "epoch": 0.15, + "learning_rate": 0.001921197053409454, + "loss": 1.6777, + "step": 1725 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019210849008608046, + "loss": 1.7051, + "step": 1726 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019209726718392864, + "loss": 1.6562, + "step": 1727 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019208603663542175, + "loss": 1.7617, + "step": 1728 + }, + { + "epoch": 0.15, + "learning_rate": 0.001920747984414922, + "loss": 1.6504, + "step": 1729 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019206355260307304, + "loss": 1.6094, + "step": 1730 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019205229912109796, + "loss": 1.6387, + "step": 1731 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019204103799650124, + "loss": 1.5449, + "step": 1732 + }, + { + "epoch": 0.15, + "learning_rate": 0.001920297692302179, + "loss": 1.6836, + "step": 1733 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019201849282318342, + "loss": 1.6504, + "step": 1734 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019200720877633414, + "loss": 1.6113, + "step": 1735 + }, + { + "epoch": 0.15, + "learning_rate": 0.001919959170906068, + "loss": 1.6406, + "step": 1736 + }, + { + "epoch": 0.15, + "learning_rate": 0.00191984617766939, + "loss": 1.6934, + "step": 1737 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019197331080626882, + "loss": 1.6895, + "step": 1738 + }, + { + "epoch": 0.15, + "learning_rate": 0.00191961996209535, + "loss": 1.6953, + "step": 1739 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019195067397767694, + "loss": 1.6992, + "step": 1740 + }, + { + "epoch": 0.15, + "learning_rate": 0.001919393441116347, + "loss": 1.8359, + "step": 1741 + }, + { + "epoch": 0.15, + "learning_rate": 0.0019192800661234886, + "loss": 1.6348, + "step": 1742 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019191666148076082, + "loss": 1.5527, + "step": 1743 + }, + { + "epoch": 0.16, + "learning_rate": 0.001919053087178124, + "loss": 1.6816, + "step": 1744 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019189394832444622, + "loss": 1.8086, + "step": 1745 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019188258030160548, + "loss": 1.7812, + "step": 1746 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019187120465023399, + "loss": 1.7754, + "step": 1747 + }, + { + "epoch": 0.16, + "learning_rate": 0.001918598213712762, + "loss": 1.6445, + "step": 1748 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019184843046567723, + "loss": 1.6953, + "step": 1749 + }, + { + "epoch": 0.16, + "learning_rate": 0.001918370319343828, + "loss": 1.5996, + "step": 1750 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019182562577833926, + "loss": 1.6113, + "step": 1751 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019181421199849363, + "loss": 1.6855, + "step": 1752 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019180279059579349, + "loss": 1.6582, + "step": 1753 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019179136157118713, + "loss": 1.6719, + "step": 1754 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019177992492562343, + "loss": 1.6289, + "step": 1755 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019176848066005192, + "loss": 1.5586, + "step": 1756 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019175702877542277, + "loss": 1.5176, + "step": 1757 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019174556927268678, + "loss": 1.5566, + "step": 1758 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019173410215279532, + "loss": 1.5762, + "step": 1759 + }, + { + "epoch": 0.16, + "learning_rate": 0.001917226274167005, + "loss": 1.6777, + "step": 1760 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019171114506535496, + "loss": 1.623, + "step": 1761 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019169965509971206, + "loss": 1.582, + "step": 1762 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019168815752072575, + "loss": 1.5156, + "step": 1763 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019167665232935058, + "loss": 1.7031, + "step": 1764 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019166513952654182, + "loss": 1.668, + "step": 1765 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019165361911325522, + "loss": 1.666, + "step": 1766 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019164209109044739, + "loss": 1.6797, + "step": 1767 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019163055545907534, + "loss": 1.7031, + "step": 1768 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019161901222009689, + "loss": 1.6953, + "step": 1769 + }, + { + "epoch": 0.16, + "learning_rate": 0.001916074613744703, + "loss": 1.748, + "step": 1770 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019159590292315472, + "loss": 1.6797, + "step": 1771 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019158433686710969, + "loss": 1.6172, + "step": 1772 + }, + { + "epoch": 0.16, + "learning_rate": 0.001915727632072955, + "loss": 1.791, + "step": 1773 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019156118194467307, + "loss": 1.6426, + "step": 1774 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019154959308020393, + "loss": 1.5879, + "step": 1775 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019153799661485022, + "loss": 1.6875, + "step": 1776 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019152639254957473, + "loss": 1.6211, + "step": 1777 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019151478088534092, + "loss": 1.6895, + "step": 1778 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019150316162311284, + "loss": 1.627, + "step": 1779 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019149153476385515, + "loss": 1.6738, + "step": 1780 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019147990030853315, + "loss": 1.6758, + "step": 1781 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019146825825811288, + "loss": 1.6719, + "step": 1782 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019145660861356083, + "loss": 1.7051, + "step": 1783 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019144495137584424, + "loss": 1.6582, + "step": 1784 + }, + { + "epoch": 0.16, + "learning_rate": 0.001914332865459309, + "loss": 1.7188, + "step": 1785 + }, + { + "epoch": 0.16, + "learning_rate": 0.001914216141247894, + "loss": 1.4785, + "step": 1786 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019140993411338872, + "loss": 1.627, + "step": 1787 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019139824651269867, + "loss": 1.5645, + "step": 1788 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019138655132368953, + "loss": 1.6309, + "step": 1789 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019137484854733237, + "loss": 1.625, + "step": 1790 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019136313818459878, + "loss": 1.7637, + "step": 1791 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019135142023646102, + "loss": 1.5918, + "step": 1792 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019133969470389194, + "loss": 1.6973, + "step": 1793 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019132796158786505, + "loss": 1.6035, + "step": 1794 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019131622088935454, + "loss": 1.6641, + "step": 1795 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019130447260933515, + "loss": 1.6797, + "step": 1796 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019129271674878227, + "loss": 1.627, + "step": 1797 + }, + { + "epoch": 0.16, + "learning_rate": 0.001912809533086719, + "loss": 1.7109, + "step": 1798 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019126918228998075, + "loss": 1.6602, + "step": 1799 + }, + { + "epoch": 0.16, + "learning_rate": 0.001912574036936861, + "loss": 1.6445, + "step": 1800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019124561752076584, + "loss": 1.7363, + "step": 1801 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019123382377219853, + "loss": 1.6836, + "step": 1802 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019122202244896331, + "loss": 1.6875, + "step": 1803 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019121021355204004, + "loss": 1.6309, + "step": 1804 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019119839708240912, + "loss": 1.6289, + "step": 1805 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019118657304105164, + "loss": 1.8438, + "step": 1806 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019117474142894921, + "loss": 1.6426, + "step": 1807 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019116290224708422, + "loss": 1.5605, + "step": 1808 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019115105549643958, + "loss": 1.7363, + "step": 1809 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019113920117799887, + "loss": 1.7363, + "step": 1810 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019112733929274636, + "loss": 1.7266, + "step": 1811 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019111546984166674, + "loss": 1.6328, + "step": 1812 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019110359282574559, + "loss": 1.6738, + "step": 1813 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019109170824596895, + "loss": 1.6289, + "step": 1814 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019107981610332354, + "loss": 1.6934, + "step": 1815 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019106791639879668, + "loss": 1.6562, + "step": 1816 + }, + { + "epoch": 0.16, + "learning_rate": 0.001910560091333764, + "loss": 1.7656, + "step": 1817 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019104409430805123, + "loss": 1.7461, + "step": 1818 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019103217192381044, + "loss": 1.6777, + "step": 1819 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019102024198164385, + "loss": 1.7031, + "step": 1820 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019100830448254196, + "loss": 1.6445, + "step": 1821 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019099635942749589, + "loss": 1.6777, + "step": 1822 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019098440681749739, + "loss": 1.668, + "step": 1823 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019097244665353874, + "loss": 1.6133, + "step": 1824 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019096047893661303, + "loss": 1.7676, + "step": 1825 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019094850366771384, + "loss": 1.7344, + "step": 1826 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019093652084783534, + "loss": 1.582, + "step": 1827 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019092453047797253, + "loss": 1.6816, + "step": 1828 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019091253255912083, + "loss": 1.6797, + "step": 1829 + }, + { + "epoch": 0.16, + "learning_rate": 0.001909005270922764, + "loss": 1.6289, + "step": 1830 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019088851407843596, + "loss": 1.584, + "step": 1831 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019087649351859687, + "loss": 1.8027, + "step": 1832 + }, + { + "epoch": 0.16, + "learning_rate": 0.001908644654137572, + "loss": 1.7148, + "step": 1833 + }, + { + "epoch": 0.16, + "learning_rate": 0.001908524297649155, + "loss": 1.6934, + "step": 1834 + }, + { + "epoch": 0.16, + "learning_rate": 0.001908403865730711, + "loss": 1.7324, + "step": 1835 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019082833583922386, + "loss": 1.6211, + "step": 1836 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019081627756437427, + "loss": 1.709, + "step": 1837 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019080421174952349, + "loss": 1.666, + "step": 1838 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019079213839567326, + "loss": 1.6953, + "step": 1839 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019078005750382601, + "loss": 1.6191, + "step": 1840 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019076796907498469, + "loss": 1.7305, + "step": 1841 + }, + { + "epoch": 0.16, + "learning_rate": 0.00190755873110153, + "loss": 1.7461, + "step": 1842 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019074376961033513, + "loss": 1.6738, + "step": 1843 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019073165857653605, + "loss": 1.5918, + "step": 1844 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019071954000976124, + "loss": 1.6562, + "step": 1845 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019070741391101684, + "loss": 1.752, + "step": 1846 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019069528028130961, + "loss": 1.623, + "step": 1847 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019068313912164697, + "loss": 1.6309, + "step": 1848 + }, + { + "epoch": 0.16, + "learning_rate": 0.001906709904330369, + "loss": 1.7441, + "step": 1849 + }, + { + "epoch": 0.16, + "learning_rate": 0.001906588342164881, + "loss": 1.5938, + "step": 1850 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019064667047300975, + "loss": 1.6328, + "step": 1851 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019063449920361183, + "loss": 1.6562, + "step": 1852 + }, + { + "epoch": 0.16, + "learning_rate": 0.0019062232040930479, + "loss": 1.5957, + "step": 1853 + }, + { + "epoch": 0.16, + "learning_rate": 0.001906101340910998, + "loss": 1.6504, + "step": 1854 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019059794025000866, + "loss": 1.6699, + "step": 1855 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019058573888704369, + "loss": 1.6875, + "step": 1856 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019057353000321796, + "loss": 1.6758, + "step": 1857 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019056131359954508, + "loss": 1.7246, + "step": 1858 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019054908967703933, + "loss": 1.752, + "step": 1859 + }, + { + "epoch": 0.17, + "learning_rate": 0.001905368582367156, + "loss": 1.6719, + "step": 1860 + }, + { + "epoch": 0.17, + "learning_rate": 0.001905246192795894, + "loss": 1.8105, + "step": 1861 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019051237280667684, + "loss": 1.6562, + "step": 1862 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019050011881899473, + "loss": 1.7422, + "step": 1863 + }, + { + "epoch": 0.17, + "learning_rate": 0.001904878573175604, + "loss": 1.6777, + "step": 1864 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019047558830339192, + "loss": 1.6016, + "step": 1865 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019046331177750787, + "loss": 1.5312, + "step": 1866 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019045102774092753, + "loss": 1.6699, + "step": 1867 + }, + { + "epoch": 0.17, + "learning_rate": 0.001904387361946708, + "loss": 1.6484, + "step": 1868 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019042643713975812, + "loss": 1.7246, + "step": 1869 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019041413057721069, + "loss": 1.666, + "step": 1870 + }, + { + "epoch": 0.17, + "learning_rate": 0.001904018165080502, + "loss": 1.7324, + "step": 1871 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019038949493329904, + "loss": 1.709, + "step": 1872 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019037716585398022, + "loss": 1.7227, + "step": 1873 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019036482927111736, + "loss": 1.7012, + "step": 1874 + }, + { + "epoch": 0.17, + "learning_rate": 0.001903524851857347, + "loss": 1.6094, + "step": 1875 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019034013359885708, + "loss": 1.6855, + "step": 1876 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019032777451151, + "loss": 1.623, + "step": 1877 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019031540792471963, + "loss": 1.6504, + "step": 1878 + }, + { + "epoch": 0.17, + "learning_rate": 0.001903030338395126, + "loss": 1.666, + "step": 1879 + }, + { + "epoch": 0.17, + "learning_rate": 0.001902906522569163, + "loss": 1.5625, + "step": 1880 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019027826317795876, + "loss": 1.6719, + "step": 1881 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019026586660366854, + "loss": 1.543, + "step": 1882 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019025346253507486, + "loss": 1.7148, + "step": 1883 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019024105097320756, + "loss": 1.5801, + "step": 1884 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019022863191909714, + "loss": 1.6914, + "step": 1885 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019021620537377467, + "loss": 1.6836, + "step": 1886 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019020377133827184, + "loss": 1.6406, + "step": 1887 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019019132981362102, + "loss": 1.623, + "step": 1888 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019017888080085515, + "loss": 1.7441, + "step": 1889 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019016642430100777, + "loss": 1.6699, + "step": 1890 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019015396031511313, + "loss": 1.7598, + "step": 1891 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019014148884420605, + "loss": 1.5488, + "step": 1892 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019012900988932193, + "loss": 1.6523, + "step": 1893 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019011652345149689, + "loss": 1.6602, + "step": 1894 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019010402953176756, + "loss": 1.7266, + "step": 1895 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019009152813117126, + "loss": 1.7266, + "step": 1896 + }, + { + "epoch": 0.17, + "learning_rate": 0.001900790192507459, + "loss": 1.5664, + "step": 1897 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019006650289153009, + "loss": 1.6914, + "step": 1898 + }, + { + "epoch": 0.17, + "learning_rate": 0.001900539790545629, + "loss": 1.7266, + "step": 1899 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019004144774088423, + "loss": 1.6406, + "step": 1900 + }, + { + "epoch": 0.17, + "learning_rate": 0.001900289089515344, + "loss": 1.6445, + "step": 1901 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019001636268755448, + "loss": 1.6445, + "step": 1902 + }, + { + "epoch": 0.17, + "learning_rate": 0.0019000380894998612, + "loss": 1.8223, + "step": 1903 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018999124773987157, + "loss": 1.5645, + "step": 1904 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018997867905825375, + "loss": 1.6875, + "step": 1905 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018996610290617614, + "loss": 1.5742, + "step": 1906 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018995351928468294, + "loss": 1.6523, + "step": 1907 + }, + { + "epoch": 0.17, + "learning_rate": 0.001899409281948188, + "loss": 1.5684, + "step": 1908 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018992832963762917, + "loss": 1.5762, + "step": 1909 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018991572361416003, + "loss": 1.5898, + "step": 1910 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018990311012545796, + "loss": 1.5781, + "step": 1911 + }, + { + "epoch": 0.17, + "learning_rate": 0.001898904891725702, + "loss": 1.7285, + "step": 1912 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018987786075654466, + "loss": 1.7285, + "step": 1913 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018986522487842974, + "loss": 1.7246, + "step": 1914 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018985258153927456, + "loss": 1.6816, + "step": 1915 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018983993074012883, + "loss": 1.7617, + "step": 1916 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018982727248204286, + "loss": 1.5879, + "step": 1917 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018981460676606764, + "loss": 1.666, + "step": 1918 + }, + { + "epoch": 0.17, + "learning_rate": 0.001898019335932547, + "loss": 1.8477, + "step": 1919 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018978925296465623, + "loss": 1.5781, + "step": 1920 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018977656488132506, + "loss": 1.6855, + "step": 1921 + }, + { + "epoch": 0.17, + "learning_rate": 0.001897638693443146, + "loss": 1.6328, + "step": 1922 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018975116635467893, + "loss": 1.6621, + "step": 1923 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018973845591347264, + "loss": 1.6504, + "step": 1924 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018972573802175105, + "loss": 1.7109, + "step": 1925 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018971301268057008, + "loss": 1.6816, + "step": 1926 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018970027989098624, + "loss": 1.6348, + "step": 1927 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018968753965405666, + "loss": 1.6699, + "step": 1928 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018967479197083909, + "loss": 1.6562, + "step": 1929 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018966203684239189, + "loss": 1.5332, + "step": 1930 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018964927426977406, + "loss": 1.7363, + "step": 1931 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018963650425404524, + "loss": 1.75, + "step": 1932 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018962372679626562, + "loss": 1.5781, + "step": 1933 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018961094189749608, + "loss": 1.8086, + "step": 1934 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018959814955879805, + "loss": 1.6836, + "step": 1935 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018958534978123366, + "loss": 1.6328, + "step": 1936 + }, + { + "epoch": 0.17, + "learning_rate": 0.001895725425658655, + "loss": 1.6309, + "step": 1937 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018955972791375704, + "loss": 1.6367, + "step": 1938 + }, + { + "epoch": 0.17, + "learning_rate": 0.001895469058259721, + "loss": 1.666, + "step": 1939 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018953407630357526, + "loss": 1.6191, + "step": 1940 + }, + { + "epoch": 0.17, + "learning_rate": 0.001895212393476317, + "loss": 1.666, + "step": 1941 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018950839495920722, + "loss": 1.7461, + "step": 1942 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018949554313936817, + "loss": 1.5508, + "step": 1943 + }, + { + "epoch": 0.17, + "learning_rate": 0.001894826838891816, + "loss": 1.6973, + "step": 1944 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018946981720971516, + "loss": 1.7266, + "step": 1945 + }, + { + "epoch": 0.17, + "learning_rate": 0.001894569431020371, + "loss": 1.7031, + "step": 1946 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018944406156721626, + "loss": 1.5527, + "step": 1947 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018943117260632216, + "loss": 1.7227, + "step": 1948 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018941827622042488, + "loss": 1.6445, + "step": 1949 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018940537241059518, + "loss": 1.6426, + "step": 1950 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018939246117790432, + "loss": 1.6543, + "step": 1951 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018937954252342432, + "loss": 1.7461, + "step": 1952 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018936661644822772, + "loss": 1.6992, + "step": 1953 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018935368295338773, + "loss": 1.6074, + "step": 1954 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018934074203997813, + "loss": 1.6289, + "step": 1955 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018932779370907334, + "loss": 1.5898, + "step": 1956 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018931483796174838, + "loss": 1.6582, + "step": 1957 + }, + { + "epoch": 0.17, + "learning_rate": 0.001893018747990789, + "loss": 1.6016, + "step": 1958 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018928890422214115, + "loss": 1.8301, + "step": 1959 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018927592623201206, + "loss": 1.6914, + "step": 1960 + }, + { + "epoch": 0.17, + "learning_rate": 0.001892629408297691, + "loss": 1.6992, + "step": 1961 + }, + { + "epoch": 0.17, + "learning_rate": 0.001892499480164904, + "loss": 1.7188, + "step": 1962 + }, + { + "epoch": 0.17, + "learning_rate": 0.001892369477932546, + "loss": 1.748, + "step": 1963 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018922394016114112, + "loss": 1.7012, + "step": 1964 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018921092512122993, + "loss": 1.6582, + "step": 1965 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018919790267460154, + "loss": 1.709, + "step": 1966 + }, + { + "epoch": 0.17, + "learning_rate": 0.0018918487282233718, + "loss": 1.7012, + "step": 1967 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018917183556551862, + "loss": 1.7266, + "step": 1968 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018915879090522832, + "loss": 1.6113, + "step": 1969 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018914573884254923, + "loss": 1.584, + "step": 1970 + }, + { + "epoch": 0.18, + "learning_rate": 0.001891326793785651, + "loss": 1.5547, + "step": 1971 + }, + { + "epoch": 0.18, + "learning_rate": 0.001891196125143601, + "loss": 1.7285, + "step": 1972 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018910653825101914, + "loss": 1.623, + "step": 1973 + }, + { + "epoch": 0.18, + "learning_rate": 0.001890934565896277, + "loss": 1.6387, + "step": 1974 + }, + { + "epoch": 0.18, + "learning_rate": 0.001890803675312719, + "loss": 1.7344, + "step": 1975 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018906727107703843, + "loss": 1.6172, + "step": 1976 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018905416722801464, + "loss": 1.4727, + "step": 1977 + }, + { + "epoch": 0.18, + "learning_rate": 0.001890410559852885, + "loss": 1.6875, + "step": 1978 + }, + { + "epoch": 0.18, + "learning_rate": 0.001890279373499485, + "loss": 1.6309, + "step": 1979 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018901481132308385, + "loss": 1.7812, + "step": 1980 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018900167790578432, + "loss": 1.6816, + "step": 1981 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018898853709914034, + "loss": 1.7383, + "step": 1982 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018897538890424294, + "loss": 1.7344, + "step": 1983 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018896223332218365, + "loss": 1.5703, + "step": 1984 + }, + { + "epoch": 0.18, + "learning_rate": 0.001889490703540548, + "loss": 1.7305, + "step": 1985 + }, + { + "epoch": 0.18, + "learning_rate": 0.001889359000009492, + "loss": 1.7324, + "step": 1986 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018892272226396032, + "loss": 1.6973, + "step": 1987 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018890953714418225, + "loss": 1.7266, + "step": 1988 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018889634464270971, + "loss": 1.7129, + "step": 1989 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018888314476063796, + "loss": 1.627, + "step": 1990 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018886993749906292, + "loss": 1.8145, + "step": 1991 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018885672285908113, + "loss": 1.6621, + "step": 1992 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018884350084178972, + "loss": 1.7773, + "step": 1993 + }, + { + "epoch": 0.18, + "learning_rate": 0.001888302714482865, + "loss": 1.7188, + "step": 1994 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018881703467966973, + "loss": 1.6328, + "step": 1995 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018880379053703852, + "loss": 1.6055, + "step": 1996 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018879053902149236, + "loss": 1.7344, + "step": 1997 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018877728013413147, + "loss": 1.6367, + "step": 1998 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018876401387605674, + "loss": 1.7188, + "step": 1999 + }, + { + "epoch": 0.18, + "learning_rate": 0.001887507402483695, + "loss": 1.6055, + "step": 2000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018873745925217186, + "loss": 1.6172, + "step": 2001 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018872417088856644, + "loss": 1.6523, + "step": 2002 + }, + { + "epoch": 0.18, + "learning_rate": 0.001887108751586565, + "loss": 1.7227, + "step": 2003 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018869757206354594, + "loss": 1.5996, + "step": 2004 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018868426160433922, + "loss": 1.7246, + "step": 2005 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018867094378214144, + "loss": 1.6777, + "step": 2006 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018865761859805836, + "loss": 1.584, + "step": 2007 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018864428605319624, + "loss": 1.6934, + "step": 2008 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018863094614866203, + "loss": 1.6406, + "step": 2009 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018861759888556327, + "loss": 1.748, + "step": 2010 + }, + { + "epoch": 0.18, + "learning_rate": 0.001886042442650081, + "loss": 1.7363, + "step": 2011 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018859088228810532, + "loss": 1.6465, + "step": 2012 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018857751295596429, + "loss": 1.6914, + "step": 2013 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018856413626969497, + "loss": 1.6445, + "step": 2014 + }, + { + "epoch": 0.18, + "learning_rate": 0.00188550752230408, + "loss": 1.7168, + "step": 2015 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018853736083921456, + "loss": 1.627, + "step": 2016 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018852396209722646, + "loss": 1.5645, + "step": 2017 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018851055600555618, + "loss": 1.7266, + "step": 2018 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018849714256531666, + "loss": 1.7578, + "step": 2019 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018848372177762166, + "loss": 1.6562, + "step": 2020 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018847029364358536, + "loss": 1.6953, + "step": 2021 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018845685816432267, + "loss": 1.748, + "step": 2022 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018844341534094901, + "loss": 1.7715, + "step": 2023 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018842996517458054, + "loss": 1.707, + "step": 2024 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018841650766633392, + "loss": 1.7051, + "step": 2025 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018840304281732645, + "loss": 1.7441, + "step": 2026 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018838957062867606, + "loss": 1.7754, + "step": 2027 + }, + { + "epoch": 0.18, + "learning_rate": 0.001883760911015013, + "loss": 1.6465, + "step": 2028 + }, + { + "epoch": 0.18, + "learning_rate": 0.001883626042369212, + "loss": 1.6602, + "step": 2029 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018834911003605566, + "loss": 1.7812, + "step": 2030 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018833560850002491, + "loss": 1.6309, + "step": 2031 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018832209962995, + "loss": 1.5566, + "step": 2032 + }, + { + "epoch": 0.18, + "learning_rate": 0.001883085834269524, + "loss": 1.6855, + "step": 2033 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018829505989215439, + "loss": 1.6621, + "step": 2034 + }, + { + "epoch": 0.18, + "learning_rate": 0.001882815290266787, + "loss": 1.6504, + "step": 2035 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018826799083164873, + "loss": 1.7051, + "step": 2036 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018825444530818853, + "loss": 1.7031, + "step": 2037 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018824089245742263, + "loss": 1.6504, + "step": 2038 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018822733228047635, + "loss": 1.6562, + "step": 2039 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018821376477847545, + "loss": 1.6699, + "step": 2040 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018820018995254642, + "loss": 1.5723, + "step": 2041 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018818660780381626, + "loss": 1.6738, + "step": 2042 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018817301833341264, + "loss": 1.6289, + "step": 2043 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018815942154246383, + "loss": 1.668, + "step": 2044 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018814581743209872, + "loss": 1.7852, + "step": 2045 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018813220600344676, + "loss": 1.6074, + "step": 2046 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018811858725763804, + "loss": 1.5938, + "step": 2047 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018810496119580325, + "loss": 1.6152, + "step": 2048 + }, + { + "epoch": 0.18, + "learning_rate": 0.001880913278190737, + "loss": 1.6816, + "step": 2049 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018807768712858128, + "loss": 1.75, + "step": 2050 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018806403912545852, + "loss": 1.6602, + "step": 2051 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018805038381083858, + "loss": 1.7246, + "step": 2052 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018803672118585513, + "loss": 1.6426, + "step": 2053 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018802305125164254, + "loss": 1.6074, + "step": 2054 + }, + { + "epoch": 0.18, + "learning_rate": 0.001880093740093357, + "loss": 1.666, + "step": 2055 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018799568946007024, + "loss": 1.7852, + "step": 2056 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018798199760498226, + "loss": 1.6641, + "step": 2057 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018796829844520855, + "loss": 1.5957, + "step": 2058 + }, + { + "epoch": 0.18, + "learning_rate": 0.001879545919818865, + "loss": 1.7246, + "step": 2059 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018794087821615401, + "loss": 1.6387, + "step": 2060 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018792715714914974, + "loss": 1.8047, + "step": 2061 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018791342878201284, + "loss": 1.6543, + "step": 2062 + }, + { + "epoch": 0.18, + "learning_rate": 0.001878996931158831, + "loss": 1.5527, + "step": 2063 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018788595015190093, + "loss": 1.6367, + "step": 2064 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018787219989120736, + "loss": 1.6914, + "step": 2065 + }, + { + "epoch": 0.18, + "learning_rate": 0.00187858442334944, + "loss": 1.7129, + "step": 2066 + }, + { + "epoch": 0.18, + "learning_rate": 0.00187844677484253, + "loss": 1.6348, + "step": 2067 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018783090534027728, + "loss": 1.7383, + "step": 2068 + }, + { + "epoch": 0.18, + "learning_rate": 0.001878171259041602, + "loss": 1.5352, + "step": 2069 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018780333917704582, + "loss": 1.6211, + "step": 2070 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018778954516007877, + "loss": 1.7227, + "step": 2071 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018777574385440432, + "loss": 1.666, + "step": 2072 + }, + { + "epoch": 0.18, + "learning_rate": 0.001877619352611683, + "loss": 1.5449, + "step": 2073 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018774811938151714, + "loss": 1.625, + "step": 2074 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018773429621659796, + "loss": 1.6348, + "step": 2075 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018772046576755839, + "loss": 1.8379, + "step": 2076 + }, + { + "epoch": 0.18, + "learning_rate": 0.001877066280355467, + "loss": 1.627, + "step": 2077 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018769278302171177, + "loss": 1.6465, + "step": 2078 + }, + { + "epoch": 0.18, + "learning_rate": 0.0018767893072720308, + "loss": 1.7285, + "step": 2079 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018766507115317071, + "loss": 1.6875, + "step": 2080 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018765120430076535, + "loss": 1.6133, + "step": 2081 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018763733017113828, + "loss": 1.6562, + "step": 2082 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018762344876544143, + "loss": 1.6348, + "step": 2083 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018760956008482725, + "loss": 1.5781, + "step": 2084 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018759566413044891, + "loss": 1.7285, + "step": 2085 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018758176090346006, + "loss": 1.6875, + "step": 2086 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018756785040501503, + "loss": 1.6973, + "step": 2087 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018755393263626874, + "loss": 1.668, + "step": 2088 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018754000759837672, + "loss": 1.7578, + "step": 2089 + }, + { + "epoch": 0.19, + "learning_rate": 0.001875260752924951, + "loss": 1.6992, + "step": 2090 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018751213571978056, + "loss": 1.5996, + "step": 2091 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018749818888139048, + "loss": 1.7344, + "step": 2092 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018748423477848277, + "loss": 1.627, + "step": 2093 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018747027341221596, + "loss": 1.6445, + "step": 2094 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018745630478374919, + "loss": 1.6582, + "step": 2095 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018744232889424224, + "loss": 1.6074, + "step": 2096 + }, + { + "epoch": 0.19, + "learning_rate": 0.001874283457448554, + "loss": 1.6562, + "step": 2097 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018741435533674966, + "loss": 1.5078, + "step": 2098 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018740035767108654, + "loss": 1.7305, + "step": 2099 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018738635274902823, + "loss": 1.6719, + "step": 2100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018737234057173745, + "loss": 1.6543, + "step": 2101 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018735832114037756, + "loss": 1.6289, + "step": 2102 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018734429445611254, + "loss": 1.6875, + "step": 2103 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018733026052010695, + "loss": 1.6328, + "step": 2104 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018731621933352593, + "loss": 1.7422, + "step": 2105 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018730217089753527, + "loss": 1.7246, + "step": 2106 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018728811521330135, + "loss": 1.6328, + "step": 2107 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018727405228199112, + "loss": 1.6543, + "step": 2108 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018725998210477215, + "loss": 1.7754, + "step": 2109 + }, + { + "epoch": 0.19, + "learning_rate": 0.001872459046828126, + "loss": 1.6348, + "step": 2110 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018723182001728128, + "loss": 1.6855, + "step": 2111 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018721772810934754, + "loss": 1.6152, + "step": 2112 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018720362896018138, + "loss": 1.752, + "step": 2113 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018718952257095335, + "loss": 1.623, + "step": 2114 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018717540894283465, + "loss": 1.7422, + "step": 2115 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018716128807699706, + "loss": 1.6641, + "step": 2116 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018714715997461293, + "loss": 1.6367, + "step": 2117 + }, + { + "epoch": 0.19, + "learning_rate": 0.001871330246368553, + "loss": 1.7344, + "step": 2118 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018711888206489774, + "loss": 1.7676, + "step": 2119 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018710473225991438, + "loss": 1.6445, + "step": 2120 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018709057522308004, + "loss": 1.709, + "step": 2121 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018707641095557013, + "loss": 1.6797, + "step": 2122 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018706223945856061, + "loss": 1.7773, + "step": 2123 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018704806073322803, + "loss": 1.6133, + "step": 2124 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018703387478074965, + "loss": 1.6543, + "step": 2125 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018701968160230319, + "loss": 1.6992, + "step": 2126 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018700548119906708, + "loss": 1.627, + "step": 2127 + }, + { + "epoch": 0.19, + "learning_rate": 0.001869912735722203, + "loss": 1.6738, + "step": 2128 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018697705872294241, + "loss": 1.6543, + "step": 2129 + }, + { + "epoch": 0.19, + "learning_rate": 0.001869628366524136, + "loss": 1.6836, + "step": 2130 + }, + { + "epoch": 0.19, + "learning_rate": 0.001869486073618147, + "loss": 1.625, + "step": 2131 + }, + { + "epoch": 0.19, + "learning_rate": 0.00186934370852327, + "loss": 1.666, + "step": 2132 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018692012712513257, + "loss": 1.625, + "step": 2133 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018690587618141396, + "loss": 1.7559, + "step": 2134 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018689161802235438, + "loss": 1.7891, + "step": 2135 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018687735264913755, + "loss": 1.5488, + "step": 2136 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018686308006294788, + "loss": 1.7168, + "step": 2137 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018684880026497036, + "loss": 1.6777, + "step": 2138 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018683451325639054, + "loss": 1.6191, + "step": 2139 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018682021903839465, + "loss": 1.6406, + "step": 2140 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018680591761216939, + "loss": 1.7246, + "step": 2141 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018679160897890222, + "loss": 1.668, + "step": 2142 + }, + { + "epoch": 0.19, + "learning_rate": 0.00186777293139781, + "loss": 1.75, + "step": 2143 + }, + { + "epoch": 0.19, + "learning_rate": 0.001867629700959944, + "loss": 1.7246, + "step": 2144 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018674863984873156, + "loss": 1.6348, + "step": 2145 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018673430239918218, + "loss": 1.7695, + "step": 2146 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018671995774853674, + "loss": 1.6426, + "step": 2147 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018670560589798613, + "loss": 1.6816, + "step": 2148 + }, + { + "epoch": 0.19, + "learning_rate": 0.001866912468487219, + "loss": 1.6055, + "step": 2149 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018667688060193621, + "loss": 1.6328, + "step": 2150 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018666250715882184, + "loss": 1.7402, + "step": 2151 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018664812652057214, + "loss": 1.6387, + "step": 2152 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018663373868838104, + "loss": 1.6895, + "step": 2153 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018661934366344312, + "loss": 1.6426, + "step": 2154 + }, + { + "epoch": 0.19, + "learning_rate": 0.001866049414469535, + "loss": 1.6055, + "step": 2155 + }, + { + "epoch": 0.19, + "learning_rate": 0.001865905320401079, + "loss": 1.6348, + "step": 2156 + }, + { + "epoch": 0.19, + "learning_rate": 0.001865761154441027, + "loss": 1.7344, + "step": 2157 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018656169166013478, + "loss": 1.6387, + "step": 2158 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018654726068940176, + "loss": 1.7012, + "step": 2159 + }, + { + "epoch": 0.19, + "learning_rate": 0.001865328225331017, + "loss": 1.752, + "step": 2160 + }, + { + "epoch": 0.19, + "learning_rate": 0.001865183771924333, + "loss": 1.7402, + "step": 2161 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018650392466859596, + "loss": 1.6738, + "step": 2162 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018648946496278953, + "loss": 1.6562, + "step": 2163 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018647499807621455, + "loss": 1.707, + "step": 2164 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018646052401007217, + "loss": 1.5898, + "step": 2165 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018644604276556404, + "loss": 1.8594, + "step": 2166 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018643155434389246, + "loss": 1.7441, + "step": 2167 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018641705874626036, + "loss": 1.7812, + "step": 2168 + }, + { + "epoch": 0.19, + "learning_rate": 0.001864025559738712, + "loss": 1.7148, + "step": 2169 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018638804602792913, + "loss": 1.6074, + "step": 2170 + }, + { + "epoch": 0.19, + "learning_rate": 0.001863735289096388, + "loss": 1.6602, + "step": 2171 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018635900462020546, + "loss": 1.6699, + "step": 2172 + }, + { + "epoch": 0.19, + "learning_rate": 0.00186344473160835, + "loss": 1.668, + "step": 2173 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018632993453273393, + "loss": 1.7324, + "step": 2174 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018631538873710926, + "loss": 1.6992, + "step": 2175 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018630083577516872, + "loss": 1.6855, + "step": 2176 + }, + { + "epoch": 0.19, + "learning_rate": 0.001862862756481205, + "loss": 1.7754, + "step": 2177 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018627170835717349, + "loss": 1.6602, + "step": 2178 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018625713390353713, + "loss": 1.6797, + "step": 2179 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018624255228842147, + "loss": 1.6309, + "step": 2180 + }, + { + "epoch": 0.19, + "learning_rate": 0.001862279635130371, + "loss": 1.6973, + "step": 2181 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018621336757859528, + "loss": 1.8047, + "step": 2182 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018619876448630786, + "loss": 1.6348, + "step": 2183 + }, + { + "epoch": 0.19, + "learning_rate": 0.001861841542373872, + "loss": 1.7051, + "step": 2184 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018616953683304638, + "loss": 1.8496, + "step": 2185 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018615491227449896, + "loss": 1.6387, + "step": 2186 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018614028056295915, + "loss": 1.6484, + "step": 2187 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018612564169964176, + "loss": 1.8359, + "step": 2188 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018611099568576215, + "loss": 1.6992, + "step": 2189 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018609634252253634, + "loss": 1.6621, + "step": 2190 + }, + { + "epoch": 0.19, + "learning_rate": 0.0018608168221118087, + "loss": 1.6309, + "step": 2191 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018606701475291291, + "loss": 1.6797, + "step": 2192 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018605234014895027, + "loss": 1.6836, + "step": 2193 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018603765840051129, + "loss": 1.6172, + "step": 2194 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018602296950881484, + "loss": 1.6133, + "step": 2195 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018600827347508054, + "loss": 1.7461, + "step": 2196 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018599357030052853, + "loss": 1.7285, + "step": 2197 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018597885998637952, + "loss": 1.666, + "step": 2198 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018596414253385478, + "loss": 1.6797, + "step": 2199 + }, + { + "epoch": 0.2, + "learning_rate": 0.001859494179441763, + "loss": 1.7285, + "step": 2200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018593468621856655, + "loss": 1.6602, + "step": 2201 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018591994735824863, + "loss": 1.6641, + "step": 2202 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018590520136444624, + "loss": 1.8125, + "step": 2203 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018589044823838364, + "loss": 1.7656, + "step": 2204 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018587568798128574, + "loss": 1.4941, + "step": 2205 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018586092059437798, + "loss": 1.5078, + "step": 2206 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018584614607888646, + "loss": 1.6582, + "step": 2207 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018583136443603774, + "loss": 1.6621, + "step": 2208 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018581657566705916, + "loss": 1.7031, + "step": 2209 + }, + { + "epoch": 0.2, + "learning_rate": 0.001858017797731785, + "loss": 1.7344, + "step": 2210 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018578697675562421, + "loss": 1.7676, + "step": 2211 + }, + { + "epoch": 0.2, + "learning_rate": 0.001857721666156253, + "loss": 1.6543, + "step": 2212 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018575734935441137, + "loss": 1.6133, + "step": 2213 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018574252497321263, + "loss": 1.666, + "step": 2214 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018572769347325986, + "loss": 1.7383, + "step": 2215 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018571285485578445, + "loss": 1.7148, + "step": 2216 + }, + { + "epoch": 0.2, + "learning_rate": 0.001856980091220184, + "loss": 1.5918, + "step": 2217 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018568315627319422, + "loss": 1.6777, + "step": 2218 + }, + { + "epoch": 0.2, + "learning_rate": 0.001856682963105451, + "loss": 1.6191, + "step": 2219 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018565342923530475, + "loss": 1.623, + "step": 2220 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018563855504870757, + "loss": 1.6621, + "step": 2221 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018562367375198844, + "loss": 1.7559, + "step": 2222 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018560878534638287, + "loss": 1.7656, + "step": 2223 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018559388983312697, + "loss": 1.8164, + "step": 2224 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018557898721345745, + "loss": 1.7051, + "step": 2225 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018556407748861162, + "loss": 1.6406, + "step": 2226 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018554916065982733, + "loss": 1.7734, + "step": 2227 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018553423672834303, + "loss": 1.7031, + "step": 2228 + }, + { + "epoch": 0.2, + "learning_rate": 0.001855193056953978, + "loss": 1.6367, + "step": 2229 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018550436756223127, + "loss": 1.666, + "step": 2230 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018548942233008367, + "loss": 1.6133, + "step": 2231 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018547447000019587, + "loss": 1.6641, + "step": 2232 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018545951057380922, + "loss": 1.7246, + "step": 2233 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018544454405216577, + "loss": 1.7168, + "step": 2234 + }, + { + "epoch": 0.2, + "learning_rate": 0.001854295704365081, + "loss": 1.5449, + "step": 2235 + }, + { + "epoch": 0.2, + "learning_rate": 0.001854145897280794, + "loss": 1.6484, + "step": 2236 + }, + { + "epoch": 0.2, + "learning_rate": 0.001853996019281234, + "loss": 1.6992, + "step": 2237 + }, + { + "epoch": 0.2, + "learning_rate": 0.001853846070378845, + "loss": 1.5977, + "step": 2238 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018536960505860765, + "loss": 1.668, + "step": 2239 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018535459599153834, + "loss": 1.6934, + "step": 2240 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018533957983792276, + "loss": 1.6543, + "step": 2241 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018532455659900754, + "loss": 1.6367, + "step": 2242 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018530952627604008, + "loss": 1.582, + "step": 2243 + }, + { + "epoch": 0.2, + "learning_rate": 0.001852944888702682, + "loss": 1.5156, + "step": 2244 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018527944438294038, + "loss": 1.6641, + "step": 2245 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018526439281530572, + "loss": 1.7422, + "step": 2246 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018524933416861386, + "loss": 1.6113, + "step": 2247 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018523426844411501, + "loss": 1.793, + "step": 2248 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018521919564306003, + "loss": 1.7246, + "step": 2249 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018520411576670034, + "loss": 1.6641, + "step": 2250 + }, + { + "epoch": 0.2, + "learning_rate": 0.001851890288162879, + "loss": 1.7031, + "step": 2251 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018517393479307537, + "loss": 1.6465, + "step": 2252 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018515883369831588, + "loss": 1.8379, + "step": 2253 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018514372553326318, + "loss": 1.6621, + "step": 2254 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018512861029917167, + "loss": 1.7383, + "step": 2255 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018511348799729627, + "loss": 1.6484, + "step": 2256 + }, + { + "epoch": 0.2, + "learning_rate": 0.001850983586288925, + "loss": 1.5273, + "step": 2257 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018508322219521647, + "loss": 1.7109, + "step": 2258 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018506807869752488, + "loss": 1.8125, + "step": 2259 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018505292813707503, + "loss": 1.6309, + "step": 2260 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018503777051512478, + "loss": 1.5996, + "step": 2261 + }, + { + "epoch": 0.2, + "learning_rate": 0.001850226058329326, + "loss": 1.7363, + "step": 2262 + }, + { + "epoch": 0.2, + "learning_rate": 0.001850074340917575, + "loss": 1.6367, + "step": 2263 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018499225529285915, + "loss": 1.6289, + "step": 2264 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018497706943749774, + "loss": 1.5879, + "step": 2265 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018496187652693409, + "loss": 1.7559, + "step": 2266 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018494667656242959, + "loss": 1.6504, + "step": 2267 + }, + { + "epoch": 0.2, + "learning_rate": 0.001849314695452462, + "loss": 1.668, + "step": 2268 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018491625547664647, + "loss": 1.7227, + "step": 2269 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018490103435789356, + "loss": 1.6738, + "step": 2270 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018488580619025119, + "loss": 1.7598, + "step": 2271 + }, + { + "epoch": 0.2, + "learning_rate": 0.001848705709749837, + "loss": 1.5664, + "step": 2272 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018485532871335597, + "loss": 1.7832, + "step": 2273 + }, + { + "epoch": 0.2, + "learning_rate": 0.001848400794066335, + "loss": 1.6348, + "step": 2274 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018482482305608233, + "loss": 1.6504, + "step": 2275 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018480955966296915, + "loss": 1.627, + "step": 2276 + }, + { + "epoch": 0.2, + "learning_rate": 0.001847942892285612, + "loss": 1.6758, + "step": 2277 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018477901175412623, + "loss": 1.6367, + "step": 2278 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018476372724093278, + "loss": 1.5547, + "step": 2279 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018474843569024973, + "loss": 1.7031, + "step": 2280 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018473313710334672, + "loss": 1.6309, + "step": 2281 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018471783148149385, + "loss": 1.6895, + "step": 2282 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018470251882596195, + "loss": 1.709, + "step": 2283 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018468719913802229, + "loss": 1.748, + "step": 2284 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018467187241894677, + "loss": 1.7402, + "step": 2285 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018465653867000796, + "loss": 1.9023, + "step": 2286 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018464119789247892, + "loss": 1.5898, + "step": 2287 + }, + { + "epoch": 0.2, + "learning_rate": 0.001846258500876332, + "loss": 1.6445, + "step": 2288 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018461049525674523, + "loss": 1.5957, + "step": 2289 + }, + { + "epoch": 0.2, + "learning_rate": 0.001845951334010897, + "loss": 1.5801, + "step": 2290 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018457976452194212, + "loss": 1.5898, + "step": 2291 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018456438862057842, + "loss": 1.75, + "step": 2292 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018454900569827523, + "loss": 1.5996, + "step": 2293 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018453361575630964, + "loss": 1.6895, + "step": 2294 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018451821879595948, + "loss": 1.7305, + "step": 2295 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018450281481850303, + "loss": 1.7012, + "step": 2296 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018448740382521925, + "loss": 1.6172, + "step": 2297 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018447198581738756, + "loss": 1.6211, + "step": 2298 + }, + { + "epoch": 0.2, + "learning_rate": 0.001844565607962881, + "loss": 1.6523, + "step": 2299 + }, + { + "epoch": 0.2, + "learning_rate": 0.001844411287632015, + "loss": 1.7148, + "step": 2300 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018442568971940901, + "loss": 1.7051, + "step": 2301 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018441024366619243, + "loss": 1.5781, + "step": 2302 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018439479060483421, + "loss": 1.7246, + "step": 2303 + }, + { + "epoch": 0.2, + "learning_rate": 0.0018437933053661731, + "loss": 1.5039, + "step": 2304 + }, + { + "epoch": 0.21, + "learning_rate": 0.001843638634628253, + "loss": 1.7012, + "step": 2305 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018434838938474235, + "loss": 1.7754, + "step": 2306 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018433290830365317, + "loss": 1.6738, + "step": 2307 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018431742022084305, + "loss": 1.6895, + "step": 2308 + }, + { + "epoch": 0.21, + "learning_rate": 0.001843019251375979, + "loss": 1.6504, + "step": 2309 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018428642305520426, + "loss": 1.7266, + "step": 2310 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018427091397494914, + "loss": 1.666, + "step": 2311 + }, + { + "epoch": 0.21, + "learning_rate": 0.001842553978981201, + "loss": 1.6387, + "step": 2312 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018423987482600551, + "loss": 1.6738, + "step": 2313 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018422434475989405, + "loss": 1.5332, + "step": 2314 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018420880770107513, + "loss": 1.6855, + "step": 2315 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018419326365083873, + "loss": 1.6934, + "step": 2316 + }, + { + "epoch": 0.21, + "learning_rate": 0.001841777126104754, + "loss": 1.6816, + "step": 2317 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018416215458127625, + "loss": 1.7578, + "step": 2318 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018414658956453296, + "loss": 1.7207, + "step": 2319 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018413101756153782, + "loss": 1.6602, + "step": 2320 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018411543857358372, + "loss": 1.6016, + "step": 2321 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018409985260196408, + "loss": 1.5352, + "step": 2322 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018408425964797292, + "loss": 1.7734, + "step": 2323 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018406865971290485, + "loss": 1.668, + "step": 2324 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018405305279805507, + "loss": 1.6191, + "step": 2325 + }, + { + "epoch": 0.21, + "learning_rate": 0.001840374389047193, + "loss": 1.6191, + "step": 2326 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018402181803419387, + "loss": 1.5977, + "step": 2327 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018400619018777576, + "loss": 1.6895, + "step": 2328 + }, + { + "epoch": 0.21, + "learning_rate": 0.001839905553667624, + "loss": 1.5352, + "step": 2329 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018397491357245194, + "loss": 1.707, + "step": 2330 + }, + { + "epoch": 0.21, + "learning_rate": 0.00183959264806143, + "loss": 1.6797, + "step": 2331 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018394360906913482, + "loss": 1.6602, + "step": 2332 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018392794636272719, + "loss": 1.7461, + "step": 2333 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018391227668822052, + "loss": 1.7324, + "step": 2334 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018389660004691582, + "loss": 1.6621, + "step": 2335 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018388091644011455, + "loss": 1.6328, + "step": 2336 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018386522586911892, + "loss": 1.668, + "step": 2337 + }, + { + "epoch": 0.21, + "learning_rate": 0.001838495283352316, + "loss": 1.6328, + "step": 2338 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018383382383975588, + "loss": 1.5293, + "step": 2339 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018381811238399564, + "loss": 1.5879, + "step": 2340 + }, + { + "epoch": 0.21, + "learning_rate": 0.001838023939692553, + "loss": 1.6836, + "step": 2341 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018378666859683985, + "loss": 1.6094, + "step": 2342 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018377093626805495, + "loss": 1.6562, + "step": 2343 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018375519698420675, + "loss": 1.6133, + "step": 2344 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018373945074660197, + "loss": 1.6875, + "step": 2345 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018372369755654796, + "loss": 1.7754, + "step": 2346 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018370793741535262, + "loss": 1.625, + "step": 2347 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018369217032432443, + "loss": 1.6621, + "step": 2348 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018367639628477249, + "loss": 1.6055, + "step": 2349 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018366061529800637, + "loss": 1.5605, + "step": 2350 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018364482736533635, + "loss": 1.5859, + "step": 2351 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018362903248807312, + "loss": 1.6719, + "step": 2352 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018361323066752818, + "loss": 1.6191, + "step": 2353 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018359742190501335, + "loss": 1.6035, + "step": 2354 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018358160620184122, + "loss": 1.7188, + "step": 2355 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018356578355932488, + "loss": 1.623, + "step": 2356 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018354995397877798, + "loss": 1.6797, + "step": 2357 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018353411746151475, + "loss": 1.543, + "step": 2358 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018351827400885008, + "loss": 1.6328, + "step": 2359 + }, + { + "epoch": 0.21, + "learning_rate": 0.001835024236220993, + "loss": 1.7188, + "step": 2360 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018348656630257842, + "loss": 1.6777, + "step": 2361 + }, + { + "epoch": 0.21, + "learning_rate": 0.00183470702051604, + "loss": 1.6836, + "step": 2362 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018345483087049313, + "loss": 1.6348, + "step": 2363 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018343895276056353, + "loss": 1.707, + "step": 2364 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018342306772313349, + "loss": 1.6758, + "step": 2365 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018340717575952184, + "loss": 1.6719, + "step": 2366 + }, + { + "epoch": 0.21, + "learning_rate": 0.00183391276871048, + "loss": 1.4297, + "step": 2367 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018337537105903201, + "loss": 1.6777, + "step": 2368 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018335945832479443, + "loss": 1.5977, + "step": 2369 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018334353866965639, + "loss": 1.6895, + "step": 2370 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018332761209493963, + "loss": 1.7891, + "step": 2371 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018331167860196644, + "loss": 1.8027, + "step": 2372 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018329573819205974, + "loss": 1.5195, + "step": 2373 + }, + { + "epoch": 0.21, + "learning_rate": 0.001832797908665429, + "loss": 1.6699, + "step": 2374 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018326383662674002, + "loss": 1.627, + "step": 2375 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018324787547397566, + "loss": 1.7188, + "step": 2376 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018323190740957502, + "loss": 1.5234, + "step": 2377 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018321593243486377, + "loss": 1.5625, + "step": 2378 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018319995055116832, + "loss": 1.7598, + "step": 2379 + }, + { + "epoch": 0.21, + "learning_rate": 0.001831839617598155, + "loss": 1.6055, + "step": 2380 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018316796606213282, + "loss": 1.709, + "step": 2381 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018315196345944826, + "loss": 1.6797, + "step": 2382 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018313595395309048, + "loss": 1.6621, + "step": 2383 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018311993754438869, + "loss": 1.6543, + "step": 2384 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018310391423467258, + "loss": 1.5918, + "step": 2385 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018308788402527254, + "loss": 1.6191, + "step": 2386 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018307184691751944, + "loss": 1.7207, + "step": 2387 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018305580291274476, + "loss": 1.6074, + "step": 2388 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018303975201228058, + "loss": 1.8145, + "step": 2389 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018302369421745945, + "loss": 1.625, + "step": 2390 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018300762952961466, + "loss": 1.6797, + "step": 2391 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018299155795007992, + "loss": 1.5957, + "step": 2392 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018297547948018958, + "loss": 1.6875, + "step": 2393 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018295939412127855, + "loss": 1.6914, + "step": 2394 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018294330187468234, + "loss": 1.6582, + "step": 2395 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018292720274173696, + "loss": 1.6484, + "step": 2396 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018291109672377908, + "loss": 1.7402, + "step": 2397 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018289498382214585, + "loss": 1.6797, + "step": 2398 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018287886403817507, + "loss": 1.5996, + "step": 2399 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018286273737320507, + "loss": 1.5312, + "step": 2400 + }, + { + "epoch": 0.21, + "learning_rate": 0.001828466038285748, + "loss": 1.6797, + "step": 2401 + }, + { + "epoch": 0.21, + "learning_rate": 0.001828304634056237, + "loss": 1.6973, + "step": 2402 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018281431610569184, + "loss": 1.627, + "step": 2403 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018279816193011984, + "loss": 1.7227, + "step": 2404 + }, + { + "epoch": 0.21, + "learning_rate": 0.001827820008802489, + "loss": 1.6094, + "step": 2405 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018276583295742082, + "loss": 1.7676, + "step": 2406 + }, + { + "epoch": 0.21, + "learning_rate": 0.001827496581629779, + "loss": 1.6152, + "step": 2407 + }, + { + "epoch": 0.21, + "learning_rate": 0.00182733476498263, + "loss": 1.6699, + "step": 2408 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018271728796461972, + "loss": 1.666, + "step": 2409 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018270109256339202, + "loss": 1.6836, + "step": 2410 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018268489029592456, + "loss": 1.7188, + "step": 2411 + }, + { + "epoch": 0.21, + "learning_rate": 0.001826686811635625, + "loss": 1.5898, + "step": 2412 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018265246516765163, + "loss": 1.6914, + "step": 2413 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018263624230953827, + "loss": 1.6328, + "step": 2414 + }, + { + "epoch": 0.21, + "learning_rate": 0.001826200125905693, + "loss": 1.7285, + "step": 2415 + }, + { + "epoch": 0.21, + "learning_rate": 0.0018260377601209221, + "loss": 1.6445, + "step": 2416 + }, + { + "epoch": 0.22, + "learning_rate": 0.00182587532575455, + "loss": 1.7051, + "step": 2417 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018257128228200636, + "loss": 1.7539, + "step": 2418 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018255502513309537, + "loss": 1.7773, + "step": 2419 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018253876113007183, + "loss": 1.6895, + "step": 2420 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018252249027428608, + "loss": 1.6367, + "step": 2421 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018250621256708892, + "loss": 1.6504, + "step": 2422 + }, + { + "epoch": 0.22, + "learning_rate": 0.001824899280098319, + "loss": 1.707, + "step": 2423 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018247363660386698, + "loss": 1.666, + "step": 2424 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018245733835054675, + "loss": 1.5137, + "step": 2425 + }, + { + "epoch": 0.22, + "learning_rate": 0.001824410332512244, + "loss": 1.6016, + "step": 2426 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018242472130725364, + "loss": 1.6855, + "step": 2427 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018240840251998877, + "loss": 1.7207, + "step": 2428 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018239207689078462, + "loss": 1.6211, + "step": 2429 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018237574442099666, + "loss": 1.5449, + "step": 2430 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018235940511198093, + "loss": 1.7031, + "step": 2431 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018234305896509392, + "loss": 1.6289, + "step": 2432 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018232670598169278, + "loss": 1.5918, + "step": 2433 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018231034616313525, + "loss": 1.7324, + "step": 2434 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018229397951077958, + "loss": 1.6504, + "step": 2435 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018227760602598459, + "loss": 1.5898, + "step": 2436 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018226122571010973, + "loss": 1.6172, + "step": 2437 + }, + { + "epoch": 0.22, + "learning_rate": 0.001822448385645149, + "loss": 1.709, + "step": 2438 + }, + { + "epoch": 0.22, + "learning_rate": 0.001822284445905607, + "loss": 1.7266, + "step": 2439 + }, + { + "epoch": 0.22, + "learning_rate": 0.001822120437896082, + "loss": 1.5918, + "step": 2440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018219563616301913, + "loss": 1.6738, + "step": 2441 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018217922171215567, + "loss": 1.7129, + "step": 2442 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018216280043838065, + "loss": 1.7285, + "step": 2443 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018214637234305745, + "loss": 1.6875, + "step": 2444 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018212993742754997, + "loss": 1.623, + "step": 2445 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018211349569322278, + "loss": 1.7266, + "step": 2446 + }, + { + "epoch": 0.22, + "learning_rate": 0.001820970471414409, + "loss": 1.6035, + "step": 2447 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018208059177356995, + "loss": 1.7109, + "step": 2448 + }, + { + "epoch": 0.22, + "learning_rate": 0.001820641295909762, + "loss": 1.5098, + "step": 2449 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018204766059502638, + "loss": 1.7324, + "step": 2450 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018203118478708781, + "loss": 1.623, + "step": 2451 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018201470216852843, + "loss": 1.7012, + "step": 2452 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018199821274071668, + "loss": 1.75, + "step": 2453 + }, + { + "epoch": 0.22, + "learning_rate": 0.001819817165050216, + "loss": 1.7324, + "step": 2454 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018196521346281278, + "loss": 1.5996, + "step": 2455 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018194870361546039, + "loss": 1.7246, + "step": 2456 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018193218696433513, + "loss": 1.6367, + "step": 2457 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018191566351080833, + "loss": 1.623, + "step": 2458 + }, + { + "epoch": 0.22, + "learning_rate": 0.001818991332562518, + "loss": 1.6836, + "step": 2459 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018188259620203803, + "loss": 1.6621, + "step": 2460 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018186605234953995, + "loss": 1.627, + "step": 2461 + }, + { + "epoch": 0.22, + "learning_rate": 0.001818495017001311, + "loss": 1.7129, + "step": 2462 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018183294425518563, + "loss": 1.7148, + "step": 2463 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018181638001607818, + "loss": 1.7188, + "step": 2464 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018179980898418407, + "loss": 1.7344, + "step": 2465 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018178323116087903, + "loss": 1.6816, + "step": 2466 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018176664654753945, + "loss": 1.7598, + "step": 2467 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018175005514554227, + "loss": 1.6094, + "step": 2468 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018173345695626497, + "loss": 1.7285, + "step": 2469 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018171685198108565, + "loss": 1.582, + "step": 2470 + }, + { + "epoch": 0.22, + "learning_rate": 0.001817002402213829, + "loss": 1.6133, + "step": 2471 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018168362167853593, + "loss": 1.6035, + "step": 2472 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018166699635392446, + "loss": 1.6289, + "step": 2473 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018165036424892882, + "loss": 1.7188, + "step": 2474 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018163372536492992, + "loss": 1.5957, + "step": 2475 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018161707970330914, + "loss": 1.6367, + "step": 2476 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018160042726544851, + "loss": 1.6719, + "step": 2477 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018158376805273062, + "loss": 1.7246, + "step": 2478 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018156710206653855, + "loss": 1.6387, + "step": 2479 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018155042930825603, + "loss": 1.709, + "step": 2480 + }, + { + "epoch": 0.22, + "learning_rate": 0.001815337497792673, + "loss": 1.7754, + "step": 2481 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018151706348095715, + "loss": 1.6816, + "step": 2482 + }, + { + "epoch": 0.22, + "learning_rate": 0.00181500370414711, + "loss": 1.6504, + "step": 2483 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018148367058191473, + "loss": 1.5742, + "step": 2484 + }, + { + "epoch": 0.22, + "learning_rate": 0.001814669639839549, + "loss": 1.5996, + "step": 2485 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018145025062221857, + "loss": 1.6289, + "step": 2486 + }, + { + "epoch": 0.22, + "learning_rate": 0.001814335304980933, + "loss": 1.6895, + "step": 2487 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018141680361296732, + "loss": 1.541, + "step": 2488 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018140006996822935, + "loss": 1.6289, + "step": 2489 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018138332956526876, + "loss": 1.6543, + "step": 2490 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018136658240547531, + "loss": 1.6758, + "step": 2491 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018134982849023955, + "loss": 1.5488, + "step": 2492 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018133306782095243, + "loss": 1.7227, + "step": 2493 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018131630039900544, + "loss": 1.6367, + "step": 2494 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018129952622579074, + "loss": 1.6602, + "step": 2495 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018128274530270101, + "loss": 1.6289, + "step": 2496 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018126595763112948, + "loss": 1.6328, + "step": 2497 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018124916321246994, + "loss": 1.5957, + "step": 2498 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018123236204811671, + "loss": 1.7891, + "step": 2499 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018121555413946472, + "loss": 1.748, + "step": 2500 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018119873948790948, + "loss": 1.627, + "step": 2501 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018118191809484699, + "loss": 1.6797, + "step": 2502 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018116508996167385, + "loss": 1.6992, + "step": 2503 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018114825508978721, + "loss": 1.5156, + "step": 2504 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018113141348058475, + "loss": 1.6992, + "step": 2505 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018111456513546479, + "loss": 1.6738, + "step": 2506 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018109771005582617, + "loss": 1.6914, + "step": 2507 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018108084824306821, + "loss": 1.6152, + "step": 2508 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018106397969859089, + "loss": 1.6934, + "step": 2509 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018104710442379476, + "loss": 1.6055, + "step": 2510 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018103022242008085, + "loss": 1.6758, + "step": 2511 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018101333368885078, + "loss": 1.5879, + "step": 2512 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018099643823150673, + "loss": 1.5938, + "step": 2513 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018097953604945148, + "loss": 1.6016, + "step": 2514 + }, + { + "epoch": 0.22, + "learning_rate": 0.001809626271440883, + "loss": 1.707, + "step": 2515 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018094571151682102, + "loss": 1.6836, + "step": 2516 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018092878916905408, + "loss": 1.6465, + "step": 2517 + }, + { + "epoch": 0.22, + "learning_rate": 0.001809118601021925, + "loss": 1.6074, + "step": 2518 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018089492431764174, + "loss": 1.7031, + "step": 2519 + }, + { + "epoch": 0.22, + "learning_rate": 0.001808779818168079, + "loss": 1.6211, + "step": 2520 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018086103260109767, + "loss": 1.6699, + "step": 2521 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018084407667191826, + "loss": 1.623, + "step": 2522 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018082711403067738, + "loss": 1.4902, + "step": 2523 + }, + { + "epoch": 0.22, + "learning_rate": 0.001808101446787834, + "loss": 1.6074, + "step": 2524 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018079316861764516, + "loss": 1.6445, + "step": 2525 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018077618584867211, + "loss": 1.7383, + "step": 2526 + }, + { + "epoch": 0.22, + "learning_rate": 0.001807591963732742, + "loss": 1.7559, + "step": 2527 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018074220019286204, + "loss": 1.6602, + "step": 2528 + }, + { + "epoch": 0.22, + "learning_rate": 0.0018072519730884672, + "loss": 1.6172, + "step": 2529 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018070818772263988, + "loss": 1.5977, + "step": 2530 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018069117143565375, + "loss": 1.7305, + "step": 2531 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018067414844930108, + "loss": 1.5508, + "step": 2532 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018065711876499525, + "loss": 1.707, + "step": 2533 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018064008238415008, + "loss": 1.6484, + "step": 2534 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018062303930818006, + "loss": 1.6934, + "step": 2535 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018060598953850017, + "loss": 1.6016, + "step": 2536 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018058893307652598, + "loss": 1.6992, + "step": 2537 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018057186992367358, + "loss": 1.5508, + "step": 2538 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018055480008135963, + "loss": 1.541, + "step": 2539 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018053772355100138, + "loss": 1.6699, + "step": 2540 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018052064033401657, + "loss": 1.7891, + "step": 2541 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018050355043182354, + "loss": 1.5801, + "step": 2542 + }, + { + "epoch": 0.23, + "learning_rate": 0.001804864538458412, + "loss": 1.6426, + "step": 2543 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018046935057748896, + "loss": 1.5879, + "step": 2544 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018045224062818682, + "loss": 1.6035, + "step": 2545 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018043512399935535, + "loss": 2.0059, + "step": 2546 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018041800069241564, + "loss": 1.6738, + "step": 2547 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018040087070878934, + "loss": 1.6445, + "step": 2548 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018038373404989868, + "loss": 1.7402, + "step": 2549 + }, + { + "epoch": 0.23, + "learning_rate": 0.001803665907171664, + "loss": 1.6973, + "step": 2550 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018034944071201587, + "loss": 1.6035, + "step": 2551 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018033228403587093, + "loss": 1.5957, + "step": 2552 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018031512069015601, + "loss": 1.5957, + "step": 2553 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018029795067629611, + "loss": 1.6797, + "step": 2554 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018028077399571672, + "loss": 1.6582, + "step": 2555 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018026359064984398, + "loss": 1.6934, + "step": 2556 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018024640064010454, + "loss": 1.6934, + "step": 2557 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018022920396792556, + "loss": 1.7031, + "step": 2558 + }, + { + "epoch": 0.23, + "learning_rate": 0.001802120006347348, + "loss": 1.584, + "step": 2559 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018019479064196057, + "loss": 1.7285, + "step": 2560 + }, + { + "epoch": 0.23, + "learning_rate": 0.001801775739910317, + "loss": 1.5508, + "step": 2561 + }, + { + "epoch": 0.23, + "learning_rate": 0.001801603506833777, + "loss": 1.5918, + "step": 2562 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018014312072042837, + "loss": 1.6035, + "step": 2563 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018012588410361437, + "loss": 1.6738, + "step": 2564 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018010864083436667, + "loss": 1.6445, + "step": 2565 + }, + { + "epoch": 0.23, + "learning_rate": 0.001800913909141169, + "loss": 1.7031, + "step": 2566 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018007413434429729, + "loss": 1.5684, + "step": 2567 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018005687112634052, + "loss": 1.6211, + "step": 2568 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018003960126167986, + "loss": 1.7812, + "step": 2569 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018002232475174916, + "loss": 1.5781, + "step": 2570 + }, + { + "epoch": 0.23, + "learning_rate": 0.0018000504159798277, + "loss": 1.6641, + "step": 2571 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017998775180181567, + "loss": 1.6484, + "step": 2572 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017997045536468327, + "loss": 1.6758, + "step": 2573 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017995315228802164, + "loss": 1.668, + "step": 2574 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017993584257326738, + "loss": 1.6738, + "step": 2575 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017991852622185763, + "loss": 1.5605, + "step": 2576 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017990120323523003, + "loss": 1.6406, + "step": 2577 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017988387361482284, + "loss": 1.6172, + "step": 2578 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017986653736207483, + "loss": 1.502, + "step": 2579 + }, + { + "epoch": 0.23, + "learning_rate": 0.001798491944784254, + "loss": 1.7324, + "step": 2580 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017983184496531436, + "loss": 1.6758, + "step": 2581 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017981448882418223, + "loss": 1.6719, + "step": 2582 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017979712605646996, + "loss": 1.6621, + "step": 2583 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017977975666361905, + "loss": 1.6348, + "step": 2584 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017976238064707165, + "loss": 1.6211, + "step": 2585 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017974499800827038, + "loss": 1.6758, + "step": 2586 + }, + { + "epoch": 0.23, + "learning_rate": 0.001797276087486584, + "loss": 1.6465, + "step": 2587 + }, + { + "epoch": 0.23, + "learning_rate": 0.001797102128696795, + "loss": 1.6289, + "step": 2588 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017969281037277793, + "loss": 1.5625, + "step": 2589 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017967540125939855, + "loss": 1.6211, + "step": 2590 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017965798553098676, + "loss": 1.5332, + "step": 2591 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017964056318898844, + "loss": 1.6445, + "step": 2592 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017962313423485017, + "loss": 1.6113, + "step": 2593 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017960569867001889, + "loss": 1.6816, + "step": 2594 + }, + { + "epoch": 0.23, + "learning_rate": 0.001795882564959422, + "loss": 1.6992, + "step": 2595 + }, + { + "epoch": 0.23, + "learning_rate": 0.001795708077140683, + "loss": 1.6699, + "step": 2596 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017955335232584582, + "loss": 1.7168, + "step": 2597 + }, + { + "epoch": 0.23, + "learning_rate": 0.00179535890332724, + "loss": 1.6797, + "step": 2598 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017951842173615261, + "loss": 1.623, + "step": 2599 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017950094653758194, + "loss": 1.6914, + "step": 2600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017948346473846298, + "loss": 1.6719, + "step": 2601 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017946597634024705, + "loss": 1.7539, + "step": 2602 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017944848134438614, + "loss": 1.5859, + "step": 2603 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017943097975233276, + "loss": 1.5977, + "step": 2604 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017941347156554, + "loss": 1.6699, + "step": 2605 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017939595678546148, + "loss": 1.5664, + "step": 2606 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017937843541355135, + "loss": 1.707, + "step": 2607 + }, + { + "epoch": 0.23, + "learning_rate": 0.001793609074512643, + "loss": 1.6934, + "step": 2608 + }, + { + "epoch": 0.23, + "learning_rate": 0.001793433729000556, + "loss": 1.6758, + "step": 2609 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017932583176138107, + "loss": 1.7188, + "step": 2610 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017930828403669703, + "loss": 1.666, + "step": 2611 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017929072972746036, + "loss": 1.6719, + "step": 2612 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017927316883512858, + "loss": 1.7695, + "step": 2613 + }, + { + "epoch": 0.23, + "learning_rate": 0.001792556013611596, + "loss": 1.6445, + "step": 2614 + }, + { + "epoch": 0.23, + "learning_rate": 0.00179238027307012, + "loss": 1.5781, + "step": 2615 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017922044667414484, + "loss": 1.5547, + "step": 2616 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017920285946401774, + "loss": 1.6465, + "step": 2617 + }, + { + "epoch": 0.23, + "learning_rate": 0.001791852656780909, + "loss": 1.6055, + "step": 2618 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017916766531782503, + "loss": 1.627, + "step": 2619 + }, + { + "epoch": 0.23, + "learning_rate": 0.001791500583846814, + "loss": 1.6074, + "step": 2620 + }, + { + "epoch": 0.23, + "learning_rate": 0.001791324448801218, + "loss": 1.6797, + "step": 2621 + }, + { + "epoch": 0.23, + "learning_rate": 0.001791148248056086, + "loss": 1.6035, + "step": 2622 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017909719816260475, + "loss": 1.6914, + "step": 2623 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017907956495257361, + "loss": 1.6055, + "step": 2624 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017906192517697923, + "loss": 1.752, + "step": 2625 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017904427883728613, + "loss": 1.625, + "step": 2626 + }, + { + "epoch": 0.23, + "learning_rate": 0.001790266259349594, + "loss": 1.6152, + "step": 2627 + }, + { + "epoch": 0.23, + "learning_rate": 0.001790089664714647, + "loss": 1.543, + "step": 2628 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017899130044826816, + "loss": 1.6777, + "step": 2629 + }, + { + "epoch": 0.23, + "learning_rate": 0.001789736278668365, + "loss": 1.6289, + "step": 2630 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017895594872863697, + "loss": 1.6426, + "step": 2631 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017893826303513741, + "loss": 1.6699, + "step": 2632 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017892057078780618, + "loss": 1.5957, + "step": 2633 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017890287198811215, + "loss": 1.6855, + "step": 2634 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017888516663752472, + "loss": 1.7266, + "step": 2635 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017886745473751397, + "loss": 1.7012, + "step": 2636 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017884973628955031, + "loss": 1.709, + "step": 2637 + }, + { + "epoch": 0.23, + "learning_rate": 0.001788320112951049, + "loss": 1.6992, + "step": 2638 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017881427975564934, + "loss": 1.6426, + "step": 2639 + }, + { + "epoch": 0.23, + "learning_rate": 0.0017879654167265579, + "loss": 1.7246, + "step": 2640 + }, + { + "epoch": 0.23, + "learning_rate": 0.001787787970475969, + "loss": 1.6992, + "step": 2641 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017876104588194594, + "loss": 1.5723, + "step": 2642 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017874328817717674, + "loss": 1.6836, + "step": 2643 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017872552393476356, + "loss": 1.6758, + "step": 2644 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017870775315618132, + "loss": 1.6738, + "step": 2645 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017868997584290541, + "loss": 1.6836, + "step": 2646 + }, + { + "epoch": 0.24, + "learning_rate": 0.001786721919964118, + "loss": 1.7305, + "step": 2647 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017865440161817698, + "loss": 1.6992, + "step": 2648 + }, + { + "epoch": 0.24, + "learning_rate": 0.00178636604709678, + "loss": 1.5273, + "step": 2649 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017861880127239247, + "loss": 1.6543, + "step": 2650 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017860099130779846, + "loss": 1.6973, + "step": 2651 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017858317481737468, + "loss": 1.6895, + "step": 2652 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017856535180260033, + "loss": 1.6406, + "step": 2653 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017854752226495517, + "loss": 1.5508, + "step": 2654 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017852968620591944, + "loss": 1.6699, + "step": 2655 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017851184362697406, + "loss": 1.6797, + "step": 2656 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017849399452960033, + "loss": 1.5508, + "step": 2657 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017847613891528026, + "loss": 1.5645, + "step": 2658 + }, + { + "epoch": 0.24, + "learning_rate": 0.001784582767854962, + "loss": 1.6621, + "step": 2659 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017844040814173122, + "loss": 1.6113, + "step": 2660 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017842253298546882, + "loss": 1.6562, + "step": 2661 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017840465131819313, + "loss": 1.6406, + "step": 2662 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017838676314138873, + "loss": 1.6543, + "step": 2663 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017836886845654077, + "loss": 1.6191, + "step": 2664 + }, + { + "epoch": 0.24, + "learning_rate": 0.00178350967265135, + "loss": 1.6191, + "step": 2665 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017833305956865763, + "loss": 1.6289, + "step": 2666 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017831514536859549, + "loss": 1.6621, + "step": 2667 + }, + { + "epoch": 0.24, + "learning_rate": 0.001782972246664358, + "loss": 1.6602, + "step": 2668 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017827929746366653, + "loss": 1.6406, + "step": 2669 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017826136376177602, + "loss": 1.6113, + "step": 2670 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017824342356225322, + "loss": 1.6543, + "step": 2671 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017822547686658762, + "loss": 1.5977, + "step": 2672 + }, + { + "epoch": 0.24, + "learning_rate": 0.001782075236762693, + "loss": 1.6367, + "step": 2673 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017818956399278869, + "loss": 1.6777, + "step": 2674 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017817159781763699, + "loss": 1.6543, + "step": 2675 + }, + { + "epoch": 0.24, + "learning_rate": 0.001781536251523058, + "loss": 1.6562, + "step": 2676 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017813564599828732, + "loss": 1.543, + "step": 2677 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017811766035707422, + "loss": 1.6797, + "step": 2678 + }, + { + "epoch": 0.24, + "learning_rate": 0.001780996682301598, + "loss": 1.6504, + "step": 2679 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017808166961903785, + "loss": 1.5879, + "step": 2680 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017806366452520264, + "loss": 1.7617, + "step": 2681 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017804565295014912, + "loss": 1.6875, + "step": 2682 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017802763489537265, + "loss": 1.6582, + "step": 2683 + }, + { + "epoch": 0.24, + "learning_rate": 0.001780096103623692, + "loss": 1.6777, + "step": 2684 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017799157935263525, + "loss": 1.6816, + "step": 2685 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017797354186766777, + "loss": 1.6289, + "step": 2686 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017795549790896436, + "loss": 1.6172, + "step": 2687 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017793744747802314, + "loss": 1.7305, + "step": 2688 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017791939057634272, + "loss": 1.6445, + "step": 2689 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017790132720542225, + "loss": 1.6523, + "step": 2690 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017788325736676147, + "loss": 1.6211, + "step": 2691 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017786518106186061, + "loss": 1.7227, + "step": 2692 + }, + { + "epoch": 0.24, + "learning_rate": 0.001778470982922204, + "loss": 1.6035, + "step": 2693 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017782900905934228, + "loss": 1.625, + "step": 2694 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017781091336472798, + "loss": 1.6641, + "step": 2695 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017779281120987999, + "loss": 1.7598, + "step": 2696 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017777470259630116, + "loss": 1.7266, + "step": 2697 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017775658752549497, + "loss": 1.709, + "step": 2698 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017773846599896546, + "loss": 1.7051, + "step": 2699 + }, + { + "epoch": 0.24, + "learning_rate": 0.001777203380182171, + "loss": 1.7461, + "step": 2700 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017770220358475505, + "loss": 1.8125, + "step": 2701 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017768406270008486, + "loss": 1.6953, + "step": 2702 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017766591536571266, + "loss": 1.623, + "step": 2703 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017764776158314516, + "loss": 1.75, + "step": 2704 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017762960135388956, + "loss": 1.6914, + "step": 2705 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017761143467945365, + "loss": 1.5898, + "step": 2706 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017759326156134563, + "loss": 1.7598, + "step": 2707 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017757508200107438, + "loss": 1.7148, + "step": 2708 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017755689600014923, + "loss": 1.6172, + "step": 2709 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017753870356008008, + "loss": 1.5391, + "step": 2710 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017752050468237738, + "loss": 1.6445, + "step": 2711 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017750229936855204, + "loss": 1.5547, + "step": 2712 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017748408762011557, + "loss": 1.6387, + "step": 2713 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017746586943858, + "loss": 1.6738, + "step": 2714 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017744764482545787, + "loss": 1.7207, + "step": 2715 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017742941378226234, + "loss": 1.6113, + "step": 2716 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017741117631050695, + "loss": 1.6133, + "step": 2717 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017739293241170592, + "loss": 1.5352, + "step": 2718 + }, + { + "epoch": 0.24, + "learning_rate": 0.001773746820873739, + "loss": 1.6797, + "step": 2719 + }, + { + "epoch": 0.24, + "learning_rate": 0.001773564253390262, + "loss": 1.7168, + "step": 2720 + }, + { + "epoch": 0.24, + "learning_rate": 0.001773381621681785, + "loss": 1.6348, + "step": 2721 + }, + { + "epoch": 0.24, + "learning_rate": 0.001773198925763471, + "loss": 1.6895, + "step": 2722 + }, + { + "epoch": 0.24, + "learning_rate": 0.001773016165650489, + "loss": 1.6816, + "step": 2723 + }, + { + "epoch": 0.24, + "learning_rate": 0.001772833341358012, + "loss": 1.625, + "step": 2724 + }, + { + "epoch": 0.24, + "learning_rate": 0.001772650452901219, + "loss": 1.6641, + "step": 2725 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017724675002952946, + "loss": 1.6875, + "step": 2726 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017722844835554282, + "loss": 1.6504, + "step": 2727 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017721014026968144, + "loss": 1.6074, + "step": 2728 + }, + { + "epoch": 0.24, + "learning_rate": 0.001771918257734654, + "loss": 1.5918, + "step": 2729 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017717350486841522, + "loss": 1.5312, + "step": 2730 + }, + { + "epoch": 0.24, + "learning_rate": 0.00177155177556052, + "loss": 1.5703, + "step": 2731 + }, + { + "epoch": 0.24, + "learning_rate": 0.001771368438378974, + "loss": 1.5488, + "step": 2732 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017711850371547348, + "loss": 1.6836, + "step": 2733 + }, + { + "epoch": 0.24, + "learning_rate": 0.00177100157190303, + "loss": 1.6348, + "step": 2734 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017708180426390917, + "loss": 1.6621, + "step": 2735 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017706344493781572, + "loss": 1.6836, + "step": 2736 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017704507921354692, + "loss": 1.5723, + "step": 2737 + }, + { + "epoch": 0.24, + "learning_rate": 0.001770267070926276, + "loss": 1.6094, + "step": 2738 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017700832857658311, + "loss": 1.5332, + "step": 2739 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017698994366693928, + "loss": 1.6523, + "step": 2740 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017697155236522256, + "loss": 1.6387, + "step": 2741 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017695315467295984, + "loss": 1.6582, + "step": 2742 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017693475059167864, + "loss": 1.6152, + "step": 2743 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017691634012290689, + "loss": 1.5879, + "step": 2744 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017689792326817317, + "loss": 1.5957, + "step": 2745 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017687950002900652, + "loss": 1.5508, + "step": 2746 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017686107040693648, + "loss": 1.6582, + "step": 2747 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017684263440349325, + "loss": 1.5527, + "step": 2748 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017682419202020739, + "loss": 1.6973, + "step": 2749 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017680574325861012, + "loss": 1.7129, + "step": 2750 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017678728812023316, + "loss": 1.5645, + "step": 2751 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017676882660660868, + "loss": 1.7383, + "step": 2752 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017675035871926952, + "loss": 1.6719, + "step": 2753 + }, + { + "epoch": 0.24, + "learning_rate": 0.0017673188445974893, + "loss": 1.5762, + "step": 2754 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017671340382958073, + "loss": 1.6113, + "step": 2755 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017669491683029927, + "loss": 1.5977, + "step": 2756 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017667642346343947, + "loss": 1.5918, + "step": 2757 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017665792373053667, + "loss": 1.7129, + "step": 2758 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017663941763312685, + "loss": 1.6895, + "step": 2759 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017662090517274647, + "loss": 1.5801, + "step": 2760 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017660238635093253, + "loss": 1.5781, + "step": 2761 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017658386116922253, + "loss": 1.5352, + "step": 2762 + }, + { + "epoch": 0.25, + "learning_rate": 0.001765653296291545, + "loss": 1.584, + "step": 2763 + }, + { + "epoch": 0.25, + "learning_rate": 0.001765467917322671, + "loss": 1.5508, + "step": 2764 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017652824748009932, + "loss": 1.7383, + "step": 2765 + }, + { + "epoch": 0.25, + "learning_rate": 0.001765096968741909, + "loss": 1.5898, + "step": 2766 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017649113991608193, + "loss": 1.6426, + "step": 2767 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017647257660731311, + "loss": 1.584, + "step": 2768 + }, + { + "epoch": 0.25, + "learning_rate": 0.001764540069494257, + "loss": 1.7051, + "step": 2769 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017643543094396138, + "loss": 1.6387, + "step": 2770 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017641684859246245, + "loss": 1.7891, + "step": 2771 + }, + { + "epoch": 0.25, + "learning_rate": 0.001763982598964717, + "loss": 1.8652, + "step": 2772 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017637966485753245, + "loss": 1.5801, + "step": 2773 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017636106347718858, + "loss": 1.7227, + "step": 2774 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017634245575698438, + "loss": 1.707, + "step": 2775 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017632384169846485, + "loss": 1.6113, + "step": 2776 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017630522130317537, + "loss": 1.5137, + "step": 2777 + }, + { + "epoch": 0.25, + "learning_rate": 0.001762865945726619, + "loss": 1.7949, + "step": 2778 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017626796150847091, + "loss": 1.6445, + "step": 2779 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017624932211214941, + "loss": 1.6582, + "step": 2780 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017623067638524496, + "loss": 1.7148, + "step": 2781 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017621202432930559, + "loss": 1.7031, + "step": 2782 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017619336594587992, + "loss": 1.6328, + "step": 2783 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017617470123651698, + "loss": 1.5703, + "step": 2784 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017615603020276645, + "loss": 1.6367, + "step": 2785 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017613735284617853, + "loss": 1.7539, + "step": 2786 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017611866916830384, + "loss": 1.6094, + "step": 2787 + }, + { + "epoch": 0.25, + "learning_rate": 0.001760999791706936, + "loss": 1.6953, + "step": 2788 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017608128285489957, + "loss": 1.5605, + "step": 2789 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017606258022247396, + "loss": 1.5703, + "step": 2790 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017604387127496963, + "loss": 1.6113, + "step": 2791 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017602515601393982, + "loss": 1.6719, + "step": 2792 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017600643444093836, + "loss": 1.6875, + "step": 2793 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017598770655751963, + "loss": 1.7344, + "step": 2794 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017596897236523852, + "loss": 1.6113, + "step": 2795 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017595023186565042, + "loss": 1.6328, + "step": 2796 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017593148506031124, + "loss": 1.6855, + "step": 2797 + }, + { + "epoch": 0.25, + "learning_rate": 0.001759127319507774, + "loss": 1.6113, + "step": 2798 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017589397253860595, + "loss": 1.7441, + "step": 2799 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017587520682535436, + "loss": 1.6309, + "step": 2800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017585643481258062, + "loss": 1.7559, + "step": 2801 + }, + { + "epoch": 0.25, + "learning_rate": 0.001758376565018433, + "loss": 1.7617, + "step": 2802 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017581887189470145, + "loss": 1.8145, + "step": 2803 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017580008099271462, + "loss": 1.5996, + "step": 2804 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017578128379744302, + "loss": 1.6777, + "step": 2805 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017576248031044722, + "loss": 1.6934, + "step": 2806 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017574367053328835, + "loss": 1.5938, + "step": 2807 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017572485446752815, + "loss": 1.6094, + "step": 2808 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017570603211472875, + "loss": 1.6777, + "step": 2809 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017568720347645294, + "loss": 1.6758, + "step": 2810 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017566836855426394, + "loss": 1.7031, + "step": 2811 + }, + { + "epoch": 0.25, + "learning_rate": 0.001756495273497255, + "loss": 1.6816, + "step": 2812 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017563067986440192, + "loss": 1.6445, + "step": 2813 + }, + { + "epoch": 0.25, + "learning_rate": 0.00175611826099858, + "loss": 1.4785, + "step": 2814 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017559296605765904, + "loss": 1.7383, + "step": 2815 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017557409973937097, + "loss": 1.666, + "step": 2816 + }, + { + "epoch": 0.25, + "learning_rate": 0.001755552271465601, + "loss": 1.5703, + "step": 2817 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017553634828079332, + "loss": 1.6602, + "step": 2818 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017551746314363808, + "loss": 1.7539, + "step": 2819 + }, + { + "epoch": 0.25, + "learning_rate": 0.001754985717366623, + "loss": 1.6309, + "step": 2820 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017547967406143442, + "loss": 1.5566, + "step": 2821 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017546077011952343, + "loss": 1.5801, + "step": 2822 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017544185991249885, + "loss": 1.6953, + "step": 2823 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017542294344193065, + "loss": 1.5332, + "step": 2824 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017540402070938936, + "loss": 1.7031, + "step": 2825 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017538509171644609, + "loss": 1.7031, + "step": 2826 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017536615646467238, + "loss": 1.5957, + "step": 2827 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017534721495564031, + "loss": 1.6445, + "step": 2828 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017532826719092252, + "loss": 1.6035, + "step": 2829 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017530931317209216, + "loss": 1.7402, + "step": 2830 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017529035290072283, + "loss": 1.7129, + "step": 2831 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017527138637838874, + "loss": 1.7305, + "step": 2832 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017525241360666461, + "loss": 1.625, + "step": 2833 + }, + { + "epoch": 0.25, + "learning_rate": 0.001752334345871256, + "loss": 1.6641, + "step": 2834 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017521444932134742, + "loss": 1.7012, + "step": 2835 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017519545781090639, + "loss": 1.6582, + "step": 2836 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017517646005737921, + "loss": 1.6172, + "step": 2837 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017515745606234323, + "loss": 1.5059, + "step": 2838 + }, + { + "epoch": 0.25, + "learning_rate": 0.001751384458273762, + "loss": 1.6621, + "step": 2839 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017511942935405642, + "loss": 1.5723, + "step": 2840 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017510040664396281, + "loss": 1.6387, + "step": 2841 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017508137769867466, + "loss": 1.7129, + "step": 2842 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017506234251977184, + "loss": 1.7129, + "step": 2843 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017504330110883481, + "loss": 1.627, + "step": 2844 + }, + { + "epoch": 0.25, + "learning_rate": 0.001750242534674444, + "loss": 1.6152, + "step": 2845 + }, + { + "epoch": 0.25, + "learning_rate": 0.001750051995971821, + "loss": 1.6445, + "step": 2846 + }, + { + "epoch": 0.25, + "learning_rate": 0.001749861394996298, + "loss": 1.6328, + "step": 2847 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017496707317637, + "loss": 1.6836, + "step": 2848 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017494800062898563, + "loss": 1.7109, + "step": 2849 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017492892185906022, + "loss": 1.7051, + "step": 2850 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017490983686817782, + "loss": 1.5859, + "step": 2851 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017489074565792287, + "loss": 1.6172, + "step": 2852 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017487164822988048, + "loss": 1.7656, + "step": 2853 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017485254458563617, + "loss": 1.6777, + "step": 2854 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017483343472677604, + "loss": 1.6133, + "step": 2855 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017481431865488668, + "loss": 1.7188, + "step": 2856 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017479519637155516, + "loss": 1.5098, + "step": 2857 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017477606787836917, + "loss": 1.8203, + "step": 2858 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017475693317691682, + "loss": 1.5723, + "step": 2859 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017473779226878671, + "loss": 1.6074, + "step": 2860 + }, + { + "epoch": 0.25, + "learning_rate": 0.001747186451555681, + "loss": 1.707, + "step": 2861 + }, + { + "epoch": 0.25, + "learning_rate": 0.001746994918388506, + "loss": 1.5703, + "step": 2862 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017468033232022446, + "loss": 1.6934, + "step": 2863 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017466116660128038, + "loss": 1.627, + "step": 2864 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017464199468360958, + "loss": 1.6172, + "step": 2865 + }, + { + "epoch": 0.25, + "learning_rate": 0.0017462281656880384, + "loss": 1.6211, + "step": 2866 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017460363225845535, + "loss": 1.7012, + "step": 2867 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017458444175415693, + "loss": 1.6387, + "step": 2868 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017456524505750184, + "loss": 1.6094, + "step": 2869 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017454604217008395, + "loss": 1.7891, + "step": 2870 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017452683309349749, + "loss": 1.7402, + "step": 2871 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017450761782933735, + "loss": 1.7617, + "step": 2872 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017448839637919883, + "loss": 1.5996, + "step": 2873 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017446916874467781, + "loss": 1.668, + "step": 2874 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017444993492737064, + "loss": 1.7344, + "step": 2875 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017443069492887424, + "loss": 1.5703, + "step": 2876 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017441144875078598, + "loss": 1.5918, + "step": 2877 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017439219639470377, + "loss": 1.7715, + "step": 2878 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017437293786222606, + "loss": 1.6523, + "step": 2879 + }, + { + "epoch": 0.26, + "learning_rate": 0.001743536731549517, + "loss": 1.6953, + "step": 2880 + }, + { + "epoch": 0.26, + "learning_rate": 0.001743344022744803, + "loss": 1.541, + "step": 2881 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017431512522241165, + "loss": 1.6426, + "step": 2882 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017429584200034632, + "loss": 1.6855, + "step": 2883 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017427655260988527, + "loss": 1.5508, + "step": 2884 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017425725705262998, + "loss": 1.6289, + "step": 2885 + }, + { + "epoch": 0.26, + "learning_rate": 0.001742379553301825, + "loss": 1.6699, + "step": 2886 + }, + { + "epoch": 0.26, + "learning_rate": 0.001742186474441453, + "loss": 1.6348, + "step": 2887 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017419933339612146, + "loss": 1.6484, + "step": 2888 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017418001318771448, + "loss": 1.623, + "step": 2889 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017416068682052847, + "loss": 1.7656, + "step": 2890 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017414135429616798, + "loss": 1.6719, + "step": 2891 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017412201561623807, + "loss": 1.6523, + "step": 2892 + }, + { + "epoch": 0.26, + "learning_rate": 0.001741026707823443, + "loss": 1.7168, + "step": 2893 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017408331979609285, + "loss": 1.6465, + "step": 2894 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017406396265909026, + "loss": 1.627, + "step": 2895 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017404459937294368, + "loss": 1.5898, + "step": 2896 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017402522993926076, + "loss": 1.5938, + "step": 2897 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017400585435964962, + "loss": 1.6406, + "step": 2898 + }, + { + "epoch": 0.26, + "learning_rate": 0.001739864726357189, + "loss": 1.7148, + "step": 2899 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017396708476907778, + "loss": 1.6953, + "step": 2900 + }, + { + "epoch": 0.26, + "learning_rate": 0.00173947690761336, + "loss": 1.7715, + "step": 2901 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017392829061410363, + "loss": 1.8301, + "step": 2902 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017390888432899142, + "loss": 1.6992, + "step": 2903 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017388947190761052, + "loss": 1.5059, + "step": 2904 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017387005335157274, + "loss": 1.7168, + "step": 2905 + }, + { + "epoch": 0.26, + "learning_rate": 0.001738506286624902, + "loss": 1.7129, + "step": 2906 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017383119784197575, + "loss": 1.6113, + "step": 2907 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017381176089164253, + "loss": 1.7109, + "step": 2908 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017379231781310426, + "loss": 1.6016, + "step": 2909 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017377286860797531, + "loss": 1.7578, + "step": 2910 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017375341327787038, + "loss": 1.5195, + "step": 2911 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017373395182440476, + "loss": 1.5547, + "step": 2912 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017371448424919422, + "loss": 1.5918, + "step": 2913 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017369501055385507, + "loss": 1.6074, + "step": 2914 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017367553074000405, + "loss": 1.5859, + "step": 2915 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017365604480925857, + "loss": 1.7129, + "step": 2916 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017363655276323633, + "loss": 1.6289, + "step": 2917 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017361705460355577, + "loss": 1.6699, + "step": 2918 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017359755033183562, + "loss": 1.6836, + "step": 2919 + }, + { + "epoch": 0.26, + "learning_rate": 0.001735780399496953, + "loss": 1.7266, + "step": 2920 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017355852345875458, + "loss": 1.7383, + "step": 2921 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017353900086063386, + "loss": 1.7363, + "step": 2922 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017351947215695396, + "loss": 1.7188, + "step": 2923 + }, + { + "epoch": 0.26, + "learning_rate": 0.001734999373493363, + "loss": 1.6797, + "step": 2924 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017348039643940273, + "loss": 1.7422, + "step": 2925 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017346084942877562, + "loss": 1.748, + "step": 2926 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017344129631907785, + "loss": 1.6367, + "step": 2927 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017342173711193285, + "loss": 1.7363, + "step": 2928 + }, + { + "epoch": 0.26, + "learning_rate": 0.001734021718089645, + "loss": 1.752, + "step": 2929 + }, + { + "epoch": 0.26, + "learning_rate": 0.001733826004117972, + "loss": 1.5625, + "step": 2930 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017336302292205582, + "loss": 1.5684, + "step": 2931 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017334343934136583, + "loss": 1.5762, + "step": 2932 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017332384967135312, + "loss": 1.6738, + "step": 2933 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017330425391364416, + "loss": 1.8047, + "step": 2934 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017328465206986588, + "loss": 1.584, + "step": 2935 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017326504414164568, + "loss": 1.6855, + "step": 2936 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017324543013061148, + "loss": 1.6328, + "step": 2937 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017322581003839184, + "loss": 1.5742, + "step": 2938 + }, + { + "epoch": 0.26, + "learning_rate": 0.001732061838666156, + "loss": 1.5957, + "step": 2939 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017318655161691228, + "loss": 1.7305, + "step": 2940 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017316691329091183, + "loss": 1.7324, + "step": 2941 + }, + { + "epoch": 0.26, + "learning_rate": 0.001731472688902447, + "loss": 1.5898, + "step": 2942 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017312761841654187, + "loss": 1.6816, + "step": 2943 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017310796187143486, + "loss": 1.4805, + "step": 2944 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017308829925655558, + "loss": 1.6836, + "step": 2945 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017306863057353654, + "loss": 1.627, + "step": 2946 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017304895582401078, + "loss": 1.7051, + "step": 2947 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017302927500961172, + "loss": 1.5469, + "step": 2948 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017300958813197336, + "loss": 1.6113, + "step": 2949 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017298989519273025, + "loss": 1.7344, + "step": 2950 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017297019619351738, + "loss": 1.5566, + "step": 2951 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017295049113597019, + "loss": 1.6055, + "step": 2952 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017293078002172475, + "loss": 1.6094, + "step": 2953 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017291106285241755, + "loss": 1.666, + "step": 2954 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017289133962968561, + "loss": 1.5723, + "step": 2955 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017287161035516648, + "loss": 1.5801, + "step": 2956 + }, + { + "epoch": 0.26, + "learning_rate": 0.001728518750304981, + "loss": 1.7559, + "step": 2957 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017283213365731909, + "loss": 1.6855, + "step": 2958 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017281238623726835, + "loss": 1.6074, + "step": 2959 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017279263277198554, + "loss": 1.6367, + "step": 2960 + }, + { + "epoch": 0.26, + "learning_rate": 0.001727728732631106, + "loss": 1.5195, + "step": 2961 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017275310771228405, + "loss": 1.6172, + "step": 2962 + }, + { + "epoch": 0.26, + "learning_rate": 0.00172733336121147, + "loss": 1.5957, + "step": 2963 + }, + { + "epoch": 0.26, + "learning_rate": 0.001727135584913409, + "loss": 1.6914, + "step": 2964 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017269377482450779, + "loss": 1.6973, + "step": 2965 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017267398512229026, + "loss": 1.7363, + "step": 2966 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017265418938633133, + "loss": 1.6445, + "step": 2967 + }, + { + "epoch": 0.26, + "learning_rate": 0.001726343876182745, + "loss": 1.6973, + "step": 2968 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017261457981976383, + "loss": 1.7168, + "step": 2969 + }, + { + "epoch": 0.26, + "learning_rate": 0.001725947659924439, + "loss": 1.541, + "step": 2970 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017257494613795964, + "loss": 1.6367, + "step": 2971 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017255512025795667, + "loss": 1.5879, + "step": 2972 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017253528835408103, + "loss": 1.5879, + "step": 2973 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017251545042797924, + "loss": 1.7031, + "step": 2974 + }, + { + "epoch": 0.26, + "learning_rate": 0.0017249560648129837, + "loss": 1.6602, + "step": 2975 + }, + { + "epoch": 0.26, + "learning_rate": 0.001724757565156859, + "loss": 1.6133, + "step": 2976 + }, + { + "epoch": 0.26, + "learning_rate": 0.001724559005327899, + "loss": 1.6055, + "step": 2977 + }, + { + "epoch": 0.26, + "learning_rate": 0.001724360385342589, + "loss": 1.5195, + "step": 2978 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017241617052174197, + "loss": 1.5117, + "step": 2979 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017239629649688864, + "loss": 1.5312, + "step": 2980 + }, + { + "epoch": 0.27, + "learning_rate": 0.001723764164613489, + "loss": 1.6035, + "step": 2981 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017235653041677336, + "loss": 1.6113, + "step": 2982 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017233663836481298, + "loss": 1.752, + "step": 2983 + }, + { + "epoch": 0.27, + "learning_rate": 0.001723167403071193, + "loss": 1.5996, + "step": 2984 + }, + { + "epoch": 0.27, + "learning_rate": 0.001722968362453444, + "loss": 1.5312, + "step": 2985 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017227692618114081, + "loss": 1.6348, + "step": 2986 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017225701011616152, + "loss": 1.5664, + "step": 2987 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017223708805206006, + "loss": 1.5996, + "step": 2988 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017221715999049048, + "loss": 1.7207, + "step": 2989 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017219722593310727, + "loss": 1.5801, + "step": 2990 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017217728588156552, + "loss": 1.6094, + "step": 2991 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017215733983752064, + "loss": 1.6074, + "step": 2992 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017213738780262875, + "loss": 1.6211, + "step": 2993 + }, + { + "epoch": 0.27, + "learning_rate": 0.001721174297785463, + "loss": 1.6289, + "step": 2994 + }, + { + "epoch": 0.27, + "learning_rate": 0.001720974657669303, + "loss": 1.6035, + "step": 2995 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017207749576943832, + "loss": 1.6113, + "step": 2996 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017205751978772827, + "loss": 1.5918, + "step": 2997 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017203753782345874, + "loss": 1.6738, + "step": 2998 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017201754987828867, + "loss": 1.6152, + "step": 2999 + }, + { + "epoch": 0.27, + "learning_rate": 0.001719975559538776, + "loss": 1.5059, + "step": 3000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017197755605188548, + "loss": 1.6836, + "step": 3001 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017195755017397284, + "loss": 1.7324, + "step": 3002 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017193753832180061, + "loss": 1.5469, + "step": 3003 + }, + { + "epoch": 0.27, + "learning_rate": 0.001719175204970303, + "loss": 1.6504, + "step": 3004 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017189749670132386, + "loss": 1.6777, + "step": 3005 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017187746693634383, + "loss": 1.6953, + "step": 3006 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017185743120375309, + "loss": 1.6895, + "step": 3007 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017183738950521518, + "loss": 1.668, + "step": 3008 + }, + { + "epoch": 0.27, + "learning_rate": 0.00171817341842394, + "loss": 1.668, + "step": 3009 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017179728821695402, + "loss": 1.6895, + "step": 3010 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017177722863056018, + "loss": 1.5938, + "step": 3011 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017175716308487794, + "loss": 1.5957, + "step": 3012 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017173709158157325, + "loss": 1.5957, + "step": 3013 + }, + { + "epoch": 0.27, + "learning_rate": 0.001717170141223125, + "loss": 1.6289, + "step": 3014 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017169693070876266, + "loss": 1.5527, + "step": 3015 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017167684134259112, + "loss": 1.4941, + "step": 3016 + }, + { + "epoch": 0.27, + "learning_rate": 0.001716567460254658, + "loss": 1.668, + "step": 3017 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017163664475905512, + "loss": 1.7129, + "step": 3018 + }, + { + "epoch": 0.27, + "learning_rate": 0.00171616537545028, + "loss": 1.7109, + "step": 3019 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017159642438505376, + "loss": 1.668, + "step": 3020 + }, + { + "epoch": 0.27, + "learning_rate": 0.001715763052808024, + "loss": 1.6914, + "step": 3021 + }, + { + "epoch": 0.27, + "learning_rate": 0.001715561802339442, + "loss": 1.541, + "step": 3022 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017153604924615015, + "loss": 1.6621, + "step": 3023 + }, + { + "epoch": 0.27, + "learning_rate": 0.001715159123190915, + "loss": 1.7559, + "step": 3024 + }, + { + "epoch": 0.27, + "learning_rate": 0.001714957694544402, + "loss": 1.6328, + "step": 3025 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017147562065386857, + "loss": 1.6387, + "step": 3026 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017145546591904948, + "loss": 1.543, + "step": 3027 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017143530525165628, + "loss": 1.5918, + "step": 3028 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017141513865336275, + "loss": 1.6992, + "step": 3029 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017139496612584325, + "loss": 1.8379, + "step": 3030 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017137478767077262, + "loss": 1.6543, + "step": 3031 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017135460328982614, + "loss": 1.707, + "step": 3032 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017133441298467964, + "loss": 1.6973, + "step": 3033 + }, + { + "epoch": 0.27, + "learning_rate": 0.001713142167570094, + "loss": 1.7363, + "step": 3034 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017129401460849222, + "loss": 1.75, + "step": 3035 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017127380654080536, + "loss": 1.6055, + "step": 3036 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017125359255562658, + "loss": 1.7266, + "step": 3037 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017123337265463419, + "loss": 1.7344, + "step": 3038 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017121314683950692, + "loss": 1.6543, + "step": 3039 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017119291511192398, + "loss": 1.6133, + "step": 3040 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017117267747356517, + "loss": 1.6133, + "step": 3041 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017115243392611067, + "loss": 1.6211, + "step": 3042 + }, + { + "epoch": 0.27, + "learning_rate": 0.001711321844712412, + "loss": 1.6504, + "step": 3043 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017111192911063795, + "loss": 1.6875, + "step": 3044 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017109166784598266, + "loss": 1.6777, + "step": 3045 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017107140067895747, + "loss": 1.5527, + "step": 3046 + }, + { + "epoch": 0.27, + "learning_rate": 0.001710511276112451, + "loss": 1.5684, + "step": 3047 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017103084864452868, + "loss": 1.6855, + "step": 3048 + }, + { + "epoch": 0.27, + "learning_rate": 0.001710105637804919, + "loss": 1.6973, + "step": 3049 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017099027302081892, + "loss": 1.6523, + "step": 3050 + }, + { + "epoch": 0.27, + "learning_rate": 0.001709699763671943, + "loss": 1.6426, + "step": 3051 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017094967382130324, + "loss": 1.6582, + "step": 3052 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017092936538483131, + "loss": 1.6582, + "step": 3053 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017090905105946463, + "loss": 1.6152, + "step": 3054 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017088873084688983, + "loss": 1.8848, + "step": 3055 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017086840474879392, + "loss": 1.543, + "step": 3056 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017084807276686452, + "loss": 1.6309, + "step": 3057 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017082773490278964, + "loss": 1.7285, + "step": 3058 + }, + { + "epoch": 0.27, + "learning_rate": 0.001708073911582579, + "loss": 1.6973, + "step": 3059 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017078704153495826, + "loss": 1.7461, + "step": 3060 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017076668603458029, + "loss": 1.6523, + "step": 3061 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017074632465881399, + "loss": 1.668, + "step": 3062 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017072595740934987, + "loss": 1.7168, + "step": 3063 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017070558428787885, + "loss": 1.6934, + "step": 3064 + }, + { + "epoch": 0.27, + "learning_rate": 0.001706852052960925, + "loss": 1.5742, + "step": 3065 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017066482043568275, + "loss": 1.5547, + "step": 3066 + }, + { + "epoch": 0.27, + "learning_rate": 0.00170644429708342, + "loss": 1.5059, + "step": 3067 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017062403311576323, + "loss": 1.6973, + "step": 3068 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017060363065963986, + "loss": 1.6094, + "step": 3069 + }, + { + "epoch": 0.27, + "learning_rate": 0.001705832223416658, + "loss": 1.6914, + "step": 3070 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017056280816353541, + "loss": 1.6465, + "step": 3071 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017054238812694365, + "loss": 1.7559, + "step": 3072 + }, + { + "epoch": 0.27, + "learning_rate": 0.001705219622335858, + "loss": 1.6797, + "step": 3073 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017050153048515778, + "loss": 1.707, + "step": 3074 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017048109288335586, + "loss": 1.7598, + "step": 3075 + }, + { + "epoch": 0.27, + "learning_rate": 0.00170460649429877, + "loss": 1.6797, + "step": 3076 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017044020012641835, + "loss": 1.5391, + "step": 3077 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017041974497467782, + "loss": 1.5215, + "step": 3078 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017039928397635368, + "loss": 1.5742, + "step": 3079 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017037881713314468, + "loss": 1.6543, + "step": 3080 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017035834444675002, + "loss": 1.6953, + "step": 3081 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017033786591886956, + "loss": 1.582, + "step": 3082 + }, + { + "epoch": 0.27, + "learning_rate": 0.001703173815512034, + "loss": 1.5625, + "step": 3083 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017029689134545236, + "loss": 1.6465, + "step": 3084 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017027639530331755, + "loss": 1.707, + "step": 3085 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017025589342650074, + "loss": 1.6113, + "step": 3086 + }, + { + "epoch": 0.27, + "learning_rate": 0.00170235385716704, + "loss": 1.6523, + "step": 3087 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017021487217563002, + "loss": 1.582, + "step": 3088 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017019435280498194, + "loss": 1.7754, + "step": 3089 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017017382760646336, + "loss": 1.6973, + "step": 3090 + }, + { + "epoch": 0.27, + "learning_rate": 0.0017015329658177838, + "loss": 1.541, + "step": 3091 + }, + { + "epoch": 0.28, + "learning_rate": 0.001701327597326316, + "loss": 1.6641, + "step": 3092 + }, + { + "epoch": 0.28, + "learning_rate": 0.0017011221706072807, + "loss": 1.5254, + "step": 3093 + }, + { + "epoch": 0.28, + "learning_rate": 0.0017009166856777336, + "loss": 1.8008, + "step": 3094 + }, + { + "epoch": 0.28, + "learning_rate": 0.0017007111425547349, + "loss": 1.7422, + "step": 3095 + }, + { + "epoch": 0.28, + "learning_rate": 0.0017005055412553497, + "loss": 1.6172, + "step": 3096 + }, + { + "epoch": 0.28, + "learning_rate": 0.001700299881796648, + "loss": 1.668, + "step": 3097 + }, + { + "epoch": 0.28, + "learning_rate": 0.001700094164195705, + "loss": 1.5586, + "step": 3098 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016998883884696, + "loss": 1.5664, + "step": 3099 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016996825546354173, + "loss": 1.5527, + "step": 3100 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016994766627102466, + "loss": 1.5371, + "step": 3101 + }, + { + "epoch": 0.28, + "learning_rate": 0.001699270712711182, + "loss": 1.6543, + "step": 3102 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016990647046553225, + "loss": 1.6738, + "step": 3103 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016988586385597716, + "loss": 1.5449, + "step": 3104 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016986525144416379, + "loss": 1.6621, + "step": 3105 + }, + { + "epoch": 0.28, + "learning_rate": 0.001698446332318035, + "loss": 1.6797, + "step": 3106 + }, + { + "epoch": 0.28, + "learning_rate": 0.001698240092206081, + "loss": 1.5859, + "step": 3107 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016980337941228992, + "loss": 1.6484, + "step": 3108 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016978274380856169, + "loss": 1.7227, + "step": 3109 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016976210241113675, + "loss": 1.6953, + "step": 3110 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016974145522172877, + "loss": 1.6445, + "step": 3111 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016972080224205205, + "loss": 1.6914, + "step": 3112 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016970014347382123, + "loss": 1.6602, + "step": 3113 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016967947891875156, + "loss": 1.5898, + "step": 3114 + }, + { + "epoch": 0.28, + "learning_rate": 0.001696588085785587, + "loss": 1.5488, + "step": 3115 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016963813245495878, + "loss": 1.6484, + "step": 3116 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016961745054966842, + "loss": 1.5664, + "step": 3117 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016959676286440475, + "loss": 1.6953, + "step": 3118 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016957606940088536, + "loss": 1.709, + "step": 3119 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016955537016082832, + "loss": 1.8027, + "step": 3120 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016953466514595217, + "loss": 1.5625, + "step": 3121 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016951395435797596, + "loss": 1.6113, + "step": 3122 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016949323779861915, + "loss": 1.6152, + "step": 3123 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016947251546960184, + "loss": 1.7695, + "step": 3124 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016945178737264436, + "loss": 1.6816, + "step": 3125 + }, + { + "epoch": 0.28, + "learning_rate": 0.001694310535094677, + "loss": 1.6836, + "step": 3126 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016941031388179332, + "loss": 1.7324, + "step": 3127 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016938956849134313, + "loss": 1.7773, + "step": 3128 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016936881733983947, + "loss": 1.6387, + "step": 3129 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016934806042900518, + "loss": 1.6621, + "step": 3130 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016932729776056366, + "loss": 1.6406, + "step": 3131 + }, + { + "epoch": 0.28, + "learning_rate": 0.001693065293362387, + "loss": 1.6934, + "step": 3132 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016928575515775459, + "loss": 1.6152, + "step": 3133 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016926497522683607, + "loss": 1.6309, + "step": 3134 + }, + { + "epoch": 0.28, + "learning_rate": 0.001692441895452085, + "loss": 1.623, + "step": 3135 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016922339811459746, + "loss": 1.6562, + "step": 3136 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016920260093672927, + "loss": 1.5664, + "step": 3137 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016918179801333052, + "loss": 1.6973, + "step": 3138 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016916098934612843, + "loss": 1.6484, + "step": 3139 + }, + { + "epoch": 0.28, + "learning_rate": 0.001691401749368506, + "loss": 1.6387, + "step": 3140 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016911935478722518, + "loss": 1.625, + "step": 3141 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016909852889898072, + "loss": 1.7637, + "step": 3142 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016907769727384633, + "loss": 1.6621, + "step": 3143 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016905685991355148, + "loss": 1.7578, + "step": 3144 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016903601681982627, + "loss": 1.6895, + "step": 3145 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016901516799440112, + "loss": 1.623, + "step": 3146 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016899431343900706, + "loss": 1.7363, + "step": 3147 + }, + { + "epoch": 0.28, + "learning_rate": 0.001689734531553755, + "loss": 1.7598, + "step": 3148 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016895258714523839, + "loss": 1.6719, + "step": 3149 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016893171541032806, + "loss": 1.6836, + "step": 3150 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016891083795237745, + "loss": 1.5703, + "step": 3151 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016888995477311985, + "loss": 1.625, + "step": 3152 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016886906587428917, + "loss": 1.5938, + "step": 3153 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016884817125761957, + "loss": 1.5156, + "step": 3154 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016882727092484597, + "loss": 1.7988, + "step": 3155 + }, + { + "epoch": 0.28, + "learning_rate": 0.001688063648777035, + "loss": 1.5801, + "step": 3156 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016878545311792795, + "loss": 1.6348, + "step": 3157 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016876453564725546, + "loss": 1.7324, + "step": 3158 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016874361246742276, + "loss": 1.7207, + "step": 3159 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016872268358016692, + "loss": 1.6328, + "step": 3160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016870174898722563, + "loss": 1.6465, + "step": 3161 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016868080869033692, + "loss": 1.7676, + "step": 3162 + }, + { + "epoch": 0.28, + "learning_rate": 0.001686598626912394, + "loss": 1.6035, + "step": 3163 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016863891099167209, + "loss": 1.6309, + "step": 3164 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016861795359337445, + "loss": 1.5879, + "step": 3165 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016859699049808655, + "loss": 1.6348, + "step": 3166 + }, + { + "epoch": 0.28, + "learning_rate": 0.001685760217075488, + "loss": 1.5234, + "step": 3167 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016855504722350216, + "loss": 1.7188, + "step": 3168 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016853406704768798, + "loss": 1.6582, + "step": 3169 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016851308118184813, + "loss": 1.6895, + "step": 3170 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016849208962772502, + "loss": 1.5781, + "step": 3171 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016847109238706142, + "loss": 1.6211, + "step": 3172 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016845008946160067, + "loss": 1.707, + "step": 3173 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016842908085308648, + "loss": 1.6797, + "step": 3174 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016840806656326312, + "loss": 1.7285, + "step": 3175 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016838704659387527, + "loss": 1.6543, + "step": 3176 + }, + { + "epoch": 0.28, + "learning_rate": 0.001683660209466681, + "loss": 1.5859, + "step": 3177 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016834498962338734, + "loss": 1.625, + "step": 3178 + }, + { + "epoch": 0.28, + "learning_rate": 0.00168323952625779, + "loss": 1.6875, + "step": 3179 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016830290995558973, + "loss": 1.6309, + "step": 3180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016828186161456662, + "loss": 1.5742, + "step": 3181 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016826080760445712, + "loss": 1.666, + "step": 3182 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016823974792700934, + "loss": 1.6016, + "step": 3183 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016821868258397163, + "loss": 1.4922, + "step": 3184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016819761157709308, + "loss": 1.6445, + "step": 3185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016817653490812296, + "loss": 1.5918, + "step": 3186 + }, + { + "epoch": 0.28, + "learning_rate": 0.001681554525788113, + "loss": 1.6914, + "step": 3187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016813436459090835, + "loss": 1.6797, + "step": 3188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016811327094616497, + "loss": 1.5879, + "step": 3189 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016809217164633244, + "loss": 1.5645, + "step": 3190 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016807106669316254, + "loss": 1.584, + "step": 3191 + }, + { + "epoch": 0.28, + "learning_rate": 0.001680499560884075, + "loss": 1.6191, + "step": 3192 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016802883983382006, + "loss": 1.6602, + "step": 3193 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016800771793115332, + "loss": 1.6562, + "step": 3194 + }, + { + "epoch": 0.28, + "learning_rate": 0.00167986590382161, + "loss": 1.6523, + "step": 3195 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016796545718859713, + "loss": 1.6797, + "step": 3196 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016794431835221633, + "loss": 1.6484, + "step": 3197 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016792317387477366, + "loss": 1.5918, + "step": 3198 + }, + { + "epoch": 0.28, + "learning_rate": 0.001679020237580246, + "loss": 1.6523, + "step": 3199 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016788086800372517, + "loss": 1.6602, + "step": 3200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016785970661363182, + "loss": 1.6758, + "step": 3201 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016783853958950142, + "loss": 1.6367, + "step": 3202 + }, + { + "epoch": 0.28, + "learning_rate": 0.0016781736693309145, + "loss": 1.6797, + "step": 3203 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016779618864615964, + "loss": 1.6406, + "step": 3204 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016777500473046443, + "loss": 1.5996, + "step": 3205 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016775381518776453, + "loss": 1.6621, + "step": 3206 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016773262001981925, + "loss": 1.5996, + "step": 3207 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016771141922838826, + "loss": 1.6836, + "step": 3208 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016769021281523183, + "loss": 1.6875, + "step": 3209 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016766900078211053, + "loss": 1.6406, + "step": 3210 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016764778313078552, + "loss": 1.6445, + "step": 3211 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016762655986301842, + "loss": 1.7148, + "step": 3212 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016760533098057124, + "loss": 1.6406, + "step": 3213 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016758409648520657, + "loss": 1.6074, + "step": 3214 + }, + { + "epoch": 0.29, + "learning_rate": 0.001675628563786873, + "loss": 1.7148, + "step": 3215 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016754161066277699, + "loss": 1.7207, + "step": 3216 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016752035933923948, + "loss": 1.6562, + "step": 3217 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016749910240983921, + "loss": 1.6348, + "step": 3218 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016747783987634097, + "loss": 1.6621, + "step": 3219 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016745657174051015, + "loss": 1.502, + "step": 3220 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016743529800411251, + "loss": 1.6094, + "step": 3221 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016741401866891424, + "loss": 1.6758, + "step": 3222 + }, + { + "epoch": 0.29, + "learning_rate": 0.001673927337366821, + "loss": 1.6367, + "step": 3223 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016737144320918332, + "loss": 1.5957, + "step": 3224 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016735014708818543, + "loss": 1.625, + "step": 3225 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016732884537545662, + "loss": 1.5195, + "step": 3226 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016730753807276545, + "loss": 1.6465, + "step": 3227 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016728622518188093, + "loss": 1.5957, + "step": 3228 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016726490670457253, + "loss": 1.627, + "step": 3229 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016724358264261026, + "loss": 1.6426, + "step": 3230 + }, + { + "epoch": 0.29, + "learning_rate": 0.001672222529977645, + "loss": 1.6406, + "step": 3231 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016720091777180622, + "loss": 1.543, + "step": 3232 + }, + { + "epoch": 0.29, + "learning_rate": 0.001671795769665067, + "loss": 1.6777, + "step": 3233 + }, + { + "epoch": 0.29, + "learning_rate": 0.001671582305836378, + "loss": 1.5801, + "step": 3234 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016713687862497176, + "loss": 1.584, + "step": 3235 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016711552109228132, + "loss": 1.6406, + "step": 3236 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016709415798733974, + "loss": 1.7461, + "step": 3237 + }, + { + "epoch": 0.29, + "learning_rate": 0.001670727893119206, + "loss": 1.5996, + "step": 3238 + }, + { + "epoch": 0.29, + "learning_rate": 0.001670514150677981, + "loss": 1.5957, + "step": 3239 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016703003525674683, + "loss": 1.7148, + "step": 3240 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016700864988054183, + "loss": 1.7656, + "step": 3241 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016698725894095858, + "loss": 1.5605, + "step": 3242 + }, + { + "epoch": 0.29, + "learning_rate": 0.001669658624397731, + "loss": 1.5547, + "step": 3243 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016694446037876184, + "loss": 1.7168, + "step": 3244 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016692305275970165, + "loss": 1.7715, + "step": 3245 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016690163958436993, + "loss": 1.5859, + "step": 3246 + }, + { + "epoch": 0.29, + "learning_rate": 0.001668802208545445, + "loss": 1.5527, + "step": 3247 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016685879657200365, + "loss": 1.7656, + "step": 3248 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016683736673852611, + "loss": 1.6758, + "step": 3249 + }, + { + "epoch": 0.29, + "learning_rate": 0.001668159313558911, + "loss": 1.6348, + "step": 3250 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016679449042587829, + "loss": 1.7051, + "step": 3251 + }, + { + "epoch": 0.29, + "learning_rate": 0.001667730439502678, + "loss": 1.7344, + "step": 3252 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016675159193084022, + "loss": 1.7578, + "step": 3253 + }, + { + "epoch": 0.29, + "learning_rate": 0.001667301343693766, + "loss": 1.6074, + "step": 3254 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016670867126765843, + "loss": 1.6055, + "step": 3255 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016668720262746773, + "loss": 1.5449, + "step": 3256 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016666572845058687, + "loss": 1.623, + "step": 3257 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016664424873879878, + "loss": 1.5352, + "step": 3258 + }, + { + "epoch": 0.29, + "learning_rate": 0.001666227634938868, + "loss": 1.6113, + "step": 3259 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016660127271763473, + "loss": 1.6543, + "step": 3260 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016657977641182681, + "loss": 1.5293, + "step": 3261 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016655827457824784, + "loss": 1.6406, + "step": 3262 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016653676721868293, + "loss": 1.5957, + "step": 3263 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016651525433491778, + "loss": 1.709, + "step": 3264 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016649373592873841, + "loss": 1.7031, + "step": 3265 + }, + { + "epoch": 0.29, + "learning_rate": 0.001664722120019315, + "loss": 1.5469, + "step": 3266 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016645068255628397, + "loss": 1.5469, + "step": 3267 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016642914759358333, + "loss": 1.8086, + "step": 3268 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016640760711561758, + "loss": 1.6914, + "step": 3269 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016638606112417498, + "loss": 1.6387, + "step": 3270 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016636450962104448, + "loss": 1.7891, + "step": 3271 + }, + { + "epoch": 0.29, + "learning_rate": 0.001663429526080154, + "loss": 1.5469, + "step": 3272 + }, + { + "epoch": 0.29, + "learning_rate": 0.001663213900868774, + "loss": 1.5273, + "step": 3273 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016629982205942082, + "loss": 1.5879, + "step": 3274 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016627824852743628, + "loss": 1.6426, + "step": 3275 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016625666949271491, + "loss": 1.7324, + "step": 3276 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016623508495704838, + "loss": 1.5469, + "step": 3277 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016621349492222863, + "loss": 1.6426, + "step": 3278 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016619189939004824, + "loss": 1.5547, + "step": 3279 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016617029836230018, + "loss": 1.6113, + "step": 3280 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016614869184077784, + "loss": 1.5918, + "step": 3281 + }, + { + "epoch": 0.29, + "learning_rate": 0.001661270798272751, + "loss": 1.5449, + "step": 3282 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016610546232358628, + "loss": 1.6914, + "step": 3283 + }, + { + "epoch": 0.29, + "learning_rate": 0.001660838393315062, + "loss": 1.707, + "step": 3284 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016606221085283008, + "loss": 1.7773, + "step": 3285 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016604057688935365, + "loss": 1.6836, + "step": 3286 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016601893744287307, + "loss": 1.7129, + "step": 3287 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016599729251518488, + "loss": 1.582, + "step": 3288 + }, + { + "epoch": 0.29, + "learning_rate": 0.001659756421080862, + "loss": 1.5547, + "step": 3289 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016595398622337454, + "loss": 1.6738, + "step": 3290 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016593232486284788, + "loss": 1.5566, + "step": 3291 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016591065802830465, + "loss": 1.6719, + "step": 3292 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016588898572154373, + "loss": 1.6055, + "step": 3293 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016586730794436446, + "loss": 1.6445, + "step": 3294 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016584562469856663, + "loss": 1.584, + "step": 3295 + }, + { + "epoch": 0.29, + "learning_rate": 0.001658239359859505, + "loss": 1.627, + "step": 3296 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016580224180831674, + "loss": 1.6328, + "step": 3297 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016578054216746653, + "loss": 1.6523, + "step": 3298 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016575883706520145, + "loss": 1.7461, + "step": 3299 + }, + { + "epoch": 0.29, + "learning_rate": 0.001657371265033236, + "loss": 1.7617, + "step": 3300 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016571541048363548, + "loss": 1.6328, + "step": 3301 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016569368900794005, + "loss": 1.4863, + "step": 3302 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016567196207804074, + "loss": 1.7305, + "step": 3303 + }, + { + "epoch": 0.29, + "learning_rate": 0.001656502296957414, + "loss": 1.7031, + "step": 3304 + }, + { + "epoch": 0.29, + "learning_rate": 0.001656284918628464, + "loss": 1.6211, + "step": 3305 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016560674858116045, + "loss": 1.582, + "step": 3306 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016558499985248885, + "loss": 1.5469, + "step": 3307 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016556324567863726, + "loss": 1.6816, + "step": 3308 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016554148606141182, + "loss": 1.6289, + "step": 3309 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016551972100261908, + "loss": 1.5293, + "step": 3310 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016549795050406616, + "loss": 1.6289, + "step": 3311 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016547617456756046, + "loss": 1.6172, + "step": 3312 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016545439319490996, + "loss": 1.5859, + "step": 3313 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016543260638792305, + "loss": 1.6289, + "step": 3314 + }, + { + "epoch": 0.29, + "learning_rate": 0.0016541081414840862, + "loss": 1.5586, + "step": 3315 + }, + { + "epoch": 0.29, + "learning_rate": 0.001653890164781759, + "loss": 1.6738, + "step": 3316 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016536721337903466, + "loss": 1.6973, + "step": 3317 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016534540485279511, + "loss": 1.5996, + "step": 3318 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016532359090126792, + "loss": 1.6074, + "step": 3319 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016530177152626411, + "loss": 1.6836, + "step": 3320 + }, + { + "epoch": 0.3, + "learning_rate": 0.001652799467295953, + "loss": 1.6816, + "step": 3321 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016525811651307346, + "loss": 1.5684, + "step": 3322 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016523628087851104, + "loss": 1.5977, + "step": 3323 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016521443982772093, + "loss": 1.6875, + "step": 3324 + }, + { + "epoch": 0.3, + "learning_rate": 0.001651925933625165, + "loss": 1.7793, + "step": 3325 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016517074148471153, + "loss": 1.8438, + "step": 3326 + }, + { + "epoch": 0.3, + "learning_rate": 0.001651488841961203, + "loss": 1.5762, + "step": 3327 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016512702149855747, + "loss": 1.6094, + "step": 3328 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016510515339383822, + "loss": 1.5762, + "step": 3329 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016508327988377807, + "loss": 1.5703, + "step": 3330 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016506140097019319, + "loss": 1.7266, + "step": 3331 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016503951665489992, + "loss": 1.6348, + "step": 3332 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016501762693971532, + "loss": 1.6309, + "step": 3333 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016499573182645673, + "loss": 1.5352, + "step": 3334 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016497383131694199, + "loss": 1.666, + "step": 3335 + }, + { + "epoch": 0.3, + "learning_rate": 0.001649519254129894, + "loss": 1.6602, + "step": 3336 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016493001411641765, + "loss": 1.584, + "step": 3337 + }, + { + "epoch": 0.3, + "learning_rate": 0.00164908097429046, + "loss": 1.627, + "step": 3338 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016488617535269402, + "loss": 1.8262, + "step": 3339 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016486424788918176, + "loss": 1.582, + "step": 3340 + }, + { + "epoch": 0.3, + "learning_rate": 0.001648423150403298, + "loss": 1.6641, + "step": 3341 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016482037680795908, + "loss": 1.748, + "step": 3342 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016479843319389103, + "loss": 1.5566, + "step": 3343 + }, + { + "epoch": 0.3, + "learning_rate": 0.001647764841999475, + "loss": 1.6719, + "step": 3344 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016475452982795086, + "loss": 1.498, + "step": 3345 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016473257007972376, + "loss": 1.6211, + "step": 3346 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016471060495708947, + "loss": 1.625, + "step": 3347 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016468863446187166, + "loss": 1.8262, + "step": 3348 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016466665859589437, + "loss": 1.6992, + "step": 3349 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016464467736098214, + "loss": 1.6426, + "step": 3350 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016462269075896002, + "loss": 1.6992, + "step": 3351 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016460069879165337, + "loss": 1.6113, + "step": 3352 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016457870146088813, + "loss": 1.7422, + "step": 3353 + }, + { + "epoch": 0.3, + "learning_rate": 0.001645566987684906, + "loss": 1.6562, + "step": 3354 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016453469071628754, + "loss": 1.7637, + "step": 3355 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016451267730610618, + "loss": 1.7246, + "step": 3356 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016449065853977414, + "loss": 1.6172, + "step": 3357 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016446863441911957, + "loss": 1.541, + "step": 3358 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016444660494597102, + "loss": 1.6367, + "step": 3359 + }, + { + "epoch": 0.3, + "learning_rate": 0.001644245701221574, + "loss": 1.6914, + "step": 3360 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016440252994950825, + "loss": 1.6992, + "step": 3361 + }, + { + "epoch": 0.3, + "learning_rate": 0.001643804844298534, + "loss": 1.6875, + "step": 3362 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016435843356502318, + "loss": 1.7305, + "step": 3363 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016433637735684835, + "loss": 1.6836, + "step": 3364 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016431431580716015, + "loss": 1.6172, + "step": 3365 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016429224891779022, + "loss": 1.6465, + "step": 3366 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016427017669057064, + "loss": 1.6367, + "step": 3367 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016424809912733397, + "loss": 1.5098, + "step": 3368 + }, + { + "epoch": 0.3, + "learning_rate": 0.001642260162299132, + "loss": 1.582, + "step": 3369 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016420392800014173, + "loss": 1.5723, + "step": 3370 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016418183443985348, + "loss": 1.6484, + "step": 3371 + }, + { + "epoch": 0.3, + "learning_rate": 0.001641597355508827, + "loss": 1.5898, + "step": 3372 + }, + { + "epoch": 0.3, + "learning_rate": 0.001641376313350642, + "loss": 1.5508, + "step": 3373 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016411552179423315, + "loss": 1.6426, + "step": 3374 + }, + { + "epoch": 0.3, + "learning_rate": 0.001640934069302252, + "loss": 1.6445, + "step": 3375 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016407128674487642, + "loss": 1.7031, + "step": 3376 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016404916124002336, + "loss": 1.7188, + "step": 3377 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016402703041750292, + "loss": 1.7344, + "step": 3378 + }, + { + "epoch": 0.3, + "learning_rate": 0.001640048942791526, + "loss": 1.6445, + "step": 3379 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016398275282681017, + "loss": 1.5488, + "step": 3380 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016396060606231396, + "loss": 1.6895, + "step": 3381 + }, + { + "epoch": 0.3, + "learning_rate": 0.001639384539875027, + "loss": 1.5977, + "step": 3382 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016391629660421553, + "loss": 1.6914, + "step": 3383 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016389413391429209, + "loss": 1.6562, + "step": 3384 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016387196591957243, + "loss": 1.5391, + "step": 3385 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016384979262189705, + "loss": 1.5137, + "step": 3386 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016382761402310684, + "loss": 1.791, + "step": 3387 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016380543012504325, + "loss": 1.6738, + "step": 3388 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016378324092954802, + "loss": 1.6133, + "step": 3389 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016376104643846343, + "loss": 1.6504, + "step": 3390 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016373884665363218, + "loss": 1.7188, + "step": 3391 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016371664157689737, + "loss": 1.6973, + "step": 3392 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016369443121010262, + "loss": 1.627, + "step": 3393 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016367221555509191, + "loss": 1.6172, + "step": 3394 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016364999461370969, + "loss": 1.6816, + "step": 3395 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016362776838780087, + "loss": 1.6016, + "step": 3396 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016360553687921073, + "loss": 1.5723, + "step": 3397 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016358330008978512, + "loss": 1.5918, + "step": 3398 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016356105802137018, + "loss": 1.5879, + "step": 3399 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016353881067581255, + "loss": 1.752, + "step": 3400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016351655805495933, + "loss": 1.7148, + "step": 3401 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016349430016065806, + "loss": 1.6152, + "step": 3402 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016347203699475664, + "loss": 1.625, + "step": 3403 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016344976855910353, + "loss": 1.6602, + "step": 3404 + }, + { + "epoch": 0.3, + "learning_rate": 0.001634274948555475, + "loss": 1.6074, + "step": 3405 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016340521588593785, + "loss": 1.7559, + "step": 3406 + }, + { + "epoch": 0.3, + "learning_rate": 0.001633829316521243, + "loss": 1.6367, + "step": 3407 + }, + { + "epoch": 0.3, + "learning_rate": 0.00163360642155957, + "loss": 1.627, + "step": 3408 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016333834739928647, + "loss": 1.6641, + "step": 3409 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016331604738396377, + "loss": 1.5762, + "step": 3410 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016329374211184037, + "loss": 1.7129, + "step": 3411 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016327143158476813, + "loss": 1.6992, + "step": 3412 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016324911580459938, + "loss": 1.6367, + "step": 3413 + }, + { + "epoch": 0.3, + "learning_rate": 0.001632267947731869, + "loss": 1.6992, + "step": 3414 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016320446849238386, + "loss": 1.668, + "step": 3415 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016318213696404397, + "loss": 1.7031, + "step": 3416 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016315980019002117, + "loss": 1.6191, + "step": 3417 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016313745817217006, + "loss": 1.8066, + "step": 3418 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016311511091234559, + "loss": 1.6602, + "step": 3419 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016309275841240307, + "loss": 1.7207, + "step": 3420 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016307040067419837, + "loss": 1.6758, + "step": 3421 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016304803769958771, + "loss": 1.5527, + "step": 3422 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016302566949042776, + "loss": 1.6777, + "step": 3423 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016300329604857567, + "loss": 1.7305, + "step": 3424 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016298091737588897, + "loss": 1.623, + "step": 3425 + }, + { + "epoch": 0.3, + "learning_rate": 0.0016295853347422563, + "loss": 1.5898, + "step": 3426 + }, + { + "epoch": 0.3, + "learning_rate": 0.001629361443454441, + "loss": 1.6191, + "step": 3427 + }, + { + "epoch": 0.3, + "learning_rate": 0.001629137499914032, + "loss": 1.6699, + "step": 3428 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016289135041396223, + "loss": 1.6992, + "step": 3429 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016286894561498091, + "loss": 1.627, + "step": 3430 + }, + { + "epoch": 0.31, + "learning_rate": 0.001628465355963194, + "loss": 1.668, + "step": 3431 + }, + { + "epoch": 0.31, + "learning_rate": 0.001628241203598383, + "loss": 1.7109, + "step": 3432 + }, + { + "epoch": 0.31, + "learning_rate": 0.001628016999073986, + "loss": 1.6855, + "step": 3433 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016277927424086177, + "loss": 1.8164, + "step": 3434 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016275684336208966, + "loss": 1.6133, + "step": 3435 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016273440727294463, + "loss": 1.584, + "step": 3436 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016271196597528945, + "loss": 1.6367, + "step": 3437 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016268951947098725, + "loss": 1.5664, + "step": 3438 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016266706776190169, + "loss": 1.625, + "step": 3439 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016264461084989678, + "loss": 1.5996, + "step": 3440 + }, + { + "epoch": 0.31, + "learning_rate": 0.00162622148736837, + "loss": 1.5566, + "step": 3441 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016259968142458732, + "loss": 1.7051, + "step": 3442 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016257720891501301, + "loss": 1.7461, + "step": 3443 + }, + { + "epoch": 0.31, + "learning_rate": 0.001625547312099799, + "loss": 1.6152, + "step": 3444 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016253224831135418, + "loss": 1.7051, + "step": 3445 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016250976022100247, + "loss": 1.6562, + "step": 3446 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016248726694079187, + "loss": 1.6289, + "step": 3447 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016246476847258987, + "loss": 1.6289, + "step": 3448 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016244226481826436, + "loss": 1.6875, + "step": 3449 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016241975597968375, + "loss": 1.5879, + "step": 3450 + }, + { + "epoch": 0.31, + "learning_rate": 0.001623972419587168, + "loss": 1.5625, + "step": 3451 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016237472275723278, + "loss": 1.6328, + "step": 3452 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016235219837710127, + "loss": 1.6504, + "step": 3453 + }, + { + "epoch": 0.31, + "learning_rate": 0.001623296688201924, + "loss": 1.6016, + "step": 3454 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016230713408837668, + "loss": 1.5527, + "step": 3455 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016228459418352503, + "loss": 1.7422, + "step": 3456 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016226204910750883, + "loss": 1.6934, + "step": 3457 + }, + { + "epoch": 0.31, + "learning_rate": 0.001622394988621999, + "loss": 1.6074, + "step": 3458 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016221694344947042, + "loss": 1.5273, + "step": 3459 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016219438287119311, + "loss": 1.584, + "step": 3460 + }, + { + "epoch": 0.31, + "learning_rate": 0.00162171817129241, + "loss": 1.6504, + "step": 3461 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016214924622548767, + "loss": 1.6797, + "step": 3462 + }, + { + "epoch": 0.31, + "learning_rate": 0.00162126670161807, + "loss": 1.7441, + "step": 3463 + }, + { + "epoch": 0.31, + "learning_rate": 0.001621040889400734, + "loss": 1.6973, + "step": 3464 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016208150256216164, + "loss": 1.543, + "step": 3465 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016205891102994698, + "loss": 1.5859, + "step": 3466 + }, + { + "epoch": 0.31, + "learning_rate": 0.001620363143453051, + "loss": 1.7031, + "step": 3467 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016201371251011198, + "loss": 1.5586, + "step": 3468 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016199110552624428, + "loss": 1.6484, + "step": 3469 + }, + { + "epoch": 0.31, + "learning_rate": 0.001619684933955788, + "loss": 1.6641, + "step": 3470 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016194587611999302, + "loss": 1.7031, + "step": 3471 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016192325370136462, + "loss": 1.6328, + "step": 3472 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016190062614157195, + "loss": 1.6777, + "step": 3473 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016187799344249357, + "loss": 1.5938, + "step": 3474 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016185535560600856, + "loss": 1.5977, + "step": 3475 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016183271263399647, + "loss": 1.668, + "step": 3476 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016181006452833718, + "loss": 1.5215, + "step": 3477 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016178741129091105, + "loss": 1.6504, + "step": 3478 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016176475292359889, + "loss": 1.5801, + "step": 3479 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016174208942828186, + "loss": 1.5605, + "step": 3480 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016171942080684166, + "loss": 1.8066, + "step": 3481 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016169674706116028, + "loss": 1.6035, + "step": 3482 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016167406819312022, + "loss": 1.6914, + "step": 3483 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016165138420460444, + "loss": 1.5898, + "step": 3484 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016162869509749617, + "loss": 1.6133, + "step": 3485 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016160600087367926, + "loss": 1.7148, + "step": 3486 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016158330153503786, + "loss": 1.5234, + "step": 3487 + }, + { + "epoch": 0.31, + "learning_rate": 0.001615605970834566, + "loss": 1.5586, + "step": 3488 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016153788752082045, + "loss": 1.6465, + "step": 3489 + }, + { + "epoch": 0.31, + "learning_rate": 0.001615151728490149, + "loss": 1.7305, + "step": 3490 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016149245306992588, + "loss": 1.7637, + "step": 3491 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016146972818543964, + "loss": 1.7734, + "step": 3492 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016144699819744292, + "loss": 1.7422, + "step": 3493 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016142426310782284, + "loss": 1.6816, + "step": 3494 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016140152291846703, + "loss": 1.5332, + "step": 3495 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016137877763126348, + "loss": 1.5098, + "step": 3496 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016135602724810059, + "loss": 1.541, + "step": 3497 + }, + { + "epoch": 0.31, + "learning_rate": 0.001613332717708672, + "loss": 1.6016, + "step": 3498 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016131051120145262, + "loss": 1.6797, + "step": 3499 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016128774554174648, + "loss": 1.6387, + "step": 3500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016126497479363895, + "loss": 1.6348, + "step": 3501 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016124219895902057, + "loss": 1.6992, + "step": 3502 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016121941803978225, + "loss": 1.6172, + "step": 3503 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016119663203781537, + "loss": 1.582, + "step": 3504 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016117384095501177, + "loss": 1.6074, + "step": 3505 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016115104479326368, + "loss": 1.75, + "step": 3506 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016112824355446374, + "loss": 1.5352, + "step": 3507 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016110543724050497, + "loss": 1.623, + "step": 3508 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016108262585328092, + "loss": 1.7344, + "step": 3509 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016105980939468546, + "loss": 1.6328, + "step": 3510 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016103698786661294, + "loss": 1.6348, + "step": 3511 + }, + { + "epoch": 0.31, + "learning_rate": 0.001610141612709581, + "loss": 1.8145, + "step": 3512 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016099132960961614, + "loss": 1.6641, + "step": 3513 + }, + { + "epoch": 0.31, + "learning_rate": 0.001609684928844826, + "loss": 1.7012, + "step": 3514 + }, + { + "epoch": 0.31, + "learning_rate": 0.001609456510974536, + "loss": 1.6836, + "step": 3515 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016092280425042543, + "loss": 1.5996, + "step": 3516 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016089995234529505, + "loss": 1.7539, + "step": 3517 + }, + { + "epoch": 0.31, + "learning_rate": 0.001608770953839597, + "loss": 1.6211, + "step": 3518 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016085423336831709, + "loss": 1.8418, + "step": 3519 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016083136630026527, + "loss": 1.6191, + "step": 3520 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016080849418170287, + "loss": 1.7012, + "step": 3521 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016078561701452876, + "loss": 1.6699, + "step": 3522 + }, + { + "epoch": 0.31, + "learning_rate": 0.001607627348006424, + "loss": 1.7227, + "step": 3523 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016073984754194348, + "loss": 1.6953, + "step": 3524 + }, + { + "epoch": 0.31, + "learning_rate": 0.001607169552403323, + "loss": 1.6777, + "step": 3525 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016069405789770942, + "loss": 1.7539, + "step": 3526 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016067115551597591, + "loss": 1.6582, + "step": 3527 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016064824809703321, + "loss": 1.75, + "step": 3528 + }, + { + "epoch": 0.31, + "learning_rate": 0.001606253356427833, + "loss": 1.7461, + "step": 3529 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016060241815512835, + "loss": 1.7148, + "step": 3530 + }, + { + "epoch": 0.31, + "learning_rate": 0.001605794956359712, + "loss": 1.7051, + "step": 3531 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016055656808721488, + "loss": 1.6602, + "step": 3532 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016053363551076303, + "loss": 1.6504, + "step": 3533 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016051069790851955, + "loss": 1.459, + "step": 3534 + }, + { + "epoch": 0.31, + "learning_rate": 0.001604877552823889, + "loss": 1.6562, + "step": 3535 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016046480763427583, + "loss": 1.709, + "step": 3536 + }, + { + "epoch": 0.31, + "learning_rate": 0.001604418549660856, + "loss": 1.4922, + "step": 3537 + }, + { + "epoch": 0.31, + "learning_rate": 0.001604188972797238, + "loss": 1.6934, + "step": 3538 + }, + { + "epoch": 0.31, + "learning_rate": 0.0016039593457709655, + "loss": 1.7305, + "step": 3539 + }, + { + "epoch": 0.31, + "learning_rate": 0.001603729668601103, + "loss": 1.5938, + "step": 3540 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016034999413067192, + "loss": 1.6367, + "step": 3541 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016032701639068874, + "loss": 1.6309, + "step": 3542 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016030403364206847, + "loss": 1.6738, + "step": 3543 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016028104588671924, + "loss": 1.5215, + "step": 3544 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016025805312654962, + "loss": 1.6992, + "step": 3545 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016023505536346856, + "loss": 1.5898, + "step": 3546 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016021205259938549, + "loss": 1.6016, + "step": 3547 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016018904483621015, + "loss": 1.6191, + "step": 3548 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016016603207585278, + "loss": 1.7266, + "step": 3549 + }, + { + "epoch": 0.32, + "learning_rate": 0.00160143014320224, + "loss": 1.6738, + "step": 3550 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016011999157123488, + "loss": 1.7129, + "step": 3551 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016009696383079686, + "loss": 1.6953, + "step": 3552 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016007393110082182, + "loss": 1.6289, + "step": 3553 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016005089338322204, + "loss": 1.6758, + "step": 3554 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016002785067991022, + "loss": 1.7129, + "step": 3555 + }, + { + "epoch": 0.32, + "learning_rate": 0.0016000480299279947, + "loss": 1.6328, + "step": 3556 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015998175032380334, + "loss": 1.7402, + "step": 3557 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015995869267483576, + "loss": 1.5156, + "step": 3558 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015993563004781106, + "loss": 1.6035, + "step": 3559 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015991256244464408, + "loss": 1.6934, + "step": 3560 + }, + { + "epoch": 0.32, + "learning_rate": 0.001598894898672499, + "loss": 1.7051, + "step": 3561 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015986641231754421, + "loss": 1.5293, + "step": 3562 + }, + { + "epoch": 0.32, + "learning_rate": 0.00159843329797443, + "loss": 1.5156, + "step": 3563 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015982024230886266, + "loss": 1.5664, + "step": 3564 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015979714985372004, + "loss": 1.7559, + "step": 3565 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015977405243393235, + "loss": 1.6895, + "step": 3566 + }, + { + "epoch": 0.32, + "learning_rate": 0.001597509500514173, + "loss": 1.6348, + "step": 3567 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015972784270809296, + "loss": 1.6055, + "step": 3568 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015970473040587776, + "loss": 1.5996, + "step": 3569 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015968161314669068, + "loss": 1.6934, + "step": 3570 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015965849093245092, + "loss": 1.6191, + "step": 3571 + }, + { + "epoch": 0.32, + "learning_rate": 0.001596353637650783, + "loss": 1.7109, + "step": 3572 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015961223164649286, + "loss": 1.7734, + "step": 3573 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015958909457861519, + "loss": 1.5996, + "step": 3574 + }, + { + "epoch": 0.32, + "learning_rate": 0.001595659525633662, + "loss": 1.6953, + "step": 3575 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015954280560266732, + "loss": 1.6777, + "step": 3576 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015951965369844024, + "loss": 1.5723, + "step": 3577 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015949649685260718, + "loss": 1.752, + "step": 3578 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015947333506709075, + "loss": 1.5547, + "step": 3579 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015945016834381395, + "loss": 1.6367, + "step": 3580 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015942699668470013, + "loss": 1.7715, + "step": 3581 + }, + { + "epoch": 0.32, + "learning_rate": 0.001594038200916732, + "loss": 1.5879, + "step": 3582 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015938063856665732, + "loss": 1.709, + "step": 3583 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015935745211157718, + "loss": 1.6543, + "step": 3584 + }, + { + "epoch": 0.32, + "learning_rate": 0.001593342607283578, + "loss": 1.5723, + "step": 3585 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015931106441892467, + "loss": 1.4922, + "step": 3586 + }, + { + "epoch": 0.32, + "learning_rate": 0.001592878631852036, + "loss": 1.5703, + "step": 3587 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015926465702912093, + "loss": 1.5684, + "step": 3588 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015924144595260332, + "loss": 1.6211, + "step": 3589 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015921822995757786, + "loss": 1.7031, + "step": 3590 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015919500904597206, + "loss": 1.5996, + "step": 3591 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015917178321971383, + "loss": 1.5391, + "step": 3592 + }, + { + "epoch": 0.32, + "learning_rate": 0.001591485524807315, + "loss": 1.6094, + "step": 3593 + }, + { + "epoch": 0.32, + "learning_rate": 0.001591253168309538, + "loss": 1.7207, + "step": 3594 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015910207627230979, + "loss": 1.5332, + "step": 3595 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015907883080672912, + "loss": 1.6074, + "step": 3596 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015905558043614167, + "loss": 1.6426, + "step": 3597 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015903232516247786, + "loss": 1.5781, + "step": 3598 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015900906498766837, + "loss": 1.584, + "step": 3599 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015898579991364442, + "loss": 1.6543, + "step": 3600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015896252994233757, + "loss": 1.502, + "step": 3601 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015893925507567985, + "loss": 1.6641, + "step": 3602 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015891597531560357, + "loss": 1.6055, + "step": 3603 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015889269066404164, + "loss": 1.7715, + "step": 3604 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015886940112292713, + "loss": 1.6797, + "step": 3605 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015884610669419374, + "loss": 1.6387, + "step": 3606 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015882280737977545, + "loss": 1.5879, + "step": 3607 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015879950318160674, + "loss": 1.5938, + "step": 3608 + }, + { + "epoch": 0.32, + "learning_rate": 0.001587761941016223, + "loss": 1.7051, + "step": 3609 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015875288014175753, + "loss": 1.6094, + "step": 3610 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015872956130394793, + "loss": 1.5488, + "step": 3611 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015870623759012964, + "loss": 1.5703, + "step": 3612 + }, + { + "epoch": 0.32, + "learning_rate": 0.00158682909002239, + "loss": 1.6777, + "step": 3613 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015865957554221297, + "loss": 1.7324, + "step": 3614 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015863623721198872, + "loss": 1.6562, + "step": 3615 + }, + { + "epoch": 0.32, + "learning_rate": 0.00158612894013504, + "loss": 1.6602, + "step": 3616 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015858954594869677, + "loss": 1.5312, + "step": 3617 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015856619301950557, + "loss": 1.5645, + "step": 3618 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015854283522786924, + "loss": 1.627, + "step": 3619 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015851947257572707, + "loss": 1.6582, + "step": 3620 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015849610506501874, + "loss": 1.6152, + "step": 3621 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015847273269768431, + "loss": 1.7637, + "step": 3622 + }, + { + "epoch": 0.32, + "learning_rate": 0.001584493554756643, + "loss": 1.7793, + "step": 3623 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015842597340089957, + "loss": 1.6602, + "step": 3624 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015840258647533143, + "loss": 1.8086, + "step": 3625 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015837919470090159, + "loss": 1.5977, + "step": 3626 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015835579807955207, + "loss": 1.6562, + "step": 3627 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015833239661322547, + "loss": 1.6289, + "step": 3628 + }, + { + "epoch": 0.32, + "learning_rate": 0.001583089903038646, + "loss": 1.541, + "step": 3629 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015828557915341284, + "loss": 1.6602, + "step": 3630 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015826216316381387, + "loss": 1.709, + "step": 3631 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015823874233701178, + "loss": 1.7109, + "step": 3632 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015821531667495109, + "loss": 1.6895, + "step": 3633 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015819188617957672, + "loss": 1.75, + "step": 3634 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015816845085283395, + "loss": 1.4961, + "step": 3635 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015814501069666852, + "loss": 1.6504, + "step": 3636 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015812156571302653, + "loss": 1.7188, + "step": 3637 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015809811590385453, + "loss": 1.6582, + "step": 3638 + }, + { + "epoch": 0.32, + "learning_rate": 0.001580746612710994, + "loss": 1.6719, + "step": 3639 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015805120181670844, + "loss": 1.5156, + "step": 3640 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015802773754262942, + "loss": 1.7012, + "step": 3641 + }, + { + "epoch": 0.32, + "learning_rate": 0.001580042684508104, + "loss": 1.7871, + "step": 3642 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015798079454319995, + "loss": 1.5332, + "step": 3643 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015795731582174692, + "loss": 1.6504, + "step": 3644 + }, + { + "epoch": 0.32, + "learning_rate": 0.001579338322884007, + "loss": 1.5312, + "step": 3645 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015791034394511097, + "loss": 1.6113, + "step": 3646 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015788685079382782, + "loss": 1.6543, + "step": 3647 + }, + { + "epoch": 0.32, + "learning_rate": 0.001578633528365018, + "loss": 1.7012, + "step": 3648 + }, + { + "epoch": 0.32, + "learning_rate": 0.001578398500750838, + "loss": 1.6484, + "step": 3649 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015781634251152514, + "loss": 1.6855, + "step": 3650 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015779283014777756, + "loss": 1.6426, + "step": 3651 + }, + { + "epoch": 0.32, + "learning_rate": 0.001577693129857931, + "loss": 1.7578, + "step": 3652 + }, + { + "epoch": 0.32, + "learning_rate": 0.0015774579102752436, + "loss": 1.7598, + "step": 3653 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015772226427492413, + "loss": 1.5312, + "step": 3654 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015769873272994581, + "loss": 1.7949, + "step": 3655 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015767519639454306, + "loss": 1.7988, + "step": 3656 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015765165527067, + "loss": 1.8379, + "step": 3657 + }, + { + "epoch": 0.33, + "learning_rate": 0.001576281093602811, + "loss": 1.5898, + "step": 3658 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015760455866533128, + "loss": 1.6055, + "step": 3659 + }, + { + "epoch": 0.33, + "learning_rate": 0.001575810031877758, + "loss": 1.7559, + "step": 3660 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015755744292957035, + "loss": 1.6562, + "step": 3661 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015753387789267106, + "loss": 1.5938, + "step": 3662 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015751030807903436, + "loss": 1.666, + "step": 3663 + }, + { + "epoch": 0.33, + "learning_rate": 0.001574867334906172, + "loss": 1.6309, + "step": 3664 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015746315412937678, + "loss": 1.6582, + "step": 3665 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015743956999727078, + "loss": 1.5723, + "step": 3666 + }, + { + "epoch": 0.33, + "learning_rate": 0.001574159810962573, + "loss": 1.6406, + "step": 3667 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015739238742829478, + "loss": 1.6738, + "step": 3668 + }, + { + "epoch": 0.33, + "learning_rate": 0.001573687889953421, + "loss": 1.6387, + "step": 3669 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015734518579935848, + "loss": 1.6289, + "step": 3670 + }, + { + "epoch": 0.33, + "learning_rate": 0.001573215778423036, + "loss": 1.668, + "step": 3671 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015729796512613753, + "loss": 1.6387, + "step": 3672 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015727434765282066, + "loss": 1.5703, + "step": 3673 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015725072542431382, + "loss": 1.6113, + "step": 3674 + }, + { + "epoch": 0.33, + "learning_rate": 0.001572270984425783, + "loss": 1.6289, + "step": 3675 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015720346670957564, + "loss": 1.5527, + "step": 3676 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015717983022726797, + "loss": 1.6133, + "step": 3677 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015715618899761758, + "loss": 1.5488, + "step": 3678 + }, + { + "epoch": 0.33, + "learning_rate": 0.001571325430225874, + "loss": 1.5762, + "step": 3679 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015710889230414052, + "loss": 1.6543, + "step": 3680 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015708523684424063, + "loss": 1.5938, + "step": 3681 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015706157664485162, + "loss": 1.5488, + "step": 3682 + }, + { + "epoch": 0.33, + "learning_rate": 0.00157037911707938, + "loss": 1.6387, + "step": 3683 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015701424203546442, + "loss": 1.752, + "step": 3684 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015699056762939613, + "loss": 1.6191, + "step": 3685 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015696688849169865, + "loss": 1.582, + "step": 3686 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015694320462433797, + "loss": 1.7266, + "step": 3687 + }, + { + "epoch": 0.33, + "learning_rate": 0.001569195160292804, + "loss": 1.5938, + "step": 3688 + }, + { + "epoch": 0.33, + "learning_rate": 0.001568958227084927, + "loss": 1.7207, + "step": 3689 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015687212466394204, + "loss": 1.6816, + "step": 3690 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015684842189759587, + "loss": 1.5527, + "step": 3691 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015682471441142215, + "loss": 1.6133, + "step": 3692 + }, + { + "epoch": 0.33, + "learning_rate": 0.001568010022073892, + "loss": 1.6445, + "step": 3693 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015677728528746568, + "loss": 1.6133, + "step": 3694 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015675356365362073, + "loss": 1.6367, + "step": 3695 + }, + { + "epoch": 0.33, + "learning_rate": 0.001567298373078238, + "loss": 1.6426, + "step": 3696 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015670610625204478, + "loss": 1.6309, + "step": 3697 + }, + { + "epoch": 0.33, + "learning_rate": 0.001566823704882539, + "loss": 1.7188, + "step": 3698 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015665863001842189, + "loss": 1.5938, + "step": 3699 + }, + { + "epoch": 0.33, + "learning_rate": 0.001566348848445197, + "loss": 1.5723, + "step": 3700 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015661113496851886, + "loss": 1.5781, + "step": 3701 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015658738039239113, + "loss": 1.5215, + "step": 3702 + }, + { + "epoch": 0.33, + "learning_rate": 0.001565636211181088, + "loss": 1.6348, + "step": 3703 + }, + { + "epoch": 0.33, + "learning_rate": 0.001565398571476444, + "loss": 1.7402, + "step": 3704 + }, + { + "epoch": 0.33, + "learning_rate": 0.00156516088482971, + "loss": 1.6172, + "step": 3705 + }, + { + "epoch": 0.33, + "learning_rate": 0.001564923151260619, + "loss": 1.6641, + "step": 3706 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015646853707889097, + "loss": 1.6777, + "step": 3707 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015644475434343232, + "loss": 1.6914, + "step": 3708 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015642096692166055, + "loss": 1.7148, + "step": 3709 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015639717481555054, + "loss": 1.5293, + "step": 3710 + }, + { + "epoch": 0.33, + "learning_rate": 0.001563733780270777, + "loss": 1.666, + "step": 3711 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015634957655821767, + "loss": 1.6074, + "step": 3712 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015632577041094664, + "loss": 1.627, + "step": 3713 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015630195958724108, + "loss": 1.498, + "step": 3714 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015627814408907786, + "loss": 1.6211, + "step": 3715 + }, + { + "epoch": 0.33, + "learning_rate": 0.001562543239184343, + "loss": 1.5508, + "step": 3716 + }, + { + "epoch": 0.33, + "learning_rate": 0.00156230499077288, + "loss": 1.6387, + "step": 3717 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015620666956761707, + "loss": 1.5996, + "step": 3718 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015618283539139987, + "loss": 1.5078, + "step": 3719 + }, + { + "epoch": 0.33, + "learning_rate": 0.001561589965506154, + "loss": 1.6348, + "step": 3720 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015613515304724267, + "loss": 1.5156, + "step": 3721 + }, + { + "epoch": 0.33, + "learning_rate": 0.001561113048832614, + "loss": 1.6621, + "step": 3722 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015608745206065154, + "loss": 1.7617, + "step": 3723 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015606359458139349, + "loss": 1.5605, + "step": 3724 + }, + { + "epoch": 0.33, + "learning_rate": 0.00156039732447468, + "loss": 1.6602, + "step": 3725 + }, + { + "epoch": 0.33, + "learning_rate": 0.001560158656608562, + "loss": 1.627, + "step": 3726 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015599199422353966, + "loss": 1.584, + "step": 3727 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015596811813750027, + "loss": 1.6426, + "step": 3728 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015594423740472034, + "loss": 1.5703, + "step": 3729 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015592035202718257, + "loss": 1.7109, + "step": 3730 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015589646200687007, + "loss": 1.6582, + "step": 3731 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015587256734576624, + "loss": 1.498, + "step": 3732 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015584866804585497, + "loss": 1.5938, + "step": 3733 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015582476410912049, + "loss": 1.6172, + "step": 3734 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015580085553754742, + "loss": 1.6973, + "step": 3735 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015577694233312074, + "loss": 1.4746, + "step": 3736 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015575302449782587, + "loss": 1.6191, + "step": 3737 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015572910203364857, + "loss": 1.5859, + "step": 3738 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015570517494257498, + "loss": 1.6523, + "step": 3739 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015568124322659167, + "loss": 1.5215, + "step": 3740 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015565730688768555, + "loss": 1.5996, + "step": 3741 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015563336592784394, + "loss": 1.6777, + "step": 3742 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015560942034905452, + "loss": 1.6172, + "step": 3743 + }, + { + "epoch": 0.33, + "learning_rate": 0.001555854701533054, + "loss": 1.6035, + "step": 3744 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015556151534258495, + "loss": 1.5801, + "step": 3745 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015553755591888212, + "loss": 1.5742, + "step": 3746 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015551359188418607, + "loss": 1.6621, + "step": 3747 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015548962324048645, + "loss": 1.6348, + "step": 3748 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015546564998977321, + "loss": 1.7227, + "step": 3749 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015544167213403677, + "loss": 1.6367, + "step": 3750 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015541768967526782, + "loss": 1.5898, + "step": 3751 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015539370261545757, + "loss": 1.748, + "step": 3752 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015536971095659752, + "loss": 1.6133, + "step": 3753 + }, + { + "epoch": 0.33, + "learning_rate": 0.001553457147006795, + "loss": 1.6934, + "step": 3754 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015532171384969589, + "loss": 1.6133, + "step": 3755 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015529770840563934, + "loss": 1.5859, + "step": 3756 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015527369837050285, + "loss": 1.7148, + "step": 3757 + }, + { + "epoch": 0.33, + "learning_rate": 0.001552496837462799, + "loss": 1.6758, + "step": 3758 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015522566453496426, + "loss": 1.6777, + "step": 3759 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015520164073855012, + "loss": 1.4297, + "step": 3760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015517761235903206, + "loss": 1.6504, + "step": 3761 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015515357939840508, + "loss": 1.6582, + "step": 3762 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015512954185866441, + "loss": 1.498, + "step": 3763 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015510549974180586, + "loss": 1.7148, + "step": 3764 + }, + { + "epoch": 0.33, + "learning_rate": 0.0015508145304982545, + "loss": 1.7305, + "step": 3765 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015505740178471967, + "loss": 1.6934, + "step": 3766 + }, + { + "epoch": 0.34, + "learning_rate": 0.001550333459484854, + "loss": 1.5117, + "step": 3767 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015500928554311986, + "loss": 1.7031, + "step": 3768 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015498522057062064, + "loss": 1.5703, + "step": 3769 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015496115103298571, + "loss": 1.6602, + "step": 3770 + }, + { + "epoch": 0.34, + "learning_rate": 0.001549370769322135, + "loss": 1.6211, + "step": 3771 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015491299827030272, + "loss": 1.6875, + "step": 3772 + }, + { + "epoch": 0.34, + "learning_rate": 0.001548889150492525, + "loss": 1.6191, + "step": 3773 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015486482727106235, + "loss": 1.75, + "step": 3774 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015484073493773213, + "loss": 1.5, + "step": 3775 + }, + { + "epoch": 0.34, + "learning_rate": 0.001548166380512621, + "loss": 1.6367, + "step": 3776 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015479253661365296, + "loss": 1.666, + "step": 3777 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015476843062690567, + "loss": 1.5801, + "step": 3778 + }, + { + "epoch": 0.34, + "learning_rate": 0.001547443200930216, + "loss": 1.6445, + "step": 3779 + }, + { + "epoch": 0.34, + "learning_rate": 0.001547202050140026, + "loss": 1.6602, + "step": 3780 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015469608539185075, + "loss": 1.7207, + "step": 3781 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015467196122856861, + "loss": 1.5801, + "step": 3782 + }, + { + "epoch": 0.34, + "learning_rate": 0.001546478325261591, + "loss": 1.5918, + "step": 3783 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015462369928662546, + "loss": 1.5293, + "step": 3784 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015459956151197135, + "loss": 1.6113, + "step": 3785 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015457541920420085, + "loss": 1.5254, + "step": 3786 + }, + { + "epoch": 0.34, + "learning_rate": 0.001545512723653183, + "loss": 1.7598, + "step": 3787 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015452712099732855, + "loss": 1.6152, + "step": 3788 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015450296510223672, + "loss": 1.6797, + "step": 3789 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015447880468204835, + "loss": 1.5137, + "step": 3790 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015445463973876937, + "loss": 1.666, + "step": 3791 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015443047027440607, + "loss": 1.6055, + "step": 3792 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015440629629096512, + "loss": 1.5938, + "step": 3793 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015438211779045354, + "loss": 1.5625, + "step": 3794 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015435793477487874, + "loss": 1.6504, + "step": 3795 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015433374724624857, + "loss": 1.6094, + "step": 3796 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015430955520657108, + "loss": 1.5508, + "step": 3797 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015428535865785493, + "loss": 1.627, + "step": 3798 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015426115760210895, + "loss": 1.5898, + "step": 3799 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015423695204134246, + "loss": 1.6973, + "step": 3800 + }, + { + "epoch": 0.34, + "learning_rate": 0.001542127419775651, + "loss": 1.6387, + "step": 3801 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015418852741278695, + "loss": 1.5684, + "step": 3802 + }, + { + "epoch": 0.34, + "learning_rate": 0.001541643083490184, + "loss": 1.6094, + "step": 3803 + }, + { + "epoch": 0.34, + "learning_rate": 0.001541400847882702, + "loss": 1.6055, + "step": 3804 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015411585673255352, + "loss": 1.7344, + "step": 3805 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015409162418387992, + "loss": 1.6797, + "step": 3806 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015406738714426127, + "loss": 1.6914, + "step": 3807 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015404314561570987, + "loss": 1.6836, + "step": 3808 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015401889960023833, + "loss": 1.543, + "step": 3809 + }, + { + "epoch": 0.34, + "learning_rate": 0.001539946490998597, + "loss": 1.6602, + "step": 3810 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015397039411658739, + "loss": 1.5898, + "step": 3811 + }, + { + "epoch": 0.34, + "learning_rate": 0.001539461346524351, + "loss": 1.6602, + "step": 3812 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015392187070941703, + "loss": 1.6641, + "step": 3813 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015389760228954766, + "loss": 1.5469, + "step": 3814 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015387332939484186, + "loss": 1.6484, + "step": 3815 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015384905202731494, + "loss": 1.6875, + "step": 3816 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015382477018898242, + "loss": 1.6777, + "step": 3817 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015380048388186041, + "loss": 1.6172, + "step": 3818 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015377619310796519, + "loss": 1.5469, + "step": 3819 + }, + { + "epoch": 0.34, + "learning_rate": 0.001537518978693135, + "loss": 1.541, + "step": 3820 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015372759816792252, + "loss": 1.6328, + "step": 3821 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015370329400580967, + "loss": 1.6465, + "step": 3822 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015367898538499278, + "loss": 1.5449, + "step": 3823 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015365467230749011, + "loss": 1.5352, + "step": 3824 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015363035477532023, + "loss": 1.6465, + "step": 3825 + }, + { + "epoch": 0.34, + "learning_rate": 0.001536060327905021, + "loss": 1.5605, + "step": 3826 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015358170635505503, + "loss": 1.5977, + "step": 3827 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015355737547099878, + "loss": 1.6289, + "step": 3828 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015353304014035331, + "loss": 1.5645, + "step": 3829 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015350870036513916, + "loss": 1.6719, + "step": 3830 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015348435614737708, + "loss": 1.6934, + "step": 3831 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015346000748908827, + "loss": 1.6152, + "step": 3832 + }, + { + "epoch": 0.34, + "learning_rate": 0.001534356543922942, + "loss": 1.6875, + "step": 3833 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015341129685901688, + "loss": 1.6543, + "step": 3834 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015338693489127856, + "loss": 1.4922, + "step": 3835 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015336256849110184, + "loss": 1.6133, + "step": 3836 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015333819766050977, + "loss": 1.5645, + "step": 3837 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015331382240152576, + "loss": 1.6641, + "step": 3838 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015328944271617352, + "loss": 1.5723, + "step": 3839 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015326505860647714, + "loss": 1.5508, + "step": 3840 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015324067007446117, + "loss": 1.5879, + "step": 3841 + }, + { + "epoch": 0.34, + "learning_rate": 0.001532162771221504, + "loss": 1.5957, + "step": 3842 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015319187975157012, + "loss": 1.5605, + "step": 3843 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015316747796474588, + "loss": 1.6348, + "step": 3844 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015314307176370362, + "loss": 1.707, + "step": 3845 + }, + { + "epoch": 0.34, + "learning_rate": 0.001531186611504697, + "loss": 1.6172, + "step": 3846 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015309424612707074, + "loss": 1.668, + "step": 3847 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015306982669553387, + "loss": 1.5898, + "step": 3848 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015304540285788644, + "loss": 1.6172, + "step": 3849 + }, + { + "epoch": 0.34, + "learning_rate": 0.001530209746161563, + "loss": 1.6504, + "step": 3850 + }, + { + "epoch": 0.34, + "learning_rate": 0.001529965419723715, + "loss": 1.7656, + "step": 3851 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015297210492856066, + "loss": 1.7324, + "step": 3852 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015294766348675265, + "loss": 1.6387, + "step": 3853 + }, + { + "epoch": 0.34, + "learning_rate": 0.001529232176489766, + "loss": 1.5605, + "step": 3854 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015289876741726226, + "loss": 1.7539, + "step": 3855 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015287431279363951, + "loss": 1.7305, + "step": 3856 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015284985378013877, + "loss": 1.5469, + "step": 3857 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015282539037879067, + "loss": 1.5957, + "step": 3858 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015280092259162634, + "loss": 1.623, + "step": 3859 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015277645042067714, + "loss": 1.6172, + "step": 3860 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015275197386797495, + "loss": 1.6426, + "step": 3861 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015272749293555187, + "loss": 1.5547, + "step": 3862 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015270300762544045, + "loss": 1.6387, + "step": 3863 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015267851793967354, + "loss": 1.5762, + "step": 3864 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015265402388028445, + "loss": 1.6133, + "step": 3865 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015262952544930675, + "loss": 1.7051, + "step": 3866 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015260502264877445, + "loss": 1.5117, + "step": 3867 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015258051548072186, + "loss": 1.6309, + "step": 3868 + }, + { + "epoch": 0.34, + "learning_rate": 0.001525560039471837, + "loss": 1.6562, + "step": 3869 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015253148805019498, + "loss": 1.582, + "step": 3870 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015250696779179123, + "loss": 1.7969, + "step": 3871 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015248244317400818, + "loss": 1.6914, + "step": 3872 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015245791419888197, + "loss": 1.6074, + "step": 3873 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015243338086844914, + "loss": 1.6309, + "step": 3874 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015240884318474652, + "loss": 1.6504, + "step": 3875 + }, + { + "epoch": 0.34, + "learning_rate": 0.001523843011498114, + "loss": 1.6309, + "step": 3876 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015235975476568138, + "loss": 1.5938, + "step": 3877 + }, + { + "epoch": 0.34, + "learning_rate": 0.0015233520403439439, + "loss": 1.7012, + "step": 3878 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015231064895798872, + "loss": 1.7988, + "step": 3879 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015228608953850307, + "loss": 1.748, + "step": 3880 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015226152577797651, + "loss": 1.5898, + "step": 3881 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015223695767844845, + "loss": 1.6973, + "step": 3882 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015221238524195859, + "loss": 1.5215, + "step": 3883 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015218780847054707, + "loss": 1.7129, + "step": 3884 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015216322736625442, + "loss": 1.6504, + "step": 3885 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015213864193112144, + "loss": 1.6699, + "step": 3886 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015211405216718932, + "loss": 1.502, + "step": 3887 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015208945807649965, + "loss": 1.7207, + "step": 3888 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015206485966109433, + "loss": 1.5371, + "step": 3889 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015204025692301564, + "loss": 1.6055, + "step": 3890 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015201564986430621, + "loss": 1.5586, + "step": 3891 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015199103848700908, + "loss": 1.5879, + "step": 3892 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015196642279316755, + "loss": 1.5293, + "step": 3893 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015194180278482535, + "loss": 1.5762, + "step": 3894 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015191717846402655, + "loss": 1.6562, + "step": 3895 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015189254983281558, + "loss": 1.7832, + "step": 3896 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015186791689323722, + "loss": 1.6191, + "step": 3897 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015184327964733665, + "loss": 1.7285, + "step": 3898 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015181863809715932, + "loss": 1.5176, + "step": 3899 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015179399224475114, + "loss": 1.6699, + "step": 3900 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015176934209215826, + "loss": 1.6094, + "step": 3901 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015174468764142732, + "loss": 1.6621, + "step": 3902 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015172002889460522, + "loss": 1.6465, + "step": 3903 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015169536585373927, + "loss": 1.7168, + "step": 3904 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015167069852087703, + "loss": 1.5645, + "step": 3905 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015164602689806662, + "loss": 1.7422, + "step": 3906 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015162135098735633, + "loss": 1.5566, + "step": 3907 + }, + { + "epoch": 0.35, + "learning_rate": 0.001515966707907949, + "loss": 1.5566, + "step": 3908 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015157198631043133, + "loss": 1.7324, + "step": 3909 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015154729754831512, + "loss": 1.6543, + "step": 3910 + }, + { + "epoch": 0.35, + "learning_rate": 0.00151522604506496, + "loss": 1.6172, + "step": 3911 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015149790718702416, + "loss": 1.6543, + "step": 3912 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015147320559195003, + "loss": 1.7402, + "step": 3913 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015144849972332452, + "loss": 1.6445, + "step": 3914 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015142378958319875, + "loss": 1.6406, + "step": 3915 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015139907517362431, + "loss": 1.7656, + "step": 3916 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015137435649665314, + "loss": 1.6719, + "step": 3917 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015134963355433748, + "loss": 1.6094, + "step": 3918 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015132490634872991, + "loss": 1.5215, + "step": 3919 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015130017488188346, + "loss": 1.7871, + "step": 3920 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015127543915585137, + "loss": 1.6562, + "step": 3921 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015125069917268743, + "loss": 1.6562, + "step": 3922 + }, + { + "epoch": 0.35, + "learning_rate": 0.001512259549344456, + "loss": 1.5859, + "step": 3923 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015120120644318026, + "loss": 1.6387, + "step": 3924 + }, + { + "epoch": 0.35, + "learning_rate": 0.001511764537009462, + "loss": 1.6719, + "step": 3925 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015115169670979847, + "loss": 1.5938, + "step": 3926 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015112693547179252, + "loss": 1.5898, + "step": 3927 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015110216998898416, + "loss": 1.6914, + "step": 3928 + }, + { + "epoch": 0.35, + "learning_rate": 0.001510774002634295, + "loss": 1.6035, + "step": 3929 + }, + { + "epoch": 0.35, + "learning_rate": 0.001510526262971851, + "loss": 1.6719, + "step": 3930 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015102784809230775, + "loss": 1.5488, + "step": 3931 + }, + { + "epoch": 0.35, + "learning_rate": 0.001510030656508547, + "loss": 1.6777, + "step": 3932 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015097827897488352, + "loss": 1.7891, + "step": 3933 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015095348806645208, + "loss": 1.5898, + "step": 3934 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015092869292761863, + "loss": 1.709, + "step": 3935 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015090389356044182, + "loss": 1.5977, + "step": 3936 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015087908996698062, + "loss": 1.6445, + "step": 3937 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015085428214929428, + "loss": 1.6152, + "step": 3938 + }, + { + "epoch": 0.35, + "learning_rate": 0.001508294701094425, + "loss": 1.6016, + "step": 3939 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015080465384948532, + "loss": 1.6523, + "step": 3940 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015077983337148306, + "loss": 1.6328, + "step": 3941 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015075500867749647, + "loss": 1.6055, + "step": 3942 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015073017976958657, + "loss": 1.4766, + "step": 3943 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015070534664981482, + "loss": 1.6055, + "step": 3944 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015068050932024297, + "loss": 1.6387, + "step": 3945 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015065566778293313, + "loss": 1.7266, + "step": 3946 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015063082203994776, + "loss": 1.7715, + "step": 3947 + }, + { + "epoch": 0.35, + "learning_rate": 0.001506059720933497, + "loss": 1.4805, + "step": 3948 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015058111794520204, + "loss": 1.582, + "step": 3949 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015055625959756837, + "loss": 1.6621, + "step": 3950 + }, + { + "epoch": 0.35, + "learning_rate": 0.001505313970525125, + "loss": 1.5938, + "step": 3951 + }, + { + "epoch": 0.35, + "learning_rate": 0.001505065303120987, + "loss": 1.6484, + "step": 3952 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015048165937839141, + "loss": 1.7188, + "step": 3953 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015045678425345565, + "loss": 1.6152, + "step": 3954 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015043190493935661, + "loss": 1.5605, + "step": 3955 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015040702143815992, + "loss": 1.5762, + "step": 3956 + }, + { + "epoch": 0.35, + "learning_rate": 0.001503821337519315, + "loss": 1.6152, + "step": 3957 + }, + { + "epoch": 0.35, + "learning_rate": 0.001503572418827377, + "loss": 1.6211, + "step": 3958 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015033234583264506, + "loss": 1.6504, + "step": 3959 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015030744560372066, + "loss": 1.6895, + "step": 3960 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015028254119803174, + "loss": 1.5977, + "step": 3961 + }, + { + "epoch": 0.35, + "learning_rate": 0.001502576326176461, + "loss": 1.6035, + "step": 3962 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015023271986463172, + "loss": 1.6641, + "step": 3963 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015020780294105697, + "loss": 1.6367, + "step": 3964 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015018288184899054, + "loss": 1.707, + "step": 3965 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015015795659050158, + "loss": 1.7168, + "step": 3966 + }, + { + "epoch": 0.35, + "learning_rate": 0.001501330271676594, + "loss": 1.5957, + "step": 3967 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015010809358253386, + "loss": 1.5625, + "step": 3968 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015008315583719498, + "loss": 1.5977, + "step": 3969 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015005821393371328, + "loss": 1.6504, + "step": 3970 + }, + { + "epoch": 0.35, + "learning_rate": 0.001500332678741595, + "loss": 1.4844, + "step": 3971 + }, + { + "epoch": 0.35, + "learning_rate": 0.0015000831766060485, + "loss": 1.6973, + "step": 3972 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014998336329512074, + "loss": 1.5801, + "step": 3973 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014995840477977907, + "loss": 1.6289, + "step": 3974 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014993344211665194, + "loss": 1.6094, + "step": 3975 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014990847530781193, + "loss": 1.5645, + "step": 3976 + }, + { + "epoch": 0.35, + "learning_rate": 0.001498835043553319, + "loss": 1.752, + "step": 3977 + }, + { + "epoch": 0.35, + "learning_rate": 0.00149858529261285, + "loss": 1.7031, + "step": 3978 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014983355002774485, + "loss": 1.6387, + "step": 3979 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014980856665678531, + "loss": 1.6504, + "step": 3980 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014978357915048065, + "loss": 1.5918, + "step": 3981 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014975858751090539, + "loss": 1.6387, + "step": 3982 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014973359174013452, + "loss": 1.7598, + "step": 3983 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014970859184024327, + "loss": 1.5234, + "step": 3984 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014968358781330725, + "loss": 1.6855, + "step": 3985 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014965857966140245, + "loss": 1.5039, + "step": 3986 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014963356738660514, + "loss": 1.5566, + "step": 3987 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014960855099099195, + "loss": 1.7285, + "step": 3988 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014958353047663988, + "loss": 1.5488, + "step": 3989 + }, + { + "epoch": 0.35, + "learning_rate": 0.0014955850584562622, + "loss": 1.6504, + "step": 3990 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014953347710002871, + "loss": 1.5957, + "step": 3991 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014950844424192527, + "loss": 1.6406, + "step": 3992 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014948340727339432, + "loss": 1.6426, + "step": 3993 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014945836619651447, + "loss": 1.582, + "step": 3994 + }, + { + "epoch": 0.36, + "learning_rate": 0.001494333210133648, + "loss": 1.5352, + "step": 3995 + }, + { + "epoch": 0.36, + "learning_rate": 0.001494082717260247, + "loss": 1.6484, + "step": 3996 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014938321833657385, + "loss": 1.6992, + "step": 3997 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014935816084709226, + "loss": 1.7051, + "step": 3998 + }, + { + "epoch": 0.36, + "learning_rate": 0.001493330992596604, + "loss": 1.6758, + "step": 3999 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014930803357635898, + "loss": 1.5762, + "step": 4000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014928296379926906, + "loss": 1.6934, + "step": 4001 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014925788993047204, + "loss": 1.6387, + "step": 4002 + }, + { + "epoch": 0.36, + "learning_rate": 0.001492328119720497, + "loss": 1.6562, + "step": 4003 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014920772992608412, + "loss": 1.4922, + "step": 4004 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014918264379465774, + "loss": 1.5703, + "step": 4005 + }, + { + "epoch": 0.36, + "learning_rate": 0.001491575535798533, + "loss": 1.6445, + "step": 4006 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014913245928375397, + "loss": 1.5801, + "step": 4007 + }, + { + "epoch": 0.36, + "learning_rate": 0.001491073609084431, + "loss": 1.5605, + "step": 4008 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014908225845600461, + "loss": 1.6973, + "step": 4009 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014905715192852248, + "loss": 1.5586, + "step": 4010 + }, + { + "epoch": 0.36, + "learning_rate": 0.001490320413280813, + "loss": 1.6328, + "step": 4011 + }, + { + "epoch": 0.36, + "learning_rate": 0.001490069266567658, + "loss": 1.5605, + "step": 4012 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014898180791666116, + "loss": 1.6719, + "step": 4013 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014895668510985281, + "loss": 1.7227, + "step": 4014 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014893155823842662, + "loss": 1.5957, + "step": 4015 + }, + { + "epoch": 0.36, + "learning_rate": 0.001489064273044687, + "loss": 1.6855, + "step": 4016 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014888129231006555, + "loss": 1.6953, + "step": 4017 + }, + { + "epoch": 0.36, + "learning_rate": 0.00148856153257304, + "loss": 1.627, + "step": 4018 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014883101014827123, + "loss": 1.7422, + "step": 4019 + }, + { + "epoch": 0.36, + "learning_rate": 0.001488058629850547, + "loss": 1.8203, + "step": 4020 + }, + { + "epoch": 0.36, + "learning_rate": 0.001487807117697423, + "loss": 1.5879, + "step": 4021 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014875555650442217, + "loss": 1.6289, + "step": 4022 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014873039719118283, + "loss": 1.8008, + "step": 4023 + }, + { + "epoch": 0.36, + "learning_rate": 0.001487052338321131, + "loss": 1.6016, + "step": 4024 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014868006642930224, + "loss": 1.5879, + "step": 4025 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014865489498483968, + "loss": 1.7227, + "step": 4026 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014862971950081534, + "loss": 1.7148, + "step": 4027 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014860453997931936, + "loss": 1.5781, + "step": 4028 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014857935642244228, + "loss": 1.709, + "step": 4029 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014855416883227497, + "loss": 1.7578, + "step": 4030 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014852897721090862, + "loss": 1.6113, + "step": 4031 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014850378156043473, + "loss": 1.6445, + "step": 4032 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014847858188294523, + "loss": 1.5762, + "step": 4033 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014845337818053224, + "loss": 1.6348, + "step": 4034 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014842817045528833, + "loss": 1.6562, + "step": 4035 + }, + { + "epoch": 0.36, + "learning_rate": 0.001484029587093064, + "loss": 1.5781, + "step": 4036 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014837774294467958, + "loss": 1.6211, + "step": 4037 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014835252316350144, + "loss": 1.6914, + "step": 4038 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014832729936786586, + "loss": 1.6641, + "step": 4039 + }, + { + "epoch": 0.36, + "learning_rate": 0.00148302071559867, + "loss": 1.584, + "step": 4040 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014827683974159944, + "loss": 1.5859, + "step": 4041 + }, + { + "epoch": 0.36, + "learning_rate": 0.00148251603915158, + "loss": 1.6484, + "step": 4042 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014822636408263793, + "loss": 1.6133, + "step": 4043 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014820112024613468, + "loss": 1.6602, + "step": 4044 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014817587240774422, + "loss": 1.6211, + "step": 4045 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014815062056956264, + "loss": 1.6191, + "step": 4046 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014812536473368656, + "loss": 1.5664, + "step": 4047 + }, + { + "epoch": 0.36, + "learning_rate": 0.001481001049022128, + "loss": 1.6387, + "step": 4048 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014807484107723853, + "loss": 1.7285, + "step": 4049 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014804957326086132, + "loss": 1.7812, + "step": 4050 + }, + { + "epoch": 0.36, + "learning_rate": 0.00148024301455179, + "loss": 1.5547, + "step": 4051 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014799902566228972, + "loss": 1.5176, + "step": 4052 + }, + { + "epoch": 0.36, + "learning_rate": 0.001479737458842921, + "loss": 1.5957, + "step": 4053 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014794846212328485, + "loss": 1.5469, + "step": 4054 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014792317438136729, + "loss": 1.5566, + "step": 4055 + }, + { + "epoch": 0.36, + "learning_rate": 0.001478978826606388, + "loss": 1.6328, + "step": 4056 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014787258696319931, + "loss": 1.6582, + "step": 4057 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014784728729114898, + "loss": 1.6133, + "step": 4058 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014782198364658825, + "loss": 1.5371, + "step": 4059 + }, + { + "epoch": 0.36, + "learning_rate": 0.00147796676031618, + "loss": 1.6445, + "step": 4060 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014777136444833938, + "loss": 1.6562, + "step": 4061 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014774604889885388, + "loss": 1.5664, + "step": 4062 + }, + { + "epoch": 0.36, + "learning_rate": 0.001477207293852633, + "loss": 1.6699, + "step": 4063 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014769540590966982, + "loss": 1.6074, + "step": 4064 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014767007847417586, + "loss": 1.6016, + "step": 4065 + }, + { + "epoch": 0.36, + "learning_rate": 0.001476447470808843, + "loss": 1.6582, + "step": 4066 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014761941173189821, + "loss": 1.5332, + "step": 4067 + }, + { + "epoch": 0.36, + "learning_rate": 0.001475940724293211, + "loss": 1.6504, + "step": 4068 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014756872917525672, + "loss": 1.5938, + "step": 4069 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014754338197180922, + "loss": 1.7148, + "step": 4070 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014751803082108299, + "loss": 1.7227, + "step": 4071 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014749267572518288, + "loss": 1.752, + "step": 4072 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014746731668621395, + "loss": 1.6562, + "step": 4073 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014744195370628161, + "loss": 1.5566, + "step": 4074 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014741658678749168, + "loss": 1.8262, + "step": 4075 + }, + { + "epoch": 0.36, + "learning_rate": 0.001473912159319502, + "loss": 1.5918, + "step": 4076 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014736584114176355, + "loss": 1.6406, + "step": 4077 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014734046241903851, + "loss": 1.6992, + "step": 4078 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014731507976588213, + "loss": 1.625, + "step": 4079 + }, + { + "epoch": 0.36, + "learning_rate": 0.001472896931844018, + "loss": 1.5859, + "step": 4080 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014726430267670524, + "loss": 1.6172, + "step": 4081 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014723890824490048, + "loss": 1.6699, + "step": 4082 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014721350989109587, + "loss": 1.6426, + "step": 4083 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014718810761740018, + "loss": 1.7422, + "step": 4084 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014716270142592235, + "loss": 1.6348, + "step": 4085 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014713729131877174, + "loss": 1.6289, + "step": 4086 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014711187729805801, + "loss": 1.6641, + "step": 4087 + }, + { + "epoch": 0.36, + "learning_rate": 0.001470864593658912, + "loss": 1.6699, + "step": 4088 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014706103752438159, + "loss": 1.584, + "step": 4089 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014703561177563983, + "loss": 1.6777, + "step": 4090 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014701018212177686, + "loss": 1.5996, + "step": 4091 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014698474856490403, + "loss": 1.5566, + "step": 4092 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014695931110713292, + "loss": 1.5156, + "step": 4093 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014693386975057545, + "loss": 1.5938, + "step": 4094 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014690842449734394, + "loss": 1.5762, + "step": 4095 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014688297534955095, + "loss": 1.748, + "step": 4096 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014685752230930937, + "loss": 1.5898, + "step": 4097 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014683206537873241, + "loss": 1.7012, + "step": 4098 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014680660455993369, + "loss": 1.5879, + "step": 4099 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014678113985502708, + "loss": 1.4707, + "step": 4100 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014675567126612673, + "loss": 1.6191, + "step": 4101 + }, + { + "epoch": 0.36, + "learning_rate": 0.0014673019879534721, + "loss": 1.5645, + "step": 4102 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014670472244480335, + "loss": 1.6055, + "step": 4103 + }, + { + "epoch": 0.37, + "learning_rate": 0.001466792422166103, + "loss": 1.5625, + "step": 4104 + }, + { + "epoch": 0.37, + "learning_rate": 0.001466537581128836, + "loss": 1.5586, + "step": 4105 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014662827013573903, + "loss": 1.6172, + "step": 4106 + }, + { + "epoch": 0.37, + "learning_rate": 0.001466027782872927, + "loss": 1.6777, + "step": 4107 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014657728256966113, + "loss": 1.5977, + "step": 4108 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014655178298496102, + "loss": 1.5938, + "step": 4109 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014652627953530952, + "loss": 1.5859, + "step": 4110 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014650077222282403, + "loss": 1.5664, + "step": 4111 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014647526104962232, + "loss": 1.5371, + "step": 4112 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014644974601782238, + "loss": 1.6094, + "step": 4113 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014642422712954266, + "loss": 1.7246, + "step": 4114 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014639870438690184, + "loss": 1.7793, + "step": 4115 + }, + { + "epoch": 0.37, + "learning_rate": 0.001463731777920189, + "loss": 1.7734, + "step": 4116 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014634764734701323, + "loss": 1.5508, + "step": 4117 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014632211305400447, + "loss": 1.709, + "step": 4118 + }, + { + "epoch": 0.37, + "learning_rate": 0.001462965749151126, + "loss": 1.6816, + "step": 4119 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014627103293245792, + "loss": 1.5547, + "step": 4120 + }, + { + "epoch": 0.37, + "learning_rate": 0.00146245487108161, + "loss": 1.5293, + "step": 4121 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014621993744434287, + "loss": 1.6504, + "step": 4122 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014619438394312474, + "loss": 1.6719, + "step": 4123 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014616882660662817, + "loss": 1.5898, + "step": 4124 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014614326543697506, + "loss": 1.666, + "step": 4125 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014611770043628763, + "loss": 1.5977, + "step": 4126 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014609213160668838, + "loss": 1.6602, + "step": 4127 + }, + { + "epoch": 0.37, + "learning_rate": 0.001460665589503002, + "loss": 1.6953, + "step": 4128 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014604098246924623, + "loss": 1.6211, + "step": 4129 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014601540216564995, + "loss": 1.666, + "step": 4130 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014598981804163514, + "loss": 1.627, + "step": 4131 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014596423009932598, + "loss": 1.498, + "step": 4132 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014593863834084682, + "loss": 1.5566, + "step": 4133 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014591304276832249, + "loss": 1.6543, + "step": 4134 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014588744338387802, + "loss": 1.7207, + "step": 4135 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014586184018963878, + "loss": 1.6191, + "step": 4136 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014583623318773047, + "loss": 1.6719, + "step": 4137 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014581062238027914, + "loss": 1.7266, + "step": 4138 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014578500776941106, + "loss": 1.582, + "step": 4139 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014575938935725294, + "loss": 1.5352, + "step": 4140 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014573376714593174, + "loss": 1.5391, + "step": 4141 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014570814113757467, + "loss": 1.6895, + "step": 4142 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014568251133430942, + "loss": 1.5918, + "step": 4143 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014565687773826382, + "loss": 1.4941, + "step": 4144 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014563124035156614, + "loss": 1.6074, + "step": 4145 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014560559917634489, + "loss": 1.6777, + "step": 4146 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014557995421472893, + "loss": 1.6562, + "step": 4147 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014555430546884743, + "loss": 1.7402, + "step": 4148 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014552865294082992, + "loss": 1.5312, + "step": 4149 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014550299663280608, + "loss": 1.6133, + "step": 4150 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014547733654690614, + "loss": 1.7637, + "step": 4151 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014545167268526042, + "loss": 1.6172, + "step": 4152 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014542600504999976, + "loss": 1.5508, + "step": 4153 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014540033364325512, + "loss": 1.5293, + "step": 4154 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014537465846715793, + "loss": 1.6855, + "step": 4155 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014534897952383982, + "loss": 1.6543, + "step": 4156 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014532329681543282, + "loss": 1.6641, + "step": 4157 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014529761034406917, + "loss": 1.625, + "step": 4158 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014527192011188153, + "loss": 1.6523, + "step": 4159 + }, + { + "epoch": 0.37, + "learning_rate": 0.001452462261210028, + "loss": 1.6895, + "step": 4160 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014522052837356626, + "loss": 1.6191, + "step": 4161 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014519482687170542, + "loss": 1.6699, + "step": 4162 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014516912161755416, + "loss": 1.6328, + "step": 4163 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014514341261324662, + "loss": 1.5645, + "step": 4164 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014511769986091734, + "loss": 1.5625, + "step": 4165 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014509198336270107, + "loss": 1.5391, + "step": 4166 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014506626312073294, + "loss": 1.6016, + "step": 4167 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014504053913714837, + "loss": 1.5938, + "step": 4168 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014501481141408305, + "loss": 1.6543, + "step": 4169 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014498907995367306, + "loss": 1.6543, + "step": 4170 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014496334475805476, + "loss": 1.6543, + "step": 4171 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014493760582936475, + "loss": 1.6289, + "step": 4172 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014491186316974004, + "loss": 1.5625, + "step": 4173 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014488611678131794, + "loss": 1.6328, + "step": 4174 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014486036666623598, + "loss": 1.6055, + "step": 4175 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014483461282663206, + "loss": 1.5527, + "step": 4176 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014480885526464442, + "loss": 1.6699, + "step": 4177 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014478309398241154, + "loss": 1.584, + "step": 4178 + }, + { + "epoch": 0.37, + "learning_rate": 0.001447573289820723, + "loss": 1.6914, + "step": 4179 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014473156026576577, + "loss": 1.7305, + "step": 4180 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014470578783563144, + "loss": 1.5684, + "step": 4181 + }, + { + "epoch": 0.37, + "learning_rate": 0.00144680011693809, + "loss": 1.7617, + "step": 4182 + }, + { + "epoch": 0.37, + "learning_rate": 0.001446542318424386, + "loss": 1.6699, + "step": 4183 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014462844828366053, + "loss": 1.623, + "step": 4184 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014460266101961549, + "loss": 1.5996, + "step": 4185 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014457687005244445, + "loss": 1.6562, + "step": 4186 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014455107538428873, + "loss": 1.5605, + "step": 4187 + }, + { + "epoch": 0.37, + "learning_rate": 0.001445252770172899, + "loss": 1.6875, + "step": 4188 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014449947495358989, + "loss": 1.6465, + "step": 4189 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014447366919533087, + "loss": 1.5879, + "step": 4190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014444785974465537, + "loss": 1.6406, + "step": 4191 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014442204660370624, + "loss": 1.6367, + "step": 4192 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014439622977462658, + "loss": 1.4785, + "step": 4193 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014437040925955986, + "loss": 1.5723, + "step": 4194 + }, + { + "epoch": 0.37, + "learning_rate": 0.001443445850606498, + "loss": 1.627, + "step": 4195 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014431875718004046, + "loss": 1.6504, + "step": 4196 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014429292561987618, + "loss": 1.6172, + "step": 4197 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014426709038230158, + "loss": 1.668, + "step": 4198 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014424125146946171, + "loss": 1.5312, + "step": 4199 + }, + { + "epoch": 0.37, + "learning_rate": 0.001442154088835018, + "loss": 1.5566, + "step": 4200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014418956262656742, + "loss": 1.6055, + "step": 4201 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014416371270080442, + "loss": 1.666, + "step": 4202 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014413785910835906, + "loss": 1.6191, + "step": 4203 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014411200185137777, + "loss": 1.6191, + "step": 4204 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014408614093200736, + "loss": 1.582, + "step": 4205 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014406027635239492, + "loss": 1.6758, + "step": 4206 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014403440811468786, + "loss": 1.6191, + "step": 4207 + }, + { + "epoch": 0.37, + "learning_rate": 0.001440085362210339, + "loss": 1.543, + "step": 4208 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014398266067358102, + "loss": 1.7305, + "step": 4209 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014395678147447752, + "loss": 1.4863, + "step": 4210 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014393089862587208, + "loss": 1.6348, + "step": 4211 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014390501212991353, + "loss": 1.4766, + "step": 4212 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014387912198875117, + "loss": 1.5391, + "step": 4213 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014385322820453447, + "loss": 1.5059, + "step": 4214 + }, + { + "epoch": 0.37, + "learning_rate": 0.0014382733077941327, + "loss": 1.6758, + "step": 4215 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014380142971553774, + "loss": 1.6719, + "step": 4216 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014377552501505825, + "loss": 1.6719, + "step": 4217 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014374961668012554, + "loss": 1.5762, + "step": 4218 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014372370471289067, + "loss": 1.6113, + "step": 4219 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014369778911550496, + "loss": 1.7363, + "step": 4220 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014367186989012004, + "loss": 1.6855, + "step": 4221 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014364594703888786, + "loss": 1.4707, + "step": 4222 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014362002056396066, + "loss": 1.5566, + "step": 4223 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014359409046749098, + "loss": 1.707, + "step": 4224 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014356815675163166, + "loss": 1.7285, + "step": 4225 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014354221941853583, + "loss": 1.6875, + "step": 4226 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014351627847035693, + "loss": 1.709, + "step": 4227 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014349033390924873, + "loss": 1.6172, + "step": 4228 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014346438573736523, + "loss": 1.6055, + "step": 4229 + }, + { + "epoch": 0.38, + "learning_rate": 0.001434384339568608, + "loss": 1.7168, + "step": 4230 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014341247856989009, + "loss": 1.6387, + "step": 4231 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014338651957860802, + "loss": 1.5801, + "step": 4232 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014336055698516983, + "loss": 1.7383, + "step": 4233 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014333459079173107, + "loss": 1.625, + "step": 4234 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014330862100044755, + "loss": 1.6016, + "step": 4235 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014328264761347546, + "loss": 1.5781, + "step": 4236 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014325667063297117, + "loss": 1.5957, + "step": 4237 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014323069006109143, + "loss": 1.5527, + "step": 4238 + }, + { + "epoch": 0.38, + "learning_rate": 0.001432047058999933, + "loss": 1.6191, + "step": 4239 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014317871815183413, + "loss": 1.668, + "step": 4240 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014315272681877146, + "loss": 1.6113, + "step": 4241 + }, + { + "epoch": 0.38, + "learning_rate": 0.001431267319029633, + "loss": 1.6934, + "step": 4242 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014310073340656779, + "loss": 1.6367, + "step": 4243 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014307473133174356, + "loss": 1.6055, + "step": 4244 + }, + { + "epoch": 0.38, + "learning_rate": 0.001430487256806493, + "loss": 1.5566, + "step": 4245 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014302271645544424, + "loss": 1.5508, + "step": 4246 + }, + { + "epoch": 0.38, + "learning_rate": 0.001429967036582877, + "loss": 1.6309, + "step": 4247 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014297068729133943, + "loss": 1.6641, + "step": 4248 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014294466735675945, + "loss": 1.502, + "step": 4249 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014291864385670803, + "loss": 1.627, + "step": 4250 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014289261679334576, + "loss": 1.6074, + "step": 4251 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014286658616883355, + "loss": 1.6797, + "step": 4252 + }, + { + "epoch": 0.38, + "learning_rate": 0.001428405519853326, + "loss": 1.6094, + "step": 4253 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014281451424500433, + "loss": 1.6602, + "step": 4254 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014278847295001056, + "loss": 1.5566, + "step": 4255 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014276242810251339, + "loss": 1.498, + "step": 4256 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014273637970467514, + "loss": 1.6973, + "step": 4257 + }, + { + "epoch": 0.38, + "learning_rate": 0.001427103277586585, + "loss": 1.6602, + "step": 4258 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014268427226662639, + "loss": 1.7559, + "step": 4259 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014265821323074213, + "loss": 1.6641, + "step": 4260 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014263215065316922, + "loss": 1.7305, + "step": 4261 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014260608453607149, + "loss": 1.6543, + "step": 4262 + }, + { + "epoch": 0.38, + "learning_rate": 0.001425800148816131, + "loss": 1.6895, + "step": 4263 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014255394169195845, + "loss": 1.6191, + "step": 4264 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014252786496927229, + "loss": 1.7148, + "step": 4265 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014250178471571964, + "loss": 1.5352, + "step": 4266 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014247570093346576, + "loss": 1.6875, + "step": 4267 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014244961362467627, + "loss": 1.498, + "step": 4268 + }, + { + "epoch": 0.38, + "learning_rate": 0.001424235227915171, + "loss": 1.6738, + "step": 4269 + }, + { + "epoch": 0.38, + "learning_rate": 0.001423974284361544, + "loss": 1.5703, + "step": 4270 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014237133056075465, + "loss": 1.6602, + "step": 4271 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014234522916748463, + "loss": 1.7891, + "step": 4272 + }, + { + "epoch": 0.38, + "learning_rate": 0.001423191242585114, + "loss": 1.6289, + "step": 4273 + }, + { + "epoch": 0.38, + "learning_rate": 0.001422930158360023, + "loss": 1.7598, + "step": 4274 + }, + { + "epoch": 0.38, + "learning_rate": 0.00142266903902125, + "loss": 1.6816, + "step": 4275 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014224078845904745, + "loss": 1.5391, + "step": 4276 + }, + { + "epoch": 0.38, + "learning_rate": 0.001422146695089378, + "loss": 1.6133, + "step": 4277 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014218854705396464, + "loss": 1.5938, + "step": 4278 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014216242109629678, + "loss": 1.5898, + "step": 4279 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014213629163810331, + "loss": 1.7148, + "step": 4280 + }, + { + "epoch": 0.38, + "learning_rate": 0.001421101586815536, + "loss": 1.6289, + "step": 4281 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014208402222881738, + "loss": 1.7031, + "step": 4282 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014205788228206456, + "loss": 1.5273, + "step": 4283 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014203173884346548, + "loss": 1.4902, + "step": 4284 + }, + { + "epoch": 0.38, + "learning_rate": 0.001420055919151906, + "loss": 1.498, + "step": 4285 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014197944149941086, + "loss": 1.6562, + "step": 4286 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014195328759829733, + "loss": 1.7227, + "step": 4287 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014192713021402147, + "loss": 1.6172, + "step": 4288 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014190096934875494, + "loss": 1.6035, + "step": 4289 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014187480500466977, + "loss": 1.6504, + "step": 4290 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014184863718393825, + "loss": 1.5723, + "step": 4291 + }, + { + "epoch": 0.38, + "learning_rate": 0.00141822465888733, + "loss": 1.584, + "step": 4292 + }, + { + "epoch": 0.38, + "learning_rate": 0.001417962911212268, + "loss": 1.6465, + "step": 4293 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014177011288359286, + "loss": 1.6895, + "step": 4294 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014174393117800462, + "loss": 1.6914, + "step": 4295 + }, + { + "epoch": 0.38, + "learning_rate": 0.001417177460066358, + "loss": 1.6094, + "step": 4296 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014169155737166039, + "loss": 1.5781, + "step": 4297 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014166536527525273, + "loss": 1.6348, + "step": 4298 + }, + { + "epoch": 0.38, + "learning_rate": 0.001416391697195874, + "loss": 1.7832, + "step": 4299 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014161297070683933, + "loss": 1.6191, + "step": 4300 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014158676823918359, + "loss": 1.5762, + "step": 4301 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014156056231879573, + "loss": 1.6328, + "step": 4302 + }, + { + "epoch": 0.38, + "learning_rate": 0.001415343529478514, + "loss": 1.5625, + "step": 4303 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014150814012852673, + "loss": 1.6543, + "step": 4304 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014148192386299794, + "loss": 1.6172, + "step": 4305 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014145570415344168, + "loss": 1.6016, + "step": 4306 + }, + { + "epoch": 0.38, + "learning_rate": 0.001414294810020348, + "loss": 1.5703, + "step": 4307 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014140325441095456, + "loss": 1.8379, + "step": 4308 + }, + { + "epoch": 0.38, + "learning_rate": 0.001413770243823783, + "loss": 1.543, + "step": 4309 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014135079091848381, + "loss": 1.5684, + "step": 4310 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014132455402144915, + "loss": 1.6133, + "step": 4311 + }, + { + "epoch": 0.38, + "learning_rate": 0.001412983136934526, + "loss": 1.5332, + "step": 4312 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014127206993667276, + "loss": 1.7441, + "step": 4313 + }, + { + "epoch": 0.38, + "learning_rate": 0.001412458227532885, + "loss": 1.5195, + "step": 4314 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014121957214547902, + "loss": 1.709, + "step": 4315 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014119331811542372, + "loss": 1.6289, + "step": 4316 + }, + { + "epoch": 0.38, + "learning_rate": 0.001411670606653024, + "loss": 1.6562, + "step": 4317 + }, + { + "epoch": 0.38, + "learning_rate": 0.00141140799797295, + "loss": 1.4746, + "step": 4318 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014111453551358193, + "loss": 1.7363, + "step": 4319 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014108826781634365, + "loss": 1.6484, + "step": 4320 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014106199670776115, + "loss": 1.6582, + "step": 4321 + }, + { + "epoch": 0.38, + "learning_rate": 0.001410357221900155, + "loss": 1.6426, + "step": 4322 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014100944426528818, + "loss": 1.668, + "step": 4323 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014098316293576086, + "loss": 1.6797, + "step": 4324 + }, + { + "epoch": 0.38, + "learning_rate": 0.001409568782036156, + "loss": 1.6113, + "step": 4325 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014093059007103462, + "loss": 1.6328, + "step": 4326 + }, + { + "epoch": 0.38, + "learning_rate": 0.0014090429854020058, + "loss": 1.5879, + "step": 4327 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014087800361329622, + "loss": 1.5488, + "step": 4328 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014085170529250476, + "loss": 1.6465, + "step": 4329 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014082540358000956, + "loss": 1.5703, + "step": 4330 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014079909847799432, + "loss": 1.627, + "step": 4331 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014077278998864305, + "loss": 1.6602, + "step": 4332 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014074647811413994, + "loss": 1.7129, + "step": 4333 + }, + { + "epoch": 0.39, + "learning_rate": 0.001407201628566696, + "loss": 1.5508, + "step": 4334 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014069384421841681, + "loss": 1.75, + "step": 4335 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014066752220156667, + "loss": 1.6426, + "step": 4336 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014064119680830458, + "loss": 1.6074, + "step": 4337 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014061486804081615, + "loss": 1.7461, + "step": 4338 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014058853590128738, + "loss": 1.5938, + "step": 4339 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014056220039190446, + "loss": 1.5137, + "step": 4340 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014053586151485389, + "loss": 1.5176, + "step": 4341 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014050951927232246, + "loss": 1.6367, + "step": 4342 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014048317366649723, + "loss": 1.5586, + "step": 4343 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014045682469956552, + "loss": 1.6172, + "step": 4344 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014043047237371495, + "loss": 1.6895, + "step": 4345 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014040411669113343, + "loss": 1.7422, + "step": 4346 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014037775765400914, + "loss": 1.6113, + "step": 4347 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014035139526453052, + "loss": 1.6172, + "step": 4348 + }, + { + "epoch": 0.39, + "learning_rate": 0.001403250295248863, + "loss": 1.7344, + "step": 4349 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014029866043726547, + "loss": 1.5898, + "step": 4350 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014027228800385738, + "loss": 1.5391, + "step": 4351 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014024591222685154, + "loss": 1.6465, + "step": 4352 + }, + { + "epoch": 0.39, + "learning_rate": 0.001402195331084378, + "loss": 1.7402, + "step": 4353 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014019315065080628, + "loss": 1.6309, + "step": 4354 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014016676485614744, + "loss": 1.6113, + "step": 4355 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014014037572665184, + "loss": 1.6602, + "step": 4356 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014011398326451052, + "loss": 1.5898, + "step": 4357 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014008758747191467, + "loss": 1.6465, + "step": 4358 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014006118835105583, + "loss": 1.6113, + "step": 4359 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014003478590412573, + "loss": 1.498, + "step": 4360 + }, + { + "epoch": 0.39, + "learning_rate": 0.0014000838013331646, + "loss": 1.6582, + "step": 4361 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013998197104082035, + "loss": 1.6367, + "step": 4362 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013995555862883003, + "loss": 1.6133, + "step": 4363 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013992914289953833, + "loss": 1.748, + "step": 4364 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013990272385513848, + "loss": 1.6035, + "step": 4365 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013987630149782382, + "loss": 1.7285, + "step": 4366 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013984987582978817, + "loss": 1.6875, + "step": 4367 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013982344685322544, + "loss": 1.6191, + "step": 4368 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013979701457032992, + "loss": 1.7109, + "step": 4369 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013977057898329611, + "loss": 1.627, + "step": 4370 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013974414009431885, + "loss": 1.6211, + "step": 4371 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013971769790559329, + "loss": 1.4844, + "step": 4372 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013969125241931463, + "loss": 1.6035, + "step": 4373 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013966480363767862, + "loss": 1.6094, + "step": 4374 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013963835156288113, + "loss": 1.6602, + "step": 4375 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013961189619711833, + "loss": 1.5957, + "step": 4376 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013958543754258668, + "loss": 1.5586, + "step": 4377 + }, + { + "epoch": 0.39, + "learning_rate": 0.001395589756014829, + "loss": 1.7227, + "step": 4378 + }, + { + "epoch": 0.39, + "learning_rate": 0.00139532510376004, + "loss": 1.7539, + "step": 4379 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013950604186834723, + "loss": 1.6797, + "step": 4380 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013947957008071015, + "loss": 1.5977, + "step": 4381 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013945309501529053, + "loss": 1.623, + "step": 4382 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013942661667428653, + "loss": 1.7578, + "step": 4383 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013940013505989645, + "loss": 1.6562, + "step": 4384 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013937365017431896, + "loss": 1.5566, + "step": 4385 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013934716201975293, + "loss": 1.5352, + "step": 4386 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013932067059839754, + "loss": 1.5879, + "step": 4387 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013929417591245223, + "loss": 1.6719, + "step": 4388 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013926767796411674, + "loss": 1.6172, + "step": 4389 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013924117675559102, + "loss": 1.6035, + "step": 4390 + }, + { + "epoch": 0.39, + "learning_rate": 0.001392146722890754, + "loss": 1.7695, + "step": 4391 + }, + { + "epoch": 0.39, + "learning_rate": 0.001391881645667703, + "loss": 1.6992, + "step": 4392 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013916165359087657, + "loss": 1.5469, + "step": 4393 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013913513936359533, + "loss": 1.6504, + "step": 4394 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013910862188712782, + "loss": 1.6504, + "step": 4395 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013908210116367575, + "loss": 1.5312, + "step": 4396 + }, + { + "epoch": 0.39, + "learning_rate": 0.001390555771954409, + "loss": 1.5039, + "step": 4397 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013902904998462546, + "loss": 1.6758, + "step": 4398 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013900251953343186, + "loss": 1.4824, + "step": 4399 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013897598584406278, + "loss": 1.5918, + "step": 4400 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013894944891872112, + "loss": 1.6328, + "step": 4401 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013892290875961019, + "loss": 1.5332, + "step": 4402 + }, + { + "epoch": 0.39, + "learning_rate": 0.001388963653689334, + "loss": 1.7188, + "step": 4403 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013886981874889457, + "loss": 1.6094, + "step": 4404 + }, + { + "epoch": 0.39, + "learning_rate": 0.001388432689016977, + "loss": 1.6426, + "step": 4405 + }, + { + "epoch": 0.39, + "learning_rate": 0.001388167158295471, + "loss": 1.6777, + "step": 4406 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013879015953464728, + "loss": 1.5898, + "step": 4407 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013876360001920315, + "loss": 1.5254, + "step": 4408 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013873703728541976, + "loss": 1.6133, + "step": 4409 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013871047133550247, + "loss": 1.7109, + "step": 4410 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013868390217165697, + "loss": 1.6875, + "step": 4411 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013865732979608907, + "loss": 1.4395, + "step": 4412 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013863075421100502, + "loss": 1.6348, + "step": 4413 + }, + { + "epoch": 0.39, + "learning_rate": 0.001386041754186112, + "loss": 1.7031, + "step": 4414 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013857759342111428, + "loss": 1.5938, + "step": 4415 + }, + { + "epoch": 0.39, + "learning_rate": 0.001385510082207213, + "loss": 1.8203, + "step": 4416 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013852441981963945, + "loss": 1.5215, + "step": 4417 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013849782822007626, + "loss": 1.6406, + "step": 4418 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013847123342423941, + "loss": 1.6484, + "step": 4419 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013844463543433703, + "loss": 1.6328, + "step": 4420 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013841803425257732, + "loss": 1.5, + "step": 4421 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013839142988116893, + "loss": 1.6699, + "step": 4422 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013836482232232059, + "loss": 1.6699, + "step": 4423 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013833821157824147, + "loss": 1.6543, + "step": 4424 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013831159765114083, + "loss": 1.6738, + "step": 4425 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013828498054322838, + "loss": 1.6309, + "step": 4426 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013825836025671391, + "loss": 1.5898, + "step": 4427 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013823173679380766, + "loss": 1.5059, + "step": 4428 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013820511015671996, + "loss": 1.4844, + "step": 4429 + }, + { + "epoch": 0.39, + "learning_rate": 0.001381784803476615, + "loss": 1.5293, + "step": 4430 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013815184736884322, + "loss": 1.6777, + "step": 4431 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013812521122247633, + "loss": 1.7266, + "step": 4432 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013809857191077227, + "loss": 1.6172, + "step": 4433 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013807192943594278, + "loss": 1.584, + "step": 4434 + }, + { + "epoch": 0.39, + "learning_rate": 0.001380452838001998, + "loss": 1.6523, + "step": 4435 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013801863500575568, + "loss": 1.668, + "step": 4436 + }, + { + "epoch": 0.39, + "learning_rate": 0.001379919830548228, + "loss": 1.5879, + "step": 4437 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013796532794961402, + "loss": 1.5312, + "step": 4438 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013793866969234233, + "loss": 1.502, + "step": 4439 + }, + { + "epoch": 0.39, + "learning_rate": 0.0013791200828522107, + "loss": 1.5977, + "step": 4440 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013788534373046376, + "loss": 1.666, + "step": 4441 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013785867603028424, + "loss": 1.5996, + "step": 4442 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013783200518689659, + "loss": 1.6367, + "step": 4443 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013780533120251513, + "loss": 1.6309, + "step": 4444 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013777865407935447, + "loss": 1.6836, + "step": 4445 + }, + { + "epoch": 0.4, + "learning_rate": 0.001377519738196295, + "loss": 1.5566, + "step": 4446 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013772529042555528, + "loss": 1.6152, + "step": 4447 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013769860389934727, + "loss": 1.6465, + "step": 4448 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013767191424322105, + "loss": 1.7598, + "step": 4449 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013764522145939258, + "loss": 1.4902, + "step": 4450 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013761852555007796, + "loss": 1.5488, + "step": 4451 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013759182651749365, + "loss": 1.6953, + "step": 4452 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013756512436385634, + "loss": 1.6758, + "step": 4453 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013753841909138293, + "loss": 1.7012, + "step": 4454 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013751171070229065, + "loss": 1.6055, + "step": 4455 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013748499919879697, + "loss": 1.8105, + "step": 4456 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013745828458311953, + "loss": 1.6387, + "step": 4457 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013743156685747642, + "loss": 1.5527, + "step": 4458 + }, + { + "epoch": 0.4, + "learning_rate": 0.001374048460240858, + "loss": 1.6895, + "step": 4459 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013737812208516615, + "loss": 1.5195, + "step": 4460 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013735139504293628, + "loss": 1.5488, + "step": 4461 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013732466489961516, + "loss": 1.6855, + "step": 4462 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013729793165742203, + "loss": 1.5488, + "step": 4463 + }, + { + "epoch": 0.4, + "learning_rate": 0.001372711953185765, + "loss": 1.6289, + "step": 4464 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013724445588529821, + "loss": 1.6875, + "step": 4465 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013721771335980732, + "loss": 1.666, + "step": 4466 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013719096774432405, + "loss": 1.6445, + "step": 4467 + }, + { + "epoch": 0.4, + "learning_rate": 0.00137164219041069, + "loss": 1.6211, + "step": 4468 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013713746725226292, + "loss": 1.6836, + "step": 4469 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013711071238012692, + "loss": 1.8164, + "step": 4470 + }, + { + "epoch": 0.4, + "learning_rate": 0.001370839544268823, + "loss": 1.5352, + "step": 4471 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013705719339475063, + "loss": 1.7129, + "step": 4472 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013703042928595373, + "loss": 1.6875, + "step": 4473 + }, + { + "epoch": 0.4, + "learning_rate": 0.001370036621027137, + "loss": 1.666, + "step": 4474 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013697689184725287, + "loss": 1.6309, + "step": 4475 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013695011852179384, + "loss": 1.7441, + "step": 4476 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013692334212855944, + "loss": 1.75, + "step": 4477 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013689656266977282, + "loss": 1.6797, + "step": 4478 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013686978014765727, + "loss": 1.6211, + "step": 4479 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013684299456443645, + "loss": 1.6523, + "step": 4480 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013681620592233426, + "loss": 1.6211, + "step": 4481 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013678941422357474, + "loss": 1.582, + "step": 4482 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013676261947038232, + "loss": 1.666, + "step": 4483 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013673582166498159, + "loss": 1.6035, + "step": 4484 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013670902080959748, + "loss": 1.5547, + "step": 4485 + }, + { + "epoch": 0.4, + "learning_rate": 0.001366822169064551, + "loss": 1.6367, + "step": 4486 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013665540995777986, + "loss": 1.6914, + "step": 4487 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013662859996579732, + "loss": 1.7168, + "step": 4488 + }, + { + "epoch": 0.4, + "learning_rate": 0.001366017869327335, + "loss": 1.7891, + "step": 4489 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013657497086081445, + "loss": 1.5215, + "step": 4490 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013654815175226658, + "loss": 1.5508, + "step": 4491 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013652132960931657, + "loss": 1.5293, + "step": 4492 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013649450443419134, + "loss": 1.5293, + "step": 4493 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013646767622911798, + "loss": 1.6758, + "step": 4494 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013644084499632395, + "loss": 1.6348, + "step": 4495 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013641401073803685, + "loss": 1.6914, + "step": 4496 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013638717345648467, + "loss": 1.5859, + "step": 4497 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013636033315389548, + "loss": 1.6094, + "step": 4498 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013633348983249779, + "loss": 1.5977, + "step": 4499 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013630664349452016, + "loss": 1.6289, + "step": 4500 + }, + { + "epoch": 0.4, + "learning_rate": 0.001362797941421916, + "loss": 1.7344, + "step": 4501 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013625294177774115, + "loss": 1.6719, + "step": 4502 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013622608640339834, + "loss": 1.5586, + "step": 4503 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013619922802139276, + "loss": 1.7012, + "step": 4504 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013617236663395437, + "loss": 1.6426, + "step": 4505 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013614550224331325, + "loss": 1.5762, + "step": 4506 + }, + { + "epoch": 0.4, + "learning_rate": 0.001361186348516999, + "loss": 1.6309, + "step": 4507 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013609176446134493, + "loss": 1.5762, + "step": 4508 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013606489107447928, + "loss": 1.6133, + "step": 4509 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013603801469333407, + "loss": 1.5098, + "step": 4510 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013601113532014076, + "loss": 1.7207, + "step": 4511 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013598425295713093, + "loss": 1.498, + "step": 4512 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013595736760653654, + "loss": 1.707, + "step": 4513 + }, + { + "epoch": 0.4, + "learning_rate": 0.001359304792705897, + "loss": 1.668, + "step": 4514 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013590358795152284, + "loss": 1.5859, + "step": 4515 + }, + { + "epoch": 0.4, + "learning_rate": 0.001358766936515686, + "loss": 1.6211, + "step": 4516 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013584979637295987, + "loss": 1.6602, + "step": 4517 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013582289611792976, + "loss": 1.6172, + "step": 4518 + }, + { + "epoch": 0.4, + "learning_rate": 0.001357959928887117, + "loss": 1.6289, + "step": 4519 + }, + { + "epoch": 0.4, + "learning_rate": 0.001357690866875393, + "loss": 1.6602, + "step": 4520 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013574217751664648, + "loss": 1.6133, + "step": 4521 + }, + { + "epoch": 0.4, + "learning_rate": 0.001357152653782673, + "loss": 1.6309, + "step": 4522 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013568835027463623, + "loss": 1.5977, + "step": 4523 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013566143220798777, + "loss": 1.6777, + "step": 4524 + }, + { + "epoch": 0.4, + "learning_rate": 0.001356345111805569, + "loss": 1.7305, + "step": 4525 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013560758719457864, + "loss": 1.666, + "step": 4526 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013558066025228844, + "loss": 1.6699, + "step": 4527 + }, + { + "epoch": 0.4, + "learning_rate": 0.001355537303559218, + "loss": 1.7285, + "step": 4528 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013552679750771467, + "loss": 1.5703, + "step": 4529 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013549986170990308, + "loss": 1.6348, + "step": 4530 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013547292296472336, + "loss": 1.6055, + "step": 4531 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013544598127441217, + "loss": 1.5898, + "step": 4532 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013541903664120625, + "loss": 1.5527, + "step": 4533 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013539208906734271, + "loss": 1.6074, + "step": 4534 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013536513855505888, + "loss": 1.5391, + "step": 4535 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013533818510659229, + "loss": 1.6816, + "step": 4536 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013531122872418077, + "loss": 1.6191, + "step": 4537 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013528426941006233, + "loss": 1.6172, + "step": 4538 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013525730716647533, + "loss": 1.6035, + "step": 4539 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013523034199565823, + "loss": 1.5273, + "step": 4540 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013520337389984986, + "loss": 1.5957, + "step": 4541 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013517640288128918, + "loss": 1.7578, + "step": 4542 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013514942894221553, + "loss": 1.6289, + "step": 4543 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013512245208486837, + "loss": 1.6777, + "step": 4544 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013509547231148748, + "loss": 1.5137, + "step": 4545 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013506848962431277, + "loss": 1.4863, + "step": 4546 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013504150402558457, + "loss": 1.5664, + "step": 4547 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013501451551754327, + "loss": 1.6543, + "step": 4548 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013498752410242966, + "loss": 1.6348, + "step": 4549 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013496052978248466, + "loss": 1.5938, + "step": 4550 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013493353255994945, + "loss": 1.627, + "step": 4551 + }, + { + "epoch": 0.4, + "learning_rate": 0.0013490653243706549, + "loss": 1.5586, + "step": 4552 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013487952941607449, + "loss": 1.6172, + "step": 4553 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013485252349921828, + "loss": 1.7617, + "step": 4554 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013482551468873912, + "loss": 1.8828, + "step": 4555 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013479850298687933, + "loss": 1.5879, + "step": 4556 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013477148839588162, + "loss": 1.5117, + "step": 4557 + }, + { + "epoch": 0.41, + "learning_rate": 0.001347444709179888, + "loss": 1.5625, + "step": 4558 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013471745055544406, + "loss": 1.5332, + "step": 4559 + }, + { + "epoch": 0.41, + "learning_rate": 0.001346904273104907, + "loss": 1.5566, + "step": 4560 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013466340118537237, + "loss": 1.6719, + "step": 4561 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013463637218233287, + "loss": 1.6016, + "step": 4562 + }, + { + "epoch": 0.41, + "learning_rate": 0.001346093403036163, + "loss": 1.5625, + "step": 4563 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013458230555146696, + "loss": 1.4902, + "step": 4564 + }, + { + "epoch": 0.41, + "learning_rate": 0.001345552679281294, + "loss": 1.6348, + "step": 4565 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013452822743584844, + "loss": 1.6367, + "step": 4566 + }, + { + "epoch": 0.41, + "learning_rate": 0.001345011840768691, + "loss": 1.6543, + "step": 4567 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013447413785343664, + "loss": 1.5488, + "step": 4568 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013444708876779656, + "loss": 1.6016, + "step": 4569 + }, + { + "epoch": 0.41, + "learning_rate": 0.001344200368221946, + "loss": 1.6367, + "step": 4570 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013439298201887678, + "loss": 1.6289, + "step": 4571 + }, + { + "epoch": 0.41, + "learning_rate": 0.001343659243600893, + "loss": 1.7031, + "step": 4572 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013433886384807859, + "loss": 1.6328, + "step": 4573 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013431180048509136, + "loss": 1.5938, + "step": 4574 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013428473427337456, + "loss": 1.6797, + "step": 4575 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013425766521517532, + "loss": 1.6543, + "step": 4576 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013423059331274109, + "loss": 1.5781, + "step": 4577 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013420351856831946, + "loss": 1.6504, + "step": 4578 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013417644098415833, + "loss": 1.7051, + "step": 4579 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013414936056250582, + "loss": 1.623, + "step": 4580 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013412227730561024, + "loss": 1.5879, + "step": 4581 + }, + { + "epoch": 0.41, + "learning_rate": 0.001340951912157202, + "loss": 1.6797, + "step": 4582 + }, + { + "epoch": 0.41, + "learning_rate": 0.001340681022950845, + "loss": 1.6895, + "step": 4583 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013404101054595224, + "loss": 1.6094, + "step": 4584 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013401391597057262, + "loss": 1.7051, + "step": 4585 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013398681857119525, + "loss": 1.5957, + "step": 4586 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013395971835006982, + "loss": 1.5801, + "step": 4587 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013393261530944636, + "loss": 1.7363, + "step": 4588 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013390550945157507, + "loss": 1.6133, + "step": 4589 + }, + { + "epoch": 0.41, + "learning_rate": 0.001338784007787064, + "loss": 1.5273, + "step": 4590 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013385128929309107, + "loss": 1.5957, + "step": 4591 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013382417499698, + "loss": 1.5098, + "step": 4592 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013379705789262433, + "loss": 1.707, + "step": 4593 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013376993798227548, + "loss": 1.6133, + "step": 4594 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013374281526818503, + "loss": 1.7422, + "step": 4595 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013371568975260489, + "loss": 1.582, + "step": 4596 + }, + { + "epoch": 0.41, + "learning_rate": 0.001336885614377871, + "loss": 1.6504, + "step": 4597 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013366143032598403, + "loss": 1.5312, + "step": 4598 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013363429641944818, + "loss": 1.4902, + "step": 4599 + }, + { + "epoch": 0.41, + "learning_rate": 0.001336071597204324, + "loss": 1.625, + "step": 4600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013358002023118965, + "loss": 1.7324, + "step": 4601 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013355287795397324, + "loss": 1.5488, + "step": 4602 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013352573289103658, + "loss": 1.5723, + "step": 4603 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013349858504463344, + "loss": 1.5137, + "step": 4604 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013347143441701772, + "loss": 1.6035, + "step": 4605 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013344428101044366, + "loss": 1.6426, + "step": 4606 + }, + { + "epoch": 0.41, + "learning_rate": 0.001334171248271656, + "loss": 1.5176, + "step": 4607 + }, + { + "epoch": 0.41, + "learning_rate": 0.001333899658694382, + "loss": 1.543, + "step": 4608 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013336280413951635, + "loss": 1.5508, + "step": 4609 + }, + { + "epoch": 0.41, + "learning_rate": 0.001333356396396551, + "loss": 1.6016, + "step": 4610 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013330847237210978, + "loss": 1.6953, + "step": 4611 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013328130233913602, + "loss": 1.5098, + "step": 4612 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013325412954298952, + "loss": 1.6934, + "step": 4613 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013322695398592633, + "loss": 1.582, + "step": 4614 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013319977567020267, + "loss": 1.6094, + "step": 4615 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013317259459807507, + "loss": 1.627, + "step": 4616 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013314541077180018, + "loss": 1.6523, + "step": 4617 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013311822419363498, + "loss": 1.6289, + "step": 4618 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013309103486583657, + "loss": 1.6094, + "step": 4619 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013306384279066236, + "loss": 1.6523, + "step": 4620 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013303664797036999, + "loss": 1.6211, + "step": 4621 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013300945040721729, + "loss": 1.5527, + "step": 4622 + }, + { + "epoch": 0.41, + "learning_rate": 0.001329822501034623, + "loss": 1.5879, + "step": 4623 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013295504706136337, + "loss": 1.625, + "step": 4624 + }, + { + "epoch": 0.41, + "learning_rate": 0.00132927841283179, + "loss": 1.5957, + "step": 4625 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013290063277116796, + "loss": 1.5762, + "step": 4626 + }, + { + "epoch": 0.41, + "learning_rate": 0.001328734215275892, + "loss": 1.7324, + "step": 4627 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013284620755470195, + "loss": 1.7598, + "step": 4628 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013281899085476564, + "loss": 1.5332, + "step": 4629 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013279177143003993, + "loss": 1.5684, + "step": 4630 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013276454928278472, + "loss": 1.5918, + "step": 4631 + }, + { + "epoch": 0.41, + "learning_rate": 0.001327373244152601, + "loss": 1.4395, + "step": 4632 + }, + { + "epoch": 0.41, + "learning_rate": 0.001327100968297264, + "loss": 1.5723, + "step": 4633 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013268286652844425, + "loss": 1.6289, + "step": 4634 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013265563351367434, + "loss": 1.6016, + "step": 4635 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013262839778767777, + "loss": 1.5801, + "step": 4636 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013260115935271573, + "loss": 1.6562, + "step": 4637 + }, + { + "epoch": 0.41, + "learning_rate": 0.001325739182110497, + "loss": 1.5859, + "step": 4638 + }, + { + "epoch": 0.41, + "learning_rate": 0.001325466743649414, + "loss": 1.6328, + "step": 4639 + }, + { + "epoch": 0.41, + "learning_rate": 0.001325194278166527, + "loss": 1.5898, + "step": 4640 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013249217856844573, + "loss": 1.6211, + "step": 4641 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013246492662258292, + "loss": 1.668, + "step": 4642 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013243767198132677, + "loss": 1.625, + "step": 4643 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013241041464694017, + "loss": 1.5, + "step": 4644 + }, + { + "epoch": 0.41, + "learning_rate": 0.001323831546216861, + "loss": 1.6758, + "step": 4645 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013235589190782784, + "loss": 1.8438, + "step": 4646 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013232862650762887, + "loss": 1.5684, + "step": 4647 + }, + { + "epoch": 0.41, + "learning_rate": 0.001323013584233529, + "loss": 1.7031, + "step": 4648 + }, + { + "epoch": 0.41, + "learning_rate": 0.001322740876572638, + "loss": 1.6738, + "step": 4649 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013224681421162583, + "loss": 1.5195, + "step": 4650 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013221953808870328, + "loss": 1.5723, + "step": 4651 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013219225929076075, + "loss": 1.584, + "step": 4652 + }, + { + "epoch": 0.41, + "learning_rate": 0.001321649778200631, + "loss": 1.666, + "step": 4653 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013213769367887527, + "loss": 1.4688, + "step": 4654 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013211040686946266, + "loss": 1.6074, + "step": 4655 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013208311739409063, + "loss": 1.6543, + "step": 4656 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013205582525502493, + "loss": 1.6621, + "step": 4657 + }, + { + "epoch": 0.41, + "learning_rate": 0.001320285304545315, + "loss": 1.5527, + "step": 4658 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013200123299487647, + "loss": 1.6777, + "step": 4659 + }, + { + "epoch": 0.41, + "learning_rate": 0.001319739328783262, + "loss": 1.5234, + "step": 4660 + }, + { + "epoch": 0.41, + "learning_rate": 0.001319466301071473, + "loss": 1.4531, + "step": 4661 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013191932468360652, + "loss": 1.6602, + "step": 4662 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013189201660997094, + "loss": 1.5859, + "step": 4663 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013186470588850779, + "loss": 1.5234, + "step": 4664 + }, + { + "epoch": 0.41, + "learning_rate": 0.0013183739252148453, + "loss": 1.6758, + "step": 4665 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013181007651116888, + "loss": 1.7363, + "step": 4666 + }, + { + "epoch": 0.42, + "learning_rate": 0.001317827578598287, + "loss": 1.6309, + "step": 4667 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013175543656973215, + "loss": 1.5176, + "step": 4668 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013172811264314753, + "loss": 1.543, + "step": 4669 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013170078608234347, + "loss": 1.6504, + "step": 4670 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013167345688958867, + "loss": 1.7051, + "step": 4671 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013164612506715216, + "loss": 1.5176, + "step": 4672 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013161879061730321, + "loss": 1.6016, + "step": 4673 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013159145354231122, + "loss": 1.709, + "step": 4674 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013156411384444581, + "loss": 1.5098, + "step": 4675 + }, + { + "epoch": 0.42, + "learning_rate": 0.001315367715259769, + "loss": 1.4863, + "step": 4676 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013150942658917458, + "loss": 1.5996, + "step": 4677 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013148207903630911, + "loss": 1.6094, + "step": 4678 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013145472886965108, + "loss": 1.6523, + "step": 4679 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013142737609147118, + "loss": 1.5449, + "step": 4680 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013140002070404038, + "loss": 1.6992, + "step": 4681 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013137266270962986, + "loss": 1.5312, + "step": 4682 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013134530211051103, + "loss": 1.5996, + "step": 4683 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013131793890895547, + "loss": 1.6074, + "step": 4684 + }, + { + "epoch": 0.42, + "learning_rate": 0.00131290573107235, + "loss": 1.6543, + "step": 4685 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013126320470762168, + "loss": 1.7188, + "step": 4686 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013123583371238779, + "loss": 1.668, + "step": 4687 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013120846012380572, + "loss": 1.6621, + "step": 4688 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013118108394414823, + "loss": 1.7168, + "step": 4689 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013115370517568822, + "loss": 1.6504, + "step": 4690 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013112632382069876, + "loss": 1.4863, + "step": 4691 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013109893988145318, + "loss": 1.6523, + "step": 4692 + }, + { + "epoch": 0.42, + "learning_rate": 0.001310715533602251, + "loss": 1.7324, + "step": 4693 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013104416425928815, + "loss": 1.4746, + "step": 4694 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013101677258091646, + "loss": 1.7129, + "step": 4695 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013098937832738409, + "loss": 1.666, + "step": 4696 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013096198150096552, + "loss": 1.5957, + "step": 4697 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013093458210393534, + "loss": 1.7168, + "step": 4698 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013090718013856837, + "loss": 1.6875, + "step": 4699 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013087977560713965, + "loss": 1.5508, + "step": 4700 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013085236851192446, + "loss": 1.6484, + "step": 4701 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013082495885519824, + "loss": 1.6211, + "step": 4702 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013079754663923669, + "loss": 1.7246, + "step": 4703 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013077013186631565, + "loss": 1.6895, + "step": 4704 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013074271453871135, + "loss": 1.5957, + "step": 4705 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013071529465869995, + "loss": 1.707, + "step": 4706 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013068787222855812, + "loss": 1.6289, + "step": 4707 + }, + { + "epoch": 0.42, + "learning_rate": 0.001306604472505625, + "loss": 1.5312, + "step": 4708 + }, + { + "epoch": 0.42, + "learning_rate": 0.001306330197269901, + "loss": 1.707, + "step": 4709 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013060558966011809, + "loss": 1.6113, + "step": 4710 + }, + { + "epoch": 0.42, + "learning_rate": 0.001305781570522238, + "loss": 1.6133, + "step": 4711 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013055072190558481, + "loss": 1.5117, + "step": 4712 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013052328422247898, + "loss": 1.5625, + "step": 4713 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013049584400518426, + "loss": 1.6602, + "step": 4714 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013046840125597891, + "loss": 1.5762, + "step": 4715 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013044095597714131, + "loss": 1.7246, + "step": 4716 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013041350817095015, + "loss": 1.6035, + "step": 4717 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013038605783968426, + "loss": 1.6348, + "step": 4718 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013035860498562266, + "loss": 1.6992, + "step": 4719 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013033114961104467, + "loss": 1.5215, + "step": 4720 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013030369171822974, + "loss": 1.6152, + "step": 4721 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013027623130945755, + "loss": 1.5566, + "step": 4722 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013024876838700803, + "loss": 1.7344, + "step": 4723 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013022130295316124, + "loss": 1.6543, + "step": 4724 + }, + { + "epoch": 0.42, + "learning_rate": 0.001301938350101975, + "loss": 1.6797, + "step": 4725 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013016636456039735, + "loss": 1.7559, + "step": 4726 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013013889160604151, + "loss": 1.5625, + "step": 4727 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013011141614941088, + "loss": 1.4375, + "step": 4728 + }, + { + "epoch": 0.42, + "learning_rate": 0.001300839381927867, + "loss": 1.5098, + "step": 4729 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013005645773845023, + "loss": 1.6055, + "step": 4730 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013002897478868304, + "loss": 1.6445, + "step": 4731 + }, + { + "epoch": 0.42, + "learning_rate": 0.0013000148934576691, + "loss": 1.6094, + "step": 4732 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012997400141198385, + "loss": 1.6055, + "step": 4733 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012994651098961597, + "loss": 1.7383, + "step": 4734 + }, + { + "epoch": 0.42, + "learning_rate": 0.001299190180809457, + "loss": 1.6875, + "step": 4735 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012989152268825565, + "loss": 1.4668, + "step": 4736 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012986402481382855, + "loss": 1.7188, + "step": 4737 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012983652445994747, + "loss": 1.5977, + "step": 4738 + }, + { + "epoch": 0.42, + "learning_rate": 0.001298090216288956, + "loss": 1.6934, + "step": 4739 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012978151632295635, + "loss": 1.541, + "step": 4740 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012975400854441333, + "loss": 1.582, + "step": 4741 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012972649829555041, + "loss": 1.5664, + "step": 4742 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012969898557865156, + "loss": 1.4961, + "step": 4743 + }, + { + "epoch": 0.42, + "learning_rate": 0.001296714703960011, + "loss": 1.709, + "step": 4744 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012964395274988338, + "loss": 1.6309, + "step": 4745 + }, + { + "epoch": 0.42, + "learning_rate": 0.001296164326425831, + "loss": 1.5586, + "step": 4746 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012958891007638514, + "loss": 1.5938, + "step": 4747 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012956138505357448, + "loss": 1.6992, + "step": 4748 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012953385757643644, + "loss": 1.5254, + "step": 4749 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012950632764725644, + "loss": 1.7383, + "step": 4750 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012947879526832016, + "loss": 1.666, + "step": 4751 + }, + { + "epoch": 0.42, + "learning_rate": 0.001294512604419135, + "loss": 1.7969, + "step": 4752 + }, + { + "epoch": 0.42, + "learning_rate": 0.001294237231703225, + "loss": 1.5586, + "step": 4753 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012939618345583345, + "loss": 1.5059, + "step": 4754 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012936864130073276, + "loss": 1.5117, + "step": 4755 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012934109670730723, + "loss": 1.6602, + "step": 4756 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012931354967784365, + "loss": 1.6289, + "step": 4757 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012928600021462919, + "loss": 1.6816, + "step": 4758 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012925844831995106, + "loss": 1.5645, + "step": 4759 + }, + { + "epoch": 0.42, + "learning_rate": 0.001292308939960968, + "loss": 1.5996, + "step": 4760 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012920333724535404, + "loss": 1.5605, + "step": 4761 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012917577807001078, + "loss": 1.4863, + "step": 4762 + }, + { + "epoch": 0.42, + "learning_rate": 0.00129148216472355, + "loss": 1.6543, + "step": 4763 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012912065245467505, + "loss": 1.6387, + "step": 4764 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012909308601925943, + "loss": 1.5781, + "step": 4765 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012906551716839682, + "loss": 1.6328, + "step": 4766 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012903794590437615, + "loss": 1.7168, + "step": 4767 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012901037222948649, + "loss": 1.6973, + "step": 4768 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012898279614601714, + "loss": 1.6934, + "step": 4769 + }, + { + "epoch": 0.42, + "learning_rate": 0.001289552176562576, + "loss": 1.6348, + "step": 4770 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012892763676249754, + "loss": 1.6074, + "step": 4771 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012890005346702692, + "loss": 1.5176, + "step": 4772 + }, + { + "epoch": 0.42, + "learning_rate": 0.001288724677721358, + "loss": 1.6211, + "step": 4773 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012884487968011444, + "loss": 1.6543, + "step": 4774 + }, + { + "epoch": 0.42, + "learning_rate": 0.001288172891932534, + "loss": 1.582, + "step": 4775 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012878969631384338, + "loss": 1.666, + "step": 4776 + }, + { + "epoch": 0.42, + "learning_rate": 0.0012876210104417521, + "loss": 1.6406, + "step": 4777 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012873450338654004, + "loss": 1.6777, + "step": 4778 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012870690334322909, + "loss": 1.6523, + "step": 4779 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012867930091653391, + "loss": 1.5605, + "step": 4780 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012865169610874615, + "loss": 1.5703, + "step": 4781 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012862408892215774, + "loss": 1.7129, + "step": 4782 + }, + { + "epoch": 0.43, + "learning_rate": 0.001285964793590607, + "loss": 1.7246, + "step": 4783 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012856886742174734, + "loss": 1.5645, + "step": 4784 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012854125311251012, + "loss": 1.625, + "step": 4785 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012851363643364175, + "loss": 1.6289, + "step": 4786 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012848601738743503, + "loss": 1.6055, + "step": 4787 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012845839597618312, + "loss": 1.6953, + "step": 4788 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012843077220217923, + "loss": 1.6836, + "step": 4789 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012840314606771678, + "loss": 1.5977, + "step": 4790 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012837551757508944, + "loss": 1.75, + "step": 4791 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012834788672659112, + "loss": 1.5742, + "step": 4792 + }, + { + "epoch": 0.43, + "learning_rate": 0.001283202535245158, + "loss": 1.5391, + "step": 4793 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012829261797115779, + "loss": 1.582, + "step": 4794 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012826498006881142, + "loss": 1.5156, + "step": 4795 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012823733981977142, + "loss": 1.6758, + "step": 4796 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012820969722633256, + "loss": 1.6543, + "step": 4797 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012818205229078988, + "loss": 1.5137, + "step": 4798 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012815440501543858, + "loss": 1.6602, + "step": 4799 + }, + { + "epoch": 0.43, + "learning_rate": 0.001281267554025741, + "loss": 1.5547, + "step": 4800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00128099103454492, + "loss": 1.6465, + "step": 4801 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012807144917348812, + "loss": 1.6543, + "step": 4802 + }, + { + "epoch": 0.43, + "learning_rate": 0.001280437925618584, + "loss": 1.5156, + "step": 4803 + }, + { + "epoch": 0.43, + "learning_rate": 0.001280161336218991, + "loss": 1.6445, + "step": 4804 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012798847235590649, + "loss": 1.7754, + "step": 4805 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012796080876617726, + "loss": 1.6641, + "step": 4806 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012793314285500807, + "loss": 1.582, + "step": 4807 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012790547462469598, + "loss": 1.6836, + "step": 4808 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012787780407753804, + "loss": 1.6055, + "step": 4809 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012785013121583162, + "loss": 1.5977, + "step": 4810 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012782245604187428, + "loss": 1.625, + "step": 4811 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012779477855796376, + "loss": 1.7539, + "step": 4812 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012776709876639789, + "loss": 1.6152, + "step": 4813 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012773941666947486, + "loss": 1.5, + "step": 4814 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012771173226949295, + "loss": 1.5605, + "step": 4815 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012768404556875064, + "loss": 1.6602, + "step": 4816 + }, + { + "epoch": 0.43, + "learning_rate": 0.001276563565695466, + "loss": 1.7383, + "step": 4817 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012762866527417974, + "loss": 1.6582, + "step": 4818 + }, + { + "epoch": 0.43, + "learning_rate": 0.001276009716849491, + "loss": 1.668, + "step": 4819 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012757327580415394, + "loss": 1.5898, + "step": 4820 + }, + { + "epoch": 0.43, + "learning_rate": 0.001275455776340937, + "loss": 1.5195, + "step": 4821 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012751787717706802, + "loss": 1.5938, + "step": 4822 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012749017443537674, + "loss": 1.5898, + "step": 4823 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012746246941131982, + "loss": 1.6582, + "step": 4824 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012743476210719754, + "loss": 1.6152, + "step": 4825 + }, + { + "epoch": 0.43, + "learning_rate": 0.001274070525253102, + "loss": 1.6875, + "step": 4826 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012737934066795846, + "loss": 1.8066, + "step": 4827 + }, + { + "epoch": 0.43, + "learning_rate": 0.001273516265374431, + "loss": 1.5547, + "step": 4828 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012732391013606501, + "loss": 1.6055, + "step": 4829 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012729619146612536, + "loss": 1.5996, + "step": 4830 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012726847052992555, + "loss": 1.4922, + "step": 4831 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012724074732976699, + "loss": 1.6484, + "step": 4832 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012721302186795148, + "loss": 1.6367, + "step": 4833 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012718529414678088, + "loss": 1.584, + "step": 4834 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012715756416855732, + "loss": 1.5801, + "step": 4835 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012712983193558303, + "loss": 1.6641, + "step": 4836 + }, + { + "epoch": 0.43, + "learning_rate": 0.001271020974501605, + "loss": 1.6465, + "step": 4837 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012707436071459238, + "loss": 1.582, + "step": 4838 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012704662173118149, + "loss": 1.6465, + "step": 4839 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012701888050223086, + "loss": 1.6289, + "step": 4840 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012699113703004369, + "loss": 1.5449, + "step": 4841 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012696339131692339, + "loss": 1.6133, + "step": 4842 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012693564336517353, + "loss": 1.7168, + "step": 4843 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012690789317709788, + "loss": 1.6973, + "step": 4844 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012688014075500043, + "loss": 1.627, + "step": 4845 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012685238610118527, + "loss": 1.5605, + "step": 4846 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012682462921795672, + "loss": 1.5977, + "step": 4847 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012679687010761934, + "loss": 1.7188, + "step": 4848 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012676910877247778, + "loss": 1.6387, + "step": 4849 + }, + { + "epoch": 0.43, + "learning_rate": 0.001267413452148369, + "loss": 1.6562, + "step": 4850 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012671357943700183, + "loss": 1.5898, + "step": 4851 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012668581144127777, + "loss": 1.5977, + "step": 4852 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012665804122997018, + "loss": 1.5254, + "step": 4853 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012663026880538465, + "loss": 1.6973, + "step": 4854 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012660249416982699, + "loss": 1.5879, + "step": 4855 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012657471732560317, + "loss": 1.6328, + "step": 4856 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012654693827501937, + "loss": 1.6816, + "step": 4857 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012651915702038194, + "loss": 1.7266, + "step": 4858 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012649137356399742, + "loss": 1.6562, + "step": 4859 + }, + { + "epoch": 0.43, + "learning_rate": 0.001264635879081725, + "loss": 1.5469, + "step": 4860 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012643580005521412, + "loss": 1.5996, + "step": 4861 + }, + { + "epoch": 0.43, + "learning_rate": 0.001264080100074293, + "loss": 1.7285, + "step": 4862 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012638021776712536, + "loss": 1.6797, + "step": 4863 + }, + { + "epoch": 0.43, + "learning_rate": 0.001263524233366097, + "loss": 1.6582, + "step": 4864 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012632462671818999, + "loss": 1.5723, + "step": 4865 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012629682791417398, + "loss": 1.5566, + "step": 4866 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012626902692686975, + "loss": 1.582, + "step": 4867 + }, + { + "epoch": 0.43, + "learning_rate": 0.001262412237585854, + "loss": 1.5801, + "step": 4868 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012621341841162928, + "loss": 1.6543, + "step": 4869 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012618561088830994, + "loss": 1.7227, + "step": 4870 + }, + { + "epoch": 0.43, + "learning_rate": 0.001261578011909361, + "loss": 1.7227, + "step": 4871 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012612998932181663, + "loss": 1.4922, + "step": 4872 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012610217528326061, + "loss": 1.5762, + "step": 4873 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012607435907757733, + "loss": 1.5371, + "step": 4874 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012604654070707622, + "loss": 1.7305, + "step": 4875 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012601872017406683, + "loss": 1.6465, + "step": 4876 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012599089748085903, + "loss": 1.666, + "step": 4877 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012596307262976274, + "loss": 1.6309, + "step": 4878 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012593524562308817, + "loss": 1.5527, + "step": 4879 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012590741646314556, + "loss": 1.6133, + "step": 4880 + }, + { + "epoch": 0.43, + "learning_rate": 0.001258795851522455, + "loss": 1.6074, + "step": 4881 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012585175169269865, + "loss": 1.582, + "step": 4882 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012582391608681588, + "loss": 1.7422, + "step": 4883 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012579607833690823, + "loss": 1.6191, + "step": 4884 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012576823844528693, + "loss": 1.6445, + "step": 4885 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012574039641426338, + "loss": 1.5859, + "step": 4886 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012571255224614916, + "loss": 1.5293, + "step": 4887 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012568470594325602, + "loss": 1.7617, + "step": 4888 + }, + { + "epoch": 0.43, + "learning_rate": 0.0012565685750789589, + "loss": 1.6309, + "step": 4889 + }, + { + "epoch": 0.44, + "learning_rate": 0.001256290069423809, + "loss": 1.6484, + "step": 4890 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012560115424902329, + "loss": 1.4746, + "step": 4891 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012557329943013555, + "loss": 1.4961, + "step": 4892 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012554544248803037, + "loss": 1.5527, + "step": 4893 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012551758342502048, + "loss": 1.5801, + "step": 4894 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012548972224341894, + "loss": 1.6426, + "step": 4895 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012546185894553887, + "loss": 1.625, + "step": 4896 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012543399353369367, + "loss": 1.5449, + "step": 4897 + }, + { + "epoch": 0.44, + "learning_rate": 0.001254061260101968, + "loss": 1.6387, + "step": 4898 + }, + { + "epoch": 0.44, + "learning_rate": 0.00125378256377362, + "loss": 1.6523, + "step": 4899 + }, + { + "epoch": 0.44, + "learning_rate": 0.001253503846375031, + "loss": 1.6289, + "step": 4900 + }, + { + "epoch": 0.44, + "learning_rate": 0.001253225107929342, + "loss": 1.5586, + "step": 4901 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012529463484596945, + "loss": 1.584, + "step": 4902 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012526675679892327, + "loss": 1.6523, + "step": 4903 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012523887665411028, + "loss": 1.625, + "step": 4904 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012521099441384516, + "loss": 1.6406, + "step": 4905 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012518311008044285, + "loss": 1.6055, + "step": 4906 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012515522365621847, + "loss": 1.5527, + "step": 4907 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012512733514348723, + "loss": 1.5547, + "step": 4908 + }, + { + "epoch": 0.44, + "learning_rate": 0.001250994445445646, + "loss": 1.5801, + "step": 4909 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012507155186176617, + "loss": 1.5156, + "step": 4910 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012504365709740772, + "loss": 1.6875, + "step": 4911 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012501576025380527, + "loss": 1.7695, + "step": 4912 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012498786133327484, + "loss": 1.6191, + "step": 4913 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012495996033813283, + "loss": 1.8145, + "step": 4914 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012493205727069565, + "loss": 1.5254, + "step": 4915 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012490415213328, + "loss": 1.5547, + "step": 4916 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012487624492820266, + "loss": 1.6543, + "step": 4917 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012484833565778064, + "loss": 1.5449, + "step": 4918 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012482042432433106, + "loss": 1.6914, + "step": 4919 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012479251093017135, + "loss": 1.6816, + "step": 4920 + }, + { + "epoch": 0.44, + "learning_rate": 0.001247645954776189, + "loss": 1.5742, + "step": 4921 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012473667796899144, + "loss": 1.6816, + "step": 4922 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012470875840660679, + "loss": 1.5293, + "step": 4923 + }, + { + "epoch": 0.44, + "learning_rate": 0.00124680836792783, + "loss": 1.6152, + "step": 4924 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012465291312983822, + "loss": 1.5898, + "step": 4925 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012462498742009083, + "loss": 1.5996, + "step": 4926 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012459705966585937, + "loss": 1.707, + "step": 4927 + }, + { + "epoch": 0.44, + "learning_rate": 0.001245691298694625, + "loss": 1.5645, + "step": 4928 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012454119803321907, + "loss": 1.5156, + "step": 4929 + }, + { + "epoch": 0.44, + "learning_rate": 0.001245132641594482, + "loss": 1.6406, + "step": 4930 + }, + { + "epoch": 0.44, + "learning_rate": 0.00124485328250469, + "loss": 1.5957, + "step": 4931 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012445739030860089, + "loss": 1.5176, + "step": 4932 + }, + { + "epoch": 0.44, + "learning_rate": 0.001244294503361634, + "loss": 1.6113, + "step": 4933 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012440150833547623, + "loss": 1.5879, + "step": 4934 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012437356430885927, + "loss": 1.5762, + "step": 4935 + }, + { + "epoch": 0.44, + "learning_rate": 0.001243456182586326, + "loss": 1.6699, + "step": 4936 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012431767018711635, + "loss": 1.6504, + "step": 4937 + }, + { + "epoch": 0.44, + "learning_rate": 0.00124289720096631, + "loss": 1.541, + "step": 4938 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012426176798949703, + "loss": 1.6387, + "step": 4939 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012423381386803519, + "loss": 1.7227, + "step": 4940 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012420585773456633, + "loss": 1.7871, + "step": 4941 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012417789959141157, + "loss": 1.623, + "step": 4942 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012414993944089205, + "loss": 1.707, + "step": 4943 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012412197728532924, + "loss": 1.6738, + "step": 4944 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012409401312704457, + "loss": 1.6797, + "step": 4945 + }, + { + "epoch": 0.44, + "learning_rate": 0.001240660469683599, + "loss": 1.7305, + "step": 4946 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012403807881159704, + "loss": 1.7148, + "step": 4947 + }, + { + "epoch": 0.44, + "learning_rate": 0.00124010108659078, + "loss": 1.752, + "step": 4948 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012398213651312506, + "loss": 1.6934, + "step": 4949 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012395416237606061, + "loss": 1.7383, + "step": 4950 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012392618625020712, + "loss": 1.5254, + "step": 4951 + }, + { + "epoch": 0.44, + "learning_rate": 0.001238982081378874, + "loss": 1.6816, + "step": 4952 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012387022804142423, + "loss": 1.6914, + "step": 4953 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012384224596314072, + "loss": 1.6602, + "step": 4954 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012381426190536005, + "loss": 1.5684, + "step": 4955 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012378627587040561, + "loss": 1.6543, + "step": 4956 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012375828786060088, + "loss": 1.7461, + "step": 4957 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012373029787826964, + "loss": 1.5234, + "step": 4958 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012370230592573567, + "loss": 1.4824, + "step": 4959 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012367431200532306, + "loss": 1.502, + "step": 4960 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012364631611935595, + "loss": 1.6348, + "step": 4961 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012361831827015874, + "loss": 1.5859, + "step": 4962 + }, + { + "epoch": 0.44, + "learning_rate": 0.001235903184600559, + "loss": 1.7012, + "step": 4963 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012356231669137217, + "loss": 1.6191, + "step": 4964 + }, + { + "epoch": 0.44, + "learning_rate": 0.001235343129664323, + "loss": 1.6621, + "step": 4965 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012350630728756137, + "loss": 1.6484, + "step": 4966 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012347829965708452, + "loss": 1.5039, + "step": 4967 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012345029007732706, + "loss": 1.6855, + "step": 4968 + }, + { + "epoch": 0.44, + "learning_rate": 0.001234222785506145, + "loss": 1.6621, + "step": 4969 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012339426507927249, + "loss": 1.5742, + "step": 4970 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012336624966562681, + "loss": 1.7012, + "step": 4971 + }, + { + "epoch": 0.44, + "learning_rate": 0.001233382323120035, + "loss": 1.6406, + "step": 4972 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012331021302072862, + "loss": 1.6641, + "step": 4973 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012328219179412853, + "loss": 1.5762, + "step": 4974 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012325416863452963, + "loss": 1.7305, + "step": 4975 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012322614354425858, + "loss": 1.6426, + "step": 4976 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012319811652564212, + "loss": 1.752, + "step": 4977 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012317008758100723, + "loss": 1.6953, + "step": 4978 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012314205671268097, + "loss": 1.5645, + "step": 4979 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012311402392299062, + "loss": 1.5273, + "step": 4980 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012308598921426358, + "loss": 1.6055, + "step": 4981 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012305795258882744, + "loss": 1.5918, + "step": 4982 + }, + { + "epoch": 0.44, + "learning_rate": 0.001230299140490099, + "loss": 1.625, + "step": 4983 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012300187359713891, + "loss": 1.5879, + "step": 4984 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012297383123554247, + "loss": 1.5117, + "step": 4985 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012294578696654883, + "loss": 1.582, + "step": 4986 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012291774079248635, + "loss": 1.6797, + "step": 4987 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012288969271568354, + "loss": 1.7305, + "step": 4988 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012286164273846909, + "loss": 1.5801, + "step": 4989 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012283359086317184, + "loss": 1.5723, + "step": 4990 + }, + { + "epoch": 0.44, + "learning_rate": 0.001228055370921208, + "loss": 1.5898, + "step": 4991 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012277748142764512, + "loss": 1.6543, + "step": 4992 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012274942387207415, + "loss": 1.6133, + "step": 4993 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012272136442773732, + "loss": 1.6309, + "step": 4994 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012269330309696428, + "loss": 1.6387, + "step": 4995 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012266523988208478, + "loss": 1.748, + "step": 4996 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012263717478542882, + "loss": 1.5938, + "step": 4997 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012260910780932646, + "loss": 1.4629, + "step": 4998 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012258103895610797, + "loss": 1.6348, + "step": 4999 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012255296822810374, + "loss": 1.6445, + "step": 5000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012252489562764436, + "loss": 1.6387, + "step": 5001 + }, + { + "epoch": 0.44, + "learning_rate": 0.0012249682115706055, + "loss": 1.6621, + "step": 5002 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012246874481868317, + "loss": 1.6602, + "step": 5003 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012244066661484326, + "loss": 1.7109, + "step": 5004 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012241258654787201, + "loss": 1.5371, + "step": 5005 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012238450462010077, + "loss": 1.6211, + "step": 5006 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012235642083386104, + "loss": 1.5723, + "step": 5007 + }, + { + "epoch": 0.45, + "learning_rate": 0.001223283351914844, + "loss": 1.6113, + "step": 5008 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012230024769530274, + "loss": 1.5684, + "step": 5009 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012227215834764798, + "loss": 1.6465, + "step": 5010 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012224406715085224, + "loss": 1.4805, + "step": 5011 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012221597410724777, + "loss": 1.5996, + "step": 5012 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012218787921916701, + "loss": 1.707, + "step": 5013 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012215978248894253, + "loss": 1.5137, + "step": 5014 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012213168391890705, + "loss": 1.5859, + "step": 5015 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012210358351139343, + "loss": 1.625, + "step": 5016 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012207548126873474, + "loss": 1.6113, + "step": 5017 + }, + { + "epoch": 0.45, + "learning_rate": 0.001220473771932641, + "loss": 1.6406, + "step": 5018 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012201927128731493, + "loss": 1.6816, + "step": 5019 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012199116355322065, + "loss": 1.6035, + "step": 5020 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012196305399331492, + "loss": 1.5918, + "step": 5021 + }, + { + "epoch": 0.45, + "learning_rate": 0.001219349426099315, + "loss": 1.5664, + "step": 5022 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012190682940540442, + "loss": 1.6875, + "step": 5023 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012187871438206763, + "loss": 1.5957, + "step": 5024 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012185059754225552, + "loss": 1.5156, + "step": 5025 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012182247888830242, + "loss": 1.5605, + "step": 5026 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012179435842254284, + "loss": 1.5391, + "step": 5027 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012176623614731155, + "loss": 1.5762, + "step": 5028 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012173811206494334, + "loss": 1.6797, + "step": 5029 + }, + { + "epoch": 0.45, + "learning_rate": 0.001217099861777732, + "loss": 1.6309, + "step": 5030 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012168185848813633, + "loss": 1.6094, + "step": 5031 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012165372899836796, + "loss": 1.6973, + "step": 5032 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012162559771080358, + "loss": 1.5508, + "step": 5033 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012159746462777877, + "loss": 1.5684, + "step": 5034 + }, + { + "epoch": 0.45, + "learning_rate": 0.001215693297516293, + "loss": 1.6133, + "step": 5035 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012154119308469098, + "loss": 1.5859, + "step": 5036 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012151305462929996, + "loss": 1.5781, + "step": 5037 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012148491438779233, + "loss": 1.5605, + "step": 5038 + }, + { + "epoch": 0.45, + "learning_rate": 0.001214567723625045, + "loss": 1.7832, + "step": 5039 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012142862855577293, + "loss": 1.6484, + "step": 5040 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012140048296993425, + "loss": 1.4961, + "step": 5041 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012137233560732521, + "loss": 1.6699, + "step": 5042 + }, + { + "epoch": 0.45, + "learning_rate": 0.001213441864702828, + "loss": 1.6797, + "step": 5043 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012131603556114405, + "loss": 1.6094, + "step": 5044 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012128788288224624, + "loss": 1.5684, + "step": 5045 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012125972843592668, + "loss": 1.5605, + "step": 5046 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012123157222452287, + "loss": 1.6152, + "step": 5047 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012120341425037255, + "loss": 1.6777, + "step": 5048 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012117525451581347, + "loss": 1.623, + "step": 5049 + }, + { + "epoch": 0.45, + "learning_rate": 0.001211470930231836, + "loss": 1.6367, + "step": 5050 + }, + { + "epoch": 0.45, + "learning_rate": 0.001211189297748211, + "loss": 1.6465, + "step": 5051 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012109076477306408, + "loss": 1.5859, + "step": 5052 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012106259802025106, + "loss": 1.6816, + "step": 5053 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012103442951872053, + "loss": 1.6074, + "step": 5054 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012100625927081118, + "loss": 1.5762, + "step": 5055 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012097808727886182, + "loss": 1.5996, + "step": 5056 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012094991354521145, + "loss": 1.6914, + "step": 5057 + }, + { + "epoch": 0.45, + "learning_rate": 0.001209217380721992, + "loss": 1.584, + "step": 5058 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012089356086216428, + "loss": 1.6113, + "step": 5059 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012086538191744614, + "loss": 1.6953, + "step": 5060 + }, + { + "epoch": 0.45, + "learning_rate": 0.001208372012403843, + "loss": 1.6465, + "step": 5061 + }, + { + "epoch": 0.45, + "learning_rate": 0.001208090188333185, + "loss": 1.5098, + "step": 5062 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012078083469858854, + "loss": 1.5781, + "step": 5063 + }, + { + "epoch": 0.45, + "learning_rate": 0.001207526488385344, + "loss": 1.6094, + "step": 5064 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012072446125549624, + "loss": 1.7168, + "step": 5065 + }, + { + "epoch": 0.45, + "learning_rate": 0.001206962719518143, + "loss": 1.5762, + "step": 5066 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012066808092982897, + "loss": 1.6836, + "step": 5067 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012063988819188086, + "loss": 1.6758, + "step": 5068 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012061169374031062, + "loss": 1.627, + "step": 5069 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012058349757745907, + "loss": 1.5273, + "step": 5070 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012055529970566728, + "loss": 1.6074, + "step": 5071 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012052710012727627, + "loss": 1.4941, + "step": 5072 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012049889884462736, + "loss": 1.7266, + "step": 5073 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012047069586006195, + "loss": 1.5039, + "step": 5074 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012044249117592156, + "loss": 1.6562, + "step": 5075 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012041428479454793, + "loss": 1.6328, + "step": 5076 + }, + { + "epoch": 0.45, + "learning_rate": 0.001203860767182828, + "loss": 1.459, + "step": 5077 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012035786694946826, + "loss": 1.7305, + "step": 5078 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012032965549044633, + "loss": 1.793, + "step": 5079 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012030144234355929, + "loss": 1.5625, + "step": 5080 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012027322751114948, + "loss": 1.5957, + "step": 5081 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012024501099555953, + "loss": 1.6094, + "step": 5082 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012021679279913202, + "loss": 1.4863, + "step": 5083 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012018857292420983, + "loss": 1.4688, + "step": 5084 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012016035137313585, + "loss": 1.6016, + "step": 5085 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012013212814825321, + "loss": 1.5664, + "step": 5086 + }, + { + "epoch": 0.45, + "learning_rate": 0.001201039032519051, + "loss": 1.5254, + "step": 5087 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012007567668643492, + "loss": 1.6504, + "step": 5088 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012004744845418613, + "loss": 1.5879, + "step": 5089 + }, + { + "epoch": 0.45, + "learning_rate": 0.0012001921855750244, + "loss": 1.8027, + "step": 5090 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011999098699872755, + "loss": 1.7461, + "step": 5091 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011996275378020544, + "loss": 1.582, + "step": 5092 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011993451890428015, + "loss": 1.5977, + "step": 5093 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011990628237329589, + "loss": 1.7227, + "step": 5094 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011987804418959695, + "loss": 1.5684, + "step": 5095 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011984980435552782, + "loss": 1.5938, + "step": 5096 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011982156287343312, + "loss": 1.6562, + "step": 5097 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011979331974565756, + "loss": 1.6445, + "step": 5098 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011976507497454606, + "loss": 1.5918, + "step": 5099 + }, + { + "epoch": 0.45, + "learning_rate": 0.001197368285624436, + "loss": 1.4141, + "step": 5100 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011970858051169534, + "loss": 1.5293, + "step": 5101 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011968033082464663, + "loss": 1.6836, + "step": 5102 + }, + { + "epoch": 0.45, + "learning_rate": 0.001196520795036428, + "loss": 1.6133, + "step": 5103 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011962382655102946, + "loss": 1.6094, + "step": 5104 + }, + { + "epoch": 0.45, + "learning_rate": 0.001195955719691523, + "loss": 1.5977, + "step": 5105 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011956731576035712, + "loss": 1.5859, + "step": 5106 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011953905792698994, + "loss": 1.7363, + "step": 5107 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011951079847139683, + "loss": 1.5664, + "step": 5108 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011948253739592401, + "loss": 1.6348, + "step": 5109 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011945427470291788, + "loss": 1.6094, + "step": 5110 + }, + { + "epoch": 0.45, + "learning_rate": 0.001194260103947249, + "loss": 1.5762, + "step": 5111 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011939774447369178, + "loss": 1.6973, + "step": 5112 + }, + { + "epoch": 0.45, + "learning_rate": 0.001193694769421652, + "loss": 1.6465, + "step": 5113 + }, + { + "epoch": 0.45, + "learning_rate": 0.0011934120780249216, + "loss": 1.5684, + "step": 5114 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011931293705701963, + "loss": 1.5391, + "step": 5115 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011928466470809483, + "loss": 1.5938, + "step": 5116 + }, + { + "epoch": 0.46, + "learning_rate": 0.00119256390758065, + "loss": 1.6426, + "step": 5117 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011922811520927766, + "loss": 1.6758, + "step": 5118 + }, + { + "epoch": 0.46, + "learning_rate": 0.001191998380640803, + "loss": 1.6602, + "step": 5119 + }, + { + "epoch": 0.46, + "learning_rate": 0.001191715593248207, + "loss": 1.6543, + "step": 5120 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011914327899384665, + "loss": 1.666, + "step": 5121 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011911499707350612, + "loss": 1.5605, + "step": 5122 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011908671356614723, + "loss": 1.6152, + "step": 5123 + }, + { + "epoch": 0.46, + "learning_rate": 0.001190584284741182, + "loss": 1.4746, + "step": 5124 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011903014179976742, + "loss": 1.7188, + "step": 5125 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011900185354544331, + "loss": 1.5918, + "step": 5126 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011897356371349456, + "loss": 1.5332, + "step": 5127 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011894527230626995, + "loss": 1.6484, + "step": 5128 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011891697932611828, + "loss": 1.707, + "step": 5129 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011888868477538865, + "loss": 1.6133, + "step": 5130 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011886038865643015, + "loss": 1.6113, + "step": 5131 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011883209097159212, + "loss": 1.707, + "step": 5132 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011880379172322388, + "loss": 1.5469, + "step": 5133 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011877549091367508, + "loss": 1.6836, + "step": 5134 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011874718854529532, + "loss": 1.5938, + "step": 5135 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011871888462043438, + "loss": 1.5586, + "step": 5136 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011869057914144223, + "loss": 1.5938, + "step": 5137 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011866227211066896, + "loss": 1.6152, + "step": 5138 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011863396353046466, + "loss": 1.6777, + "step": 5139 + }, + { + "epoch": 0.46, + "learning_rate": 0.001186056534031797, + "loss": 1.5039, + "step": 5140 + }, + { + "epoch": 0.46, + "learning_rate": 0.001185773417311645, + "loss": 1.6836, + "step": 5141 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011854902851676967, + "loss": 1.6152, + "step": 5142 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011852071376234586, + "loss": 1.6797, + "step": 5143 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011849239747024395, + "loss": 1.6172, + "step": 5144 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011846407964281485, + "loss": 1.623, + "step": 5145 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011843576028240962, + "loss": 1.752, + "step": 5146 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011840743939137957, + "loss": 1.6426, + "step": 5147 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011837911697207594, + "loss": 1.5781, + "step": 5148 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011835079302685019, + "loss": 1.6016, + "step": 5149 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011832246755805395, + "loss": 1.6426, + "step": 5150 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011829414056803893, + "loss": 1.6875, + "step": 5151 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011826581205915698, + "loss": 1.5938, + "step": 5152 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011823748203376003, + "loss": 1.5918, + "step": 5153 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011820915049420022, + "loss": 1.6621, + "step": 5154 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011818081744282974, + "loss": 1.6367, + "step": 5155 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011815248288200094, + "loss": 1.6055, + "step": 5156 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011812414681406631, + "loss": 1.5371, + "step": 5157 + }, + { + "epoch": 0.46, + "learning_rate": 0.001180958092413784, + "loss": 1.6992, + "step": 5158 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011806747016629001, + "loss": 1.7402, + "step": 5159 + }, + { + "epoch": 0.46, + "learning_rate": 0.001180391295911539, + "loss": 1.6152, + "step": 5160 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011801078751832311, + "loss": 1.6191, + "step": 5161 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011798244395015067, + "loss": 1.502, + "step": 5162 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011795409888898985, + "loss": 1.6523, + "step": 5163 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011792575233719397, + "loss": 1.6914, + "step": 5164 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011789740429711648, + "loss": 1.5293, + "step": 5165 + }, + { + "epoch": 0.46, + "learning_rate": 0.00117869054771111, + "loss": 1.498, + "step": 5166 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011784070376153126, + "loss": 1.7344, + "step": 5167 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011781235127073103, + "loss": 1.6523, + "step": 5168 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011778399730106434, + "loss": 1.5312, + "step": 5169 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011775564185488516, + "loss": 1.627, + "step": 5170 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011772728493454785, + "loss": 1.5957, + "step": 5171 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011769892654240664, + "loss": 1.5801, + "step": 5172 + }, + { + "epoch": 0.46, + "learning_rate": 0.00117670566680816, + "loss": 1.5195, + "step": 5173 + }, + { + "epoch": 0.46, + "learning_rate": 0.001176422053521305, + "loss": 1.6094, + "step": 5174 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011761384255870482, + "loss": 1.6172, + "step": 5175 + }, + { + "epoch": 0.46, + "learning_rate": 0.001175854783028938, + "loss": 1.5703, + "step": 5176 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011755711258705235, + "loss": 1.6836, + "step": 5177 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011752874541353555, + "loss": 1.5742, + "step": 5178 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011750037678469858, + "loss": 1.6562, + "step": 5179 + }, + { + "epoch": 0.46, + "learning_rate": 0.001174720067028967, + "loss": 1.6973, + "step": 5180 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011744363517048539, + "loss": 1.5605, + "step": 5181 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011741526218982013, + "loss": 1.7305, + "step": 5182 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011738688776325662, + "loss": 1.6406, + "step": 5183 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011735851189315068, + "loss": 1.6875, + "step": 5184 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011733013458185809, + "loss": 1.543, + "step": 5185 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011730175583173498, + "loss": 1.6113, + "step": 5186 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011727337564513744, + "loss": 1.4902, + "step": 5187 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011724499402442172, + "loss": 1.6035, + "step": 5188 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011721661097194422, + "loss": 1.6641, + "step": 5189 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011718822649006144, + "loss": 1.5762, + "step": 5190 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011715984058113, + "loss": 1.625, + "step": 5191 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011713145324750661, + "loss": 1.627, + "step": 5192 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011710306449154812, + "loss": 1.6309, + "step": 5193 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011707467431561153, + "loss": 1.5859, + "step": 5194 + }, + { + "epoch": 0.46, + "learning_rate": 0.001170462827220539, + "loss": 1.5996, + "step": 5195 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011701788971323244, + "loss": 1.5527, + "step": 5196 + }, + { + "epoch": 0.46, + "learning_rate": 0.001169894952915045, + "loss": 1.6719, + "step": 5197 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011696109945922748, + "loss": 1.6992, + "step": 5198 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011693270221875898, + "loss": 1.6426, + "step": 5199 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011690430357245664, + "loss": 1.623, + "step": 5200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011687590352267828, + "loss": 1.5371, + "step": 5201 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011684750207178176, + "loss": 1.5293, + "step": 5202 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011681909922212515, + "loss": 1.5273, + "step": 5203 + }, + { + "epoch": 0.46, + "learning_rate": 0.001167906949760666, + "loss": 1.709, + "step": 5204 + }, + { + "epoch": 0.46, + "learning_rate": 0.001167622893359643, + "loss": 1.6543, + "step": 5205 + }, + { + "epoch": 0.46, + "learning_rate": 0.001167338823041767, + "loss": 1.6914, + "step": 5206 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011670547388306226, + "loss": 1.6406, + "step": 5207 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011667706407497956, + "loss": 1.5605, + "step": 5208 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011664865288228733, + "loss": 1.6426, + "step": 5209 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011662024030734438, + "loss": 1.6094, + "step": 5210 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011659182635250973, + "loss": 1.6113, + "step": 5211 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011656341102014235, + "loss": 1.5625, + "step": 5212 + }, + { + "epoch": 0.46, + "learning_rate": 0.001165349943126015, + "loss": 1.6328, + "step": 5213 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011650657623224641, + "loss": 1.543, + "step": 5214 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011647815678143651, + "loss": 1.5586, + "step": 5215 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011644973596253133, + "loss": 1.5723, + "step": 5216 + }, + { + "epoch": 0.46, + "learning_rate": 0.001164213137778905, + "loss": 1.7363, + "step": 5217 + }, + { + "epoch": 0.46, + "learning_rate": 0.001163928902298737, + "loss": 1.5996, + "step": 5218 + }, + { + "epoch": 0.46, + "learning_rate": 0.001163644653208409, + "loss": 1.5859, + "step": 5219 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011633603905315197, + "loss": 1.707, + "step": 5220 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011630761142916708, + "loss": 1.5957, + "step": 5221 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011627918245124636, + "loss": 1.6348, + "step": 5222 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011625075212175016, + "loss": 1.6152, + "step": 5223 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011622232044303886, + "loss": 1.5488, + "step": 5224 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011619388741747306, + "loss": 1.6426, + "step": 5225 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011616545304741336, + "loss": 1.7227, + "step": 5226 + }, + { + "epoch": 0.46, + "learning_rate": 0.0011613701733522053, + "loss": 1.6504, + "step": 5227 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011610858028325542, + "loss": 1.6641, + "step": 5228 + }, + { + "epoch": 0.47, + "learning_rate": 0.00116080141893879, + "loss": 1.6953, + "step": 5229 + }, + { + "epoch": 0.47, + "learning_rate": 0.001160517021694524, + "loss": 1.5859, + "step": 5230 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011602326111233682, + "loss": 1.5273, + "step": 5231 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011599481872489354, + "loss": 1.6992, + "step": 5232 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011596637500948404, + "loss": 1.6113, + "step": 5233 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011593792996846977, + "loss": 1.5664, + "step": 5234 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011590948360421243, + "loss": 1.6133, + "step": 5235 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011588103591907376, + "loss": 1.5723, + "step": 5236 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011585258691541563, + "loss": 1.4238, + "step": 5237 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011582413659559998, + "loss": 1.6152, + "step": 5238 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011579568496198895, + "loss": 1.5723, + "step": 5239 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011576723201694467, + "loss": 1.5723, + "step": 5240 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011573877776282946, + "loss": 1.5625, + "step": 5241 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011571032220200578, + "loss": 1.6914, + "step": 5242 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011568186533683607, + "loss": 1.6367, + "step": 5243 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011565340716968298, + "loss": 1.5898, + "step": 5244 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011562494770290927, + "loss": 1.6621, + "step": 5245 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011559648693887778, + "loss": 1.6582, + "step": 5246 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011556802487995136, + "loss": 1.7109, + "step": 5247 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011553956152849324, + "loss": 1.5391, + "step": 5248 + }, + { + "epoch": 0.47, + "learning_rate": 0.001155110968868664, + "loss": 1.5762, + "step": 5249 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011548263095743428, + "loss": 1.625, + "step": 5250 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011545416374256013, + "loss": 1.627, + "step": 5251 + }, + { + "epoch": 0.47, + "learning_rate": 0.001154256952446075, + "loss": 1.5664, + "step": 5252 + }, + { + "epoch": 0.47, + "learning_rate": 0.001153972254659399, + "loss": 1.5996, + "step": 5253 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011536875440892116, + "loss": 1.6348, + "step": 5254 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011534028207591499, + "loss": 1.7148, + "step": 5255 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011531180846928533, + "loss": 1.6934, + "step": 5256 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011528333359139616, + "loss": 1.6465, + "step": 5257 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011525485744461163, + "loss": 1.6348, + "step": 5258 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011522638003129595, + "loss": 1.6582, + "step": 5259 + }, + { + "epoch": 0.47, + "learning_rate": 0.001151979013538135, + "loss": 1.6992, + "step": 5260 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011516942141452863, + "loss": 1.541, + "step": 5261 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011514094021580598, + "loss": 1.5703, + "step": 5262 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011511245776001013, + "loss": 1.6074, + "step": 5263 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011508397404950584, + "loss": 1.6309, + "step": 5264 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011505548908665798, + "loss": 1.707, + "step": 5265 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011502700287383148, + "loss": 1.4805, + "step": 5266 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011499851541339138, + "loss": 1.5996, + "step": 5267 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011497002670770294, + "loss": 1.4512, + "step": 5268 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011494153675913133, + "loss": 1.6133, + "step": 5269 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011491304557004198, + "loss": 1.627, + "step": 5270 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011488455314280033, + "loss": 1.6016, + "step": 5271 + }, + { + "epoch": 0.47, + "learning_rate": 0.00114856059479772, + "loss": 1.5312, + "step": 5272 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011482756458332261, + "loss": 1.6172, + "step": 5273 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011479906845581803, + "loss": 1.5273, + "step": 5274 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011477057109962403, + "loss": 1.582, + "step": 5275 + }, + { + "epoch": 0.47, + "learning_rate": 0.001147420725171067, + "loss": 1.5312, + "step": 5276 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011471357271063205, + "loss": 1.584, + "step": 5277 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011468507168256633, + "loss": 1.6641, + "step": 5278 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011465656943527578, + "loss": 1.666, + "step": 5279 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011462806597112684, + "loss": 1.6484, + "step": 5280 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011459956129248599, + "loss": 1.6543, + "step": 5281 + }, + { + "epoch": 0.47, + "learning_rate": 0.001145710554017198, + "loss": 1.627, + "step": 5282 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011454254830119501, + "loss": 1.6719, + "step": 5283 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011451403999327837, + "loss": 1.5898, + "step": 5284 + }, + { + "epoch": 0.47, + "learning_rate": 0.001144855304803368, + "loss": 1.6055, + "step": 5285 + }, + { + "epoch": 0.47, + "learning_rate": 0.001144570197647373, + "loss": 1.5039, + "step": 5286 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011442850784884695, + "loss": 1.5938, + "step": 5287 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011439999473503296, + "loss": 1.5996, + "step": 5288 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011437148042566258, + "loss": 1.6328, + "step": 5289 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011434296492310327, + "loss": 1.6152, + "step": 5290 + }, + { + "epoch": 0.47, + "learning_rate": 0.001143144482297225, + "loss": 1.6152, + "step": 5291 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011428593034788785, + "loss": 1.6113, + "step": 5292 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011425741127996701, + "loss": 1.6016, + "step": 5293 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011422889102832778, + "loss": 1.6289, + "step": 5294 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011420036959533804, + "loss": 1.5957, + "step": 5295 + }, + { + "epoch": 0.47, + "learning_rate": 0.001141718469833658, + "loss": 1.498, + "step": 5296 + }, + { + "epoch": 0.47, + "learning_rate": 0.001141433231947791, + "loss": 1.4707, + "step": 5297 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011411479823194618, + "loss": 1.5391, + "step": 5298 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011408627209723524, + "loss": 1.6621, + "step": 5299 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011405774479301472, + "loss": 1.5234, + "step": 5300 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011402921632165306, + "loss": 1.6191, + "step": 5301 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011400068668551886, + "loss": 1.5469, + "step": 5302 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011397215588698076, + "loss": 1.5176, + "step": 5303 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011394362392840754, + "loss": 1.4805, + "step": 5304 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011391509081216805, + "loss": 1.6465, + "step": 5305 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011388655654063125, + "loss": 1.5879, + "step": 5306 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011385802111616617, + "loss": 1.7031, + "step": 5307 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011382948454114199, + "loss": 1.5293, + "step": 5308 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011380094681792791, + "loss": 1.7344, + "step": 5309 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011377240794889333, + "loss": 1.7246, + "step": 5310 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011374386793640764, + "loss": 1.7246, + "step": 5311 + }, + { + "epoch": 0.47, + "learning_rate": 0.001137153267828404, + "loss": 1.5703, + "step": 5312 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011368678449056118, + "loss": 1.7363, + "step": 5313 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011365824106193973, + "loss": 1.5645, + "step": 5314 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011362969649934586, + "loss": 1.5586, + "step": 5315 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011360115080514951, + "loss": 1.5195, + "step": 5316 + }, + { + "epoch": 0.47, + "learning_rate": 0.001135726039817206, + "loss": 1.5625, + "step": 5317 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011354405603142932, + "loss": 1.6855, + "step": 5318 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011351550695664577, + "loss": 1.6191, + "step": 5319 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011348695675974028, + "loss": 1.5508, + "step": 5320 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011345840544308321, + "loss": 1.5332, + "step": 5321 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011342985300904502, + "loss": 1.6914, + "step": 5322 + }, + { + "epoch": 0.47, + "learning_rate": 0.001134012994599963, + "loss": 1.5957, + "step": 5323 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011337274479830766, + "loss": 1.6621, + "step": 5324 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011334418902634988, + "loss": 1.6738, + "step": 5325 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011331563214649375, + "loss": 1.6055, + "step": 5326 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011328707416111026, + "loss": 1.6191, + "step": 5327 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011325851507257038, + "loss": 1.5859, + "step": 5328 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011322995488324526, + "loss": 1.5566, + "step": 5329 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011320139359550607, + "loss": 1.6602, + "step": 5330 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011317283121172414, + "loss": 1.623, + "step": 5331 + }, + { + "epoch": 0.47, + "learning_rate": 0.001131442677342708, + "loss": 1.6172, + "step": 5332 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011311570316551761, + "loss": 1.6797, + "step": 5333 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011308713750783604, + "loss": 1.5918, + "step": 5334 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011305857076359782, + "loss": 1.666, + "step": 5335 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011303000293517466, + "loss": 1.6777, + "step": 5336 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011300143402493844, + "loss": 1.5996, + "step": 5337 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011297286403526101, + "loss": 1.6602, + "step": 5338 + }, + { + "epoch": 0.47, + "learning_rate": 0.0011294429296851449, + "loss": 1.6562, + "step": 5339 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011291572082707088, + "loss": 1.6074, + "step": 5340 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011288714761330248, + "loss": 1.5723, + "step": 5341 + }, + { + "epoch": 0.48, + "learning_rate": 0.001128585733295815, + "loss": 1.5762, + "step": 5342 + }, + { + "epoch": 0.48, + "learning_rate": 0.001128299979782803, + "loss": 1.6016, + "step": 5343 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011280142156177145, + "loss": 1.6914, + "step": 5344 + }, + { + "epoch": 0.48, + "learning_rate": 0.001127728440824274, + "loss": 1.5918, + "step": 5345 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011274426554262079, + "loss": 1.6094, + "step": 5346 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011271568594472439, + "loss": 1.5273, + "step": 5347 + }, + { + "epoch": 0.48, + "learning_rate": 0.00112687105291111, + "loss": 1.5859, + "step": 5348 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011265852358415351, + "loss": 1.5879, + "step": 5349 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011262994082622494, + "loss": 1.6719, + "step": 5350 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011260135701969833, + "loss": 1.6914, + "step": 5351 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011257277216694685, + "loss": 1.6289, + "step": 5352 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011254418627034382, + "loss": 1.5996, + "step": 5353 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011251559933226248, + "loss": 1.5469, + "step": 5354 + }, + { + "epoch": 0.48, + "learning_rate": 0.001124870113550763, + "loss": 1.541, + "step": 5355 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011245842234115877, + "loss": 1.7344, + "step": 5356 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011242983229288353, + "loss": 1.6309, + "step": 5357 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011240124121262422, + "loss": 1.5547, + "step": 5358 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011237264910275463, + "loss": 1.5938, + "step": 5359 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011234405596564862, + "loss": 1.6738, + "step": 5360 + }, + { + "epoch": 0.48, + "learning_rate": 0.001123154618036801, + "loss": 1.6289, + "step": 5361 + }, + { + "epoch": 0.48, + "learning_rate": 0.001122868666192231, + "loss": 1.6367, + "step": 5362 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011225827041465176, + "loss": 1.5332, + "step": 5363 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011222967319234024, + "loss": 1.7266, + "step": 5364 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011220107495466282, + "loss": 1.7168, + "step": 5365 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011217247570399387, + "loss": 1.7109, + "step": 5366 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011214387544270785, + "loss": 1.5195, + "step": 5367 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011211527417317928, + "loss": 1.6973, + "step": 5368 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011208667189778277, + "loss": 1.5586, + "step": 5369 + }, + { + "epoch": 0.48, + "learning_rate": 0.00112058068618893, + "loss": 1.6934, + "step": 5370 + }, + { + "epoch": 0.48, + "learning_rate": 0.001120294643388848, + "loss": 1.5645, + "step": 5371 + }, + { + "epoch": 0.48, + "learning_rate": 0.00112000859060133, + "loss": 1.6016, + "step": 5372 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011197225278501254, + "loss": 1.6875, + "step": 5373 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011194364551589846, + "loss": 1.6992, + "step": 5374 + }, + { + "epoch": 0.48, + "learning_rate": 0.001119150372551659, + "loss": 1.5352, + "step": 5375 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011188642800518999, + "loss": 1.625, + "step": 5376 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011185781776834607, + "loss": 1.5703, + "step": 5377 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011182920654700945, + "loss": 1.6289, + "step": 5378 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011180059434355563, + "loss": 1.5312, + "step": 5379 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011177198116036004, + "loss": 1.6641, + "step": 5380 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011174336699979839, + "loss": 1.6973, + "step": 5381 + }, + { + "epoch": 0.48, + "learning_rate": 0.001117147518642463, + "loss": 1.6055, + "step": 5382 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011168613575607955, + "loss": 1.6562, + "step": 5383 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011165751867767398, + "loss": 1.7246, + "step": 5384 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011162890063140553, + "loss": 1.6758, + "step": 5385 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011160028161965017, + "loss": 1.625, + "step": 5386 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011157166164478406, + "loss": 1.6094, + "step": 5387 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011154304070918326, + "loss": 1.6309, + "step": 5388 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011151441881522412, + "loss": 1.7559, + "step": 5389 + }, + { + "epoch": 0.48, + "learning_rate": 0.001114857959652829, + "loss": 1.5918, + "step": 5390 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011145717216173604, + "loss": 1.625, + "step": 5391 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011142854740696002, + "loss": 1.7793, + "step": 5392 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011139992170333141, + "loss": 1.5703, + "step": 5393 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011137129505322683, + "loss": 1.7246, + "step": 5394 + }, + { + "epoch": 0.48, + "learning_rate": 0.00111342667459023, + "loss": 1.5938, + "step": 5395 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011131403892309674, + "loss": 1.7109, + "step": 5396 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011128540944782496, + "loss": 1.5918, + "step": 5397 + }, + { + "epoch": 0.48, + "learning_rate": 0.001112567790355845, + "loss": 1.6445, + "step": 5398 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011122814768875252, + "loss": 1.5996, + "step": 5399 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011119951540970608, + "loss": 1.4609, + "step": 5400 + }, + { + "epoch": 0.48, + "learning_rate": 0.001111708822008224, + "loss": 1.6621, + "step": 5401 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011114224806447863, + "loss": 1.6699, + "step": 5402 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011111361300305228, + "loss": 1.6465, + "step": 5403 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011108497701892066, + "loss": 1.6816, + "step": 5404 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011105634011446131, + "loss": 1.5312, + "step": 5405 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011102770229205175, + "loss": 1.6094, + "step": 5406 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011099906355406968, + "loss": 1.6738, + "step": 5407 + }, + { + "epoch": 0.48, + "learning_rate": 0.001109704239028928, + "loss": 1.582, + "step": 5408 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011094178334089895, + "loss": 1.7422, + "step": 5409 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011091314187046594, + "loss": 1.6445, + "step": 5410 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011088449949397177, + "loss": 1.6328, + "step": 5411 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011085585621379449, + "loss": 1.5703, + "step": 5412 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011082721203231212, + "loss": 1.584, + "step": 5413 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011079856695190291, + "loss": 1.6035, + "step": 5414 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011076992097494506, + "loss": 1.5938, + "step": 5415 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011074127410381695, + "loss": 1.3574, + "step": 5416 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011071262634089697, + "loss": 1.4961, + "step": 5417 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011068397768856356, + "loss": 1.6992, + "step": 5418 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011065532814919525, + "loss": 1.5391, + "step": 5419 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011062667772517075, + "loss": 1.625, + "step": 5420 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011059802641886867, + "loss": 1.6504, + "step": 5421 + }, + { + "epoch": 0.48, + "learning_rate": 0.001105693742326678, + "loss": 1.6328, + "step": 5422 + }, + { + "epoch": 0.48, + "learning_rate": 0.00110540721168947, + "loss": 1.5977, + "step": 5423 + }, + { + "epoch": 0.48, + "learning_rate": 0.001105120672300852, + "loss": 1.623, + "step": 5424 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011048341241846134, + "loss": 1.7266, + "step": 5425 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011045475673645449, + "loss": 1.6367, + "step": 5426 + }, + { + "epoch": 0.48, + "learning_rate": 0.001104261001864438, + "loss": 1.5859, + "step": 5427 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011039744277080848, + "loss": 1.7305, + "step": 5428 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011036878449192775, + "loss": 1.6816, + "step": 5429 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011034012535218105, + "loss": 1.584, + "step": 5430 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011031146535394772, + "loss": 1.4238, + "step": 5431 + }, + { + "epoch": 0.48, + "learning_rate": 0.001102828044996073, + "loss": 1.6133, + "step": 5432 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011025414279153929, + "loss": 1.6484, + "step": 5433 + }, + { + "epoch": 0.48, + "learning_rate": 0.001102254802321234, + "loss": 1.6094, + "step": 5434 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011019681682373928, + "loss": 1.6152, + "step": 5435 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011016815256876672, + "loss": 1.6582, + "step": 5436 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011013948746958554, + "loss": 1.6094, + "step": 5437 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011011082152857568, + "loss": 1.6152, + "step": 5438 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011008215474811712, + "loss": 1.5898, + "step": 5439 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011005348713058992, + "loss": 1.5547, + "step": 5440 + }, + { + "epoch": 0.48, + "learning_rate": 0.0011002481867837418, + "loss": 1.6289, + "step": 5441 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010999614939385012, + "loss": 1.5625, + "step": 5442 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010996747927939797, + "loss": 1.6973, + "step": 5443 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010993880833739809, + "loss": 1.5996, + "step": 5444 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010991013657023084, + "loss": 1.5664, + "step": 5445 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010988146398027674, + "loss": 1.6074, + "step": 5446 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010985279056991626, + "loss": 1.6406, + "step": 5447 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010982411634153006, + "loss": 1.6465, + "step": 5448 + }, + { + "epoch": 0.48, + "learning_rate": 0.001097954412974988, + "loss": 1.6289, + "step": 5449 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010976676544020318, + "loss": 1.6055, + "step": 5450 + }, + { + "epoch": 0.48, + "learning_rate": 0.0010973808877202404, + "loss": 1.6562, + "step": 5451 + }, + { + "epoch": 0.49, + "learning_rate": 0.001097094112953423, + "loss": 1.6152, + "step": 5452 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010968073301253877, + "loss": 1.6113, + "step": 5453 + }, + { + "epoch": 0.49, + "learning_rate": 0.001096520539259946, + "loss": 1.4766, + "step": 5454 + }, + { + "epoch": 0.49, + "learning_rate": 0.001096233740380908, + "loss": 1.6348, + "step": 5455 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010959469335120851, + "loss": 1.6387, + "step": 5456 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010956601186772892, + "loss": 1.6426, + "step": 5457 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010953732959003335, + "loss": 1.5703, + "step": 5458 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010950864652050313, + "loss": 1.6777, + "step": 5459 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010947996266151962, + "loss": 1.6934, + "step": 5460 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010945127801546433, + "loss": 1.7188, + "step": 5461 + }, + { + "epoch": 0.49, + "learning_rate": 0.001094225925847188, + "loss": 1.6543, + "step": 5462 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010939390637166465, + "loss": 1.502, + "step": 5463 + }, + { + "epoch": 0.49, + "learning_rate": 0.001093652193786835, + "loss": 1.5664, + "step": 5464 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010933653160815706, + "loss": 1.6816, + "step": 5465 + }, + { + "epoch": 0.49, + "learning_rate": 0.001093078430624672, + "loss": 1.6582, + "step": 5466 + }, + { + "epoch": 0.49, + "learning_rate": 0.001092791537439957, + "loss": 1.7266, + "step": 5467 + }, + { + "epoch": 0.49, + "learning_rate": 0.001092504636551246, + "loss": 1.6523, + "step": 5468 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010922177279823575, + "loss": 1.5215, + "step": 5469 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010919308117571132, + "loss": 1.4824, + "step": 5470 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010916438878993333, + "loss": 1.6738, + "step": 5471 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010913569564328404, + "loss": 1.5293, + "step": 5472 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010910700173814565, + "loss": 1.5723, + "step": 5473 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010907830707690047, + "loss": 1.7012, + "step": 5474 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010904961166193087, + "loss": 1.6523, + "step": 5475 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010902091549561926, + "loss": 1.6094, + "step": 5476 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010899221858034817, + "loss": 1.623, + "step": 5477 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010896352091850012, + "loss": 1.6504, + "step": 5478 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010893482251245774, + "loss": 1.6055, + "step": 5479 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010890612336460375, + "loss": 1.6523, + "step": 5480 + }, + { + "epoch": 0.49, + "learning_rate": 0.001088774234773208, + "loss": 1.6172, + "step": 5481 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010884872285299178, + "loss": 1.7168, + "step": 5482 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010882002149399954, + "loss": 1.7051, + "step": 5483 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010879131940272695, + "loss": 1.668, + "step": 5484 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010876261658155704, + "loss": 1.582, + "step": 5485 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010873391303287284, + "loss": 1.5742, + "step": 5486 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010870520875905744, + "loss": 1.7891, + "step": 5487 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010867650376249402, + "loss": 1.6816, + "step": 5488 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010864779804556585, + "loss": 1.5508, + "step": 5489 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010861909161065614, + "loss": 1.5859, + "step": 5490 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010859038446014825, + "loss": 1.5176, + "step": 5491 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010856167659642566, + "loss": 1.5723, + "step": 5492 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010853296802187174, + "loss": 1.7012, + "step": 5493 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010850425873887004, + "loss": 1.6309, + "step": 5494 + }, + { + "epoch": 0.49, + "learning_rate": 0.001084755487498042, + "loss": 1.6406, + "step": 5495 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010844683805705778, + "loss": 1.8203, + "step": 5496 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010841812666301453, + "loss": 1.6895, + "step": 5497 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010838941457005818, + "loss": 1.5547, + "step": 5498 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010836070178057257, + "loss": 1.7109, + "step": 5499 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010833198829694154, + "loss": 1.5684, + "step": 5500 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010830327412154905, + "loss": 1.6406, + "step": 5501 + }, + { + "epoch": 0.49, + "learning_rate": 0.001082745592567791, + "loss": 1.5664, + "step": 5502 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010824584370501568, + "loss": 1.5703, + "step": 5503 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010821712746864295, + "loss": 1.5625, + "step": 5504 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010818841055004501, + "loss": 1.4453, + "step": 5505 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010815969295160613, + "loss": 1.5762, + "step": 5506 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010813097467571055, + "loss": 1.5762, + "step": 5507 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010810225572474262, + "loss": 1.5977, + "step": 5508 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010807353610108672, + "loss": 1.7402, + "step": 5509 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010804481580712726, + "loss": 1.3809, + "step": 5510 + }, + { + "epoch": 0.49, + "learning_rate": 0.001080160948452488, + "loss": 1.6875, + "step": 5511 + }, + { + "epoch": 0.49, + "learning_rate": 0.001079873732178358, + "loss": 1.5586, + "step": 5512 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010795865092727297, + "loss": 1.6113, + "step": 5513 + }, + { + "epoch": 0.49, + "learning_rate": 0.001079299279759449, + "loss": 1.7539, + "step": 5514 + }, + { + "epoch": 0.49, + "learning_rate": 0.001079012043662363, + "loss": 1.7754, + "step": 5515 + }, + { + "epoch": 0.49, + "learning_rate": 0.00107872480100532, + "loss": 1.6094, + "step": 5516 + }, + { + "epoch": 0.49, + "learning_rate": 0.001078437551812168, + "loss": 1.5801, + "step": 5517 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010781502961067558, + "loss": 1.5859, + "step": 5518 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010778630339129324, + "loss": 1.6348, + "step": 5519 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010775757652545482, + "loss": 1.5566, + "step": 5520 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010772884901554534, + "loss": 1.5977, + "step": 5521 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010770012086394989, + "loss": 1.7383, + "step": 5522 + }, + { + "epoch": 0.49, + "learning_rate": 0.001076713920730536, + "loss": 1.6602, + "step": 5523 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010764266264524172, + "loss": 1.5684, + "step": 5524 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010761393258289947, + "loss": 1.6426, + "step": 5525 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010758520188841215, + "loss": 1.5684, + "step": 5526 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010755647056416514, + "loss": 1.6602, + "step": 5527 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010752773861254386, + "loss": 1.5391, + "step": 5528 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010749900603593374, + "loss": 1.5078, + "step": 5529 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010747027283672032, + "loss": 1.627, + "step": 5530 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010744153901728918, + "loss": 1.6543, + "step": 5531 + }, + { + "epoch": 0.49, + "learning_rate": 0.001074128045800259, + "loss": 1.5254, + "step": 5532 + }, + { + "epoch": 0.49, + "learning_rate": 0.001073840695273162, + "loss": 1.541, + "step": 5533 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010735533386154574, + "loss": 1.4512, + "step": 5534 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010732659758510034, + "loss": 1.6387, + "step": 5535 + }, + { + "epoch": 0.49, + "learning_rate": 0.001072978607003658, + "loss": 1.7871, + "step": 5536 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010726912320972804, + "loss": 1.707, + "step": 5537 + }, + { + "epoch": 0.49, + "learning_rate": 0.001072403851155729, + "loss": 1.6172, + "step": 5538 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010721164642028642, + "loss": 1.5703, + "step": 5539 + }, + { + "epoch": 0.49, + "learning_rate": 0.001071829071262546, + "loss": 1.7637, + "step": 5540 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010715416723586352, + "loss": 1.6953, + "step": 5541 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010712542675149932, + "loss": 1.6406, + "step": 5542 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010709668567554812, + "loss": 1.7109, + "step": 5543 + }, + { + "epoch": 0.49, + "learning_rate": 0.001070679440103962, + "loss": 1.6621, + "step": 5544 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010703920175842978, + "loss": 1.623, + "step": 5545 + }, + { + "epoch": 0.49, + "learning_rate": 0.001070104589220352, + "loss": 1.5918, + "step": 5546 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010698171550359886, + "loss": 1.6309, + "step": 5547 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010695297150550712, + "loss": 1.6191, + "step": 5548 + }, + { + "epoch": 0.49, + "learning_rate": 0.001069242269301465, + "loss": 1.6133, + "step": 5549 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010689548177990344, + "loss": 1.6504, + "step": 5550 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010686673605716458, + "loss": 1.6875, + "step": 5551 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010683798976431646, + "loss": 1.5684, + "step": 5552 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010680924290374578, + "loss": 1.502, + "step": 5553 + }, + { + "epoch": 0.49, + "learning_rate": 0.001067804954778392, + "loss": 1.6445, + "step": 5554 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010675174748898356, + "loss": 1.6465, + "step": 5555 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010672299893956555, + "loss": 1.5938, + "step": 5556 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010669424983197205, + "loss": 1.7344, + "step": 5557 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010666550016858994, + "loss": 1.4961, + "step": 5558 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010663674995180616, + "loss": 1.6758, + "step": 5559 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010660799918400772, + "loss": 1.5879, + "step": 5560 + }, + { + "epoch": 0.49, + "learning_rate": 0.001065792478675816, + "loss": 1.6387, + "step": 5561 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010655049600491492, + "loss": 1.5176, + "step": 5562 + }, + { + "epoch": 0.49, + "learning_rate": 0.0010652174359839475, + "loss": 1.668, + "step": 5563 + }, + { + "epoch": 0.49, + "learning_rate": 0.001064929906504083, + "loss": 1.4551, + "step": 5564 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010646423716334273, + "loss": 1.5293, + "step": 5565 + }, + { + "epoch": 0.5, + "learning_rate": 0.001064354831395853, + "loss": 1.6641, + "step": 5566 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010640672858152332, + "loss": 1.627, + "step": 5567 + }, + { + "epoch": 0.5, + "learning_rate": 0.001063779734915441, + "loss": 1.5508, + "step": 5568 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010634921787203509, + "loss": 1.5469, + "step": 5569 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010632046172538367, + "loss": 1.7051, + "step": 5570 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010629170505397733, + "loss": 1.541, + "step": 5571 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010626294786020357, + "loss": 1.6738, + "step": 5572 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010623419014644997, + "loss": 1.7637, + "step": 5573 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010620543191510413, + "loss": 1.5762, + "step": 5574 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010617667316855366, + "loss": 1.6211, + "step": 5575 + }, + { + "epoch": 0.5, + "learning_rate": 0.001061479139091863, + "loss": 1.5723, + "step": 5576 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010611915413938976, + "loss": 1.5469, + "step": 5577 + }, + { + "epoch": 0.5, + "learning_rate": 0.001060903938615518, + "loss": 1.7207, + "step": 5578 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010606163307806022, + "loss": 1.6367, + "step": 5579 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010603287179130297, + "loss": 1.5156, + "step": 5580 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010600411000366784, + "loss": 1.5898, + "step": 5581 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010597534771754284, + "loss": 1.6758, + "step": 5582 + }, + { + "epoch": 0.5, + "learning_rate": 0.001059465849353159, + "loss": 1.5762, + "step": 5583 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010591782165937512, + "loss": 1.6816, + "step": 5584 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010588905789210845, + "loss": 1.541, + "step": 5585 + }, + { + "epoch": 0.5, + "learning_rate": 0.001058602936359041, + "loss": 1.627, + "step": 5586 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010583152889315018, + "loss": 1.543, + "step": 5587 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010580276366623487, + "loss": 1.627, + "step": 5588 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010577399795754637, + "loss": 1.6094, + "step": 5589 + }, + { + "epoch": 0.5, + "learning_rate": 0.00105745231769473, + "loss": 1.4824, + "step": 5590 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010571646510440305, + "loss": 1.6113, + "step": 5591 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010568769796472485, + "loss": 1.5742, + "step": 5592 + }, + { + "epoch": 0.5, + "learning_rate": 0.001056589303528268, + "loss": 1.6523, + "step": 5593 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010563016227109732, + "loss": 1.4883, + "step": 5594 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010560139372192486, + "loss": 1.6758, + "step": 5595 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010557262470769794, + "loss": 1.7441, + "step": 5596 + }, + { + "epoch": 0.5, + "learning_rate": 0.001055438552308051, + "loss": 1.5664, + "step": 5597 + }, + { + "epoch": 0.5, + "learning_rate": 0.001055150852936349, + "loss": 1.543, + "step": 5598 + }, + { + "epoch": 0.5, + "learning_rate": 0.00105486314898576, + "loss": 1.623, + "step": 5599 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010545754404801702, + "loss": 1.5195, + "step": 5600 + }, + { + "epoch": 0.5, + "learning_rate": 0.001054287727443467, + "loss": 1.6445, + "step": 5601 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010540000098995367, + "loss": 1.6504, + "step": 5602 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010537122878722677, + "loss": 1.498, + "step": 5603 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010534245613855481, + "loss": 1.543, + "step": 5604 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010531368304632662, + "loss": 1.668, + "step": 5605 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010528490951293111, + "loss": 1.5977, + "step": 5606 + }, + { + "epoch": 0.5, + "learning_rate": 0.001052561355407571, + "loss": 1.5605, + "step": 5607 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010522736113219367, + "loss": 1.459, + "step": 5608 + }, + { + "epoch": 0.5, + "learning_rate": 0.001051985862896297, + "loss": 1.6016, + "step": 5609 + }, + { + "epoch": 0.5, + "learning_rate": 0.001051698110154543, + "loss": 1.5664, + "step": 5610 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010514103531205645, + "loss": 1.6289, + "step": 5611 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010511225918182535, + "loss": 1.6621, + "step": 5612 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010508348262715003, + "loss": 1.7344, + "step": 5613 + }, + { + "epoch": 0.5, + "learning_rate": 0.001050547056504197, + "loss": 1.5488, + "step": 5614 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010502592825402356, + "loss": 1.6816, + "step": 5615 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010499715044035086, + "loss": 1.6191, + "step": 5616 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010496837221179083, + "loss": 1.6465, + "step": 5617 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010493959357073283, + "loss": 1.6074, + "step": 5618 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010491081451956617, + "loss": 1.6406, + "step": 5619 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010488203506068023, + "loss": 1.6738, + "step": 5620 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010485325519646442, + "loss": 1.5879, + "step": 5621 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010482447492930818, + "loss": 1.666, + "step": 5622 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010479569426160098, + "loss": 1.5781, + "step": 5623 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010476691319573235, + "loss": 1.5176, + "step": 5624 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010473813173409179, + "loss": 1.5723, + "step": 5625 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010470934987906891, + "loss": 1.625, + "step": 5626 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010468056763305329, + "loss": 1.6348, + "step": 5627 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010465178499843462, + "loss": 1.6484, + "step": 5628 + }, + { + "epoch": 0.5, + "learning_rate": 0.001046230019776025, + "loss": 1.7344, + "step": 5629 + }, + { + "epoch": 0.5, + "learning_rate": 0.001045942185729467, + "loss": 1.7227, + "step": 5630 + }, + { + "epoch": 0.5, + "learning_rate": 0.001045654347868569, + "loss": 1.5195, + "step": 5631 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010453665062172292, + "loss": 1.5625, + "step": 5632 + }, + { + "epoch": 0.5, + "learning_rate": 0.001045078660799345, + "loss": 1.6328, + "step": 5633 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010447908116388155, + "loss": 1.7188, + "step": 5634 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010445029587595384, + "loss": 1.6465, + "step": 5635 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010442151021854134, + "loss": 1.5234, + "step": 5636 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010439272419403391, + "loss": 1.582, + "step": 5637 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010436393780482156, + "loss": 1.6719, + "step": 5638 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010433515105329424, + "loss": 1.7012, + "step": 5639 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010430636394184196, + "loss": 1.6074, + "step": 5640 + }, + { + "epoch": 0.5, + "learning_rate": 0.001042775764728548, + "loss": 1.5566, + "step": 5641 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010424878864872277, + "loss": 1.5527, + "step": 5642 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010422000047183603, + "loss": 1.582, + "step": 5643 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010419121194458472, + "loss": 1.623, + "step": 5644 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010416242306935893, + "loss": 1.6641, + "step": 5645 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010413363384854891, + "loss": 1.5879, + "step": 5646 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010410484428454486, + "loss": 1.6016, + "step": 5647 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010407605437973701, + "loss": 1.5605, + "step": 5648 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010404726413651567, + "loss": 1.6191, + "step": 5649 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010401847355727112, + "loss": 1.4219, + "step": 5650 + }, + { + "epoch": 0.5, + "learning_rate": 0.001039896826443937, + "loss": 1.623, + "step": 5651 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010396089140027379, + "loss": 1.6953, + "step": 5652 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010393209982730172, + "loss": 1.6094, + "step": 5653 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010390330792786795, + "loss": 1.7324, + "step": 5654 + }, + { + "epoch": 0.5, + "learning_rate": 0.001038745157043629, + "loss": 1.5215, + "step": 5655 + }, + { + "epoch": 0.5, + "learning_rate": 0.001038457231591771, + "loss": 1.5547, + "step": 5656 + }, + { + "epoch": 0.5, + "learning_rate": 0.001038169302947009, + "loss": 1.6016, + "step": 5657 + }, + { + "epoch": 0.5, + "learning_rate": 0.00103788137113325, + "loss": 1.5645, + "step": 5658 + }, + { + "epoch": 0.5, + "learning_rate": 0.001037593436174398, + "loss": 1.6426, + "step": 5659 + }, + { + "epoch": 0.5, + "learning_rate": 0.00103730549809436, + "loss": 1.5801, + "step": 5660 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010370175569170408, + "loss": 1.6738, + "step": 5661 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010367296126663472, + "loss": 1.4199, + "step": 5662 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010364416653661861, + "loss": 1.7695, + "step": 5663 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010361537150404634, + "loss": 1.6934, + "step": 5664 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010358657617130869, + "loss": 1.5352, + "step": 5665 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010355778054079632, + "loss": 1.6406, + "step": 5666 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010352898461490003, + "loss": 1.5039, + "step": 5667 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010350018839601059, + "loss": 1.4922, + "step": 5668 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010347139188651878, + "loss": 1.6387, + "step": 5669 + }, + { + "epoch": 0.5, + "learning_rate": 0.001034425950888154, + "loss": 1.5859, + "step": 5670 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010341379800529141, + "loss": 1.5762, + "step": 5671 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010338500063833754, + "loss": 1.6582, + "step": 5672 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010335620299034476, + "loss": 1.5469, + "step": 5673 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010332740506370397, + "loss": 1.5742, + "step": 5674 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010329860686080611, + "loss": 1.6152, + "step": 5675 + }, + { + "epoch": 0.5, + "learning_rate": 0.0010326980838404217, + "loss": 1.5645, + "step": 5676 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010324100963580312, + "loss": 1.5625, + "step": 5677 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010321221061847994, + "loss": 1.7324, + "step": 5678 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010318341133446372, + "loss": 1.5801, + "step": 5679 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010315461178614547, + "loss": 1.5879, + "step": 5680 + }, + { + "epoch": 0.51, + "learning_rate": 0.001031258119759163, + "loss": 1.5762, + "step": 5681 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010309701190616724, + "loss": 1.6328, + "step": 5682 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010306821157928952, + "loss": 1.5859, + "step": 5683 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010303941099767415, + "loss": 1.5918, + "step": 5684 + }, + { + "epoch": 0.51, + "learning_rate": 0.001030106101637124, + "loss": 1.4941, + "step": 5685 + }, + { + "epoch": 0.51, + "learning_rate": 0.001029818090797954, + "loss": 1.7637, + "step": 5686 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010295300774831437, + "loss": 1.6172, + "step": 5687 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010292420617166054, + "loss": 1.5488, + "step": 5688 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010289540435222512, + "loss": 1.6113, + "step": 5689 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010286660229239942, + "loss": 1.7227, + "step": 5690 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010283779999457469, + "loss": 1.6523, + "step": 5691 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010280899746114224, + "loss": 1.7129, + "step": 5692 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010278019469449344, + "loss": 1.5898, + "step": 5693 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010275139169701956, + "loss": 1.5527, + "step": 5694 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010272258847111202, + "loss": 1.6328, + "step": 5695 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010269378501916217, + "loss": 1.5996, + "step": 5696 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010266498134356141, + "loss": 1.666, + "step": 5697 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010263617744670117, + "loss": 1.6738, + "step": 5698 + }, + { + "epoch": 0.51, + "learning_rate": 0.001026073733309729, + "loss": 1.6367, + "step": 5699 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010257856899876805, + "loss": 1.623, + "step": 5700 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010254976445247805, + "loss": 1.5957, + "step": 5701 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010252095969449447, + "loss": 1.5684, + "step": 5702 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010249215472720875, + "loss": 1.5977, + "step": 5703 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010246334955301241, + "loss": 1.5996, + "step": 5704 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010243454417429704, + "loss": 1.7402, + "step": 5705 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010240573859345418, + "loss": 1.5234, + "step": 5706 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010237693281287543, + "loss": 1.5508, + "step": 5707 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010234812683495235, + "loss": 1.5918, + "step": 5708 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010231932066207656, + "loss": 1.6094, + "step": 5709 + }, + { + "epoch": 0.51, + "learning_rate": 0.001022905142966397, + "loss": 1.6191, + "step": 5710 + }, + { + "epoch": 0.51, + "learning_rate": 0.001022617077410334, + "loss": 1.5391, + "step": 5711 + }, + { + "epoch": 0.51, + "learning_rate": 0.001022329009976493, + "loss": 1.5898, + "step": 5712 + }, + { + "epoch": 0.51, + "learning_rate": 0.001022040940688791, + "loss": 1.5527, + "step": 5713 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010217528695711448, + "loss": 1.4336, + "step": 5714 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010214647966474717, + "loss": 1.5254, + "step": 5715 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010211767219416882, + "loss": 1.8105, + "step": 5716 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010208886454777127, + "loss": 1.5645, + "step": 5717 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010206005672794617, + "loss": 1.5469, + "step": 5718 + }, + { + "epoch": 0.51, + "learning_rate": 0.001020312487370853, + "loss": 1.6562, + "step": 5719 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010200244057758051, + "loss": 1.541, + "step": 5720 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010197363225182348, + "loss": 1.541, + "step": 5721 + }, + { + "epoch": 0.51, + "learning_rate": 0.001019448237622061, + "loss": 1.6699, + "step": 5722 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010191601511112015, + "loss": 1.543, + "step": 5723 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010188720630095749, + "loss": 1.6719, + "step": 5724 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010185839733410995, + "loss": 1.5391, + "step": 5725 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010182958821296937, + "loss": 1.5586, + "step": 5726 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010180077893992762, + "loss": 1.6973, + "step": 5727 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010177196951737663, + "loss": 1.7344, + "step": 5728 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010174315994770826, + "loss": 1.6445, + "step": 5729 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010171435023331441, + "loss": 1.4727, + "step": 5730 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010168554037658705, + "loss": 1.709, + "step": 5731 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010165673037991804, + "loss": 1.5625, + "step": 5732 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010162792024569936, + "loss": 1.5996, + "step": 5733 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010159910997632295, + "loss": 1.7148, + "step": 5734 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010157029957418083, + "loss": 1.5605, + "step": 5735 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010154148904166491, + "loss": 1.5781, + "step": 5736 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010151267838116721, + "loss": 1.5957, + "step": 5737 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010148386759507974, + "loss": 1.5957, + "step": 5738 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010145505668579448, + "loss": 1.5957, + "step": 5739 + }, + { + "epoch": 0.51, + "learning_rate": 0.001014262456557035, + "loss": 1.6055, + "step": 5740 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010139743450719874, + "loss": 1.5586, + "step": 5741 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010136862324267232, + "loss": 1.5996, + "step": 5742 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010133981186451627, + "loss": 1.5625, + "step": 5743 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010131100037512261, + "loss": 1.4961, + "step": 5744 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010128218877688347, + "loss": 1.5488, + "step": 5745 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010125337707219085, + "loss": 1.6055, + "step": 5746 + }, + { + "epoch": 0.51, + "learning_rate": 0.001012245652634369, + "loss": 1.6816, + "step": 5747 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010119575335301367, + "loss": 1.5664, + "step": 5748 + }, + { + "epoch": 0.51, + "learning_rate": 0.001011669413433133, + "loss": 1.6172, + "step": 5749 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010113812923672793, + "loss": 1.6816, + "step": 5750 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010110931703564956, + "loss": 1.543, + "step": 5751 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010108050474247044, + "loss": 1.5547, + "step": 5752 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010105169235958263, + "loss": 1.7051, + "step": 5753 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010102287988937831, + "loss": 1.6641, + "step": 5754 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010099406733424959, + "loss": 1.6367, + "step": 5755 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010096525469658869, + "loss": 1.4766, + "step": 5756 + }, + { + "epoch": 0.51, + "learning_rate": 0.001009364419787877, + "loss": 1.5996, + "step": 5757 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010090762918323886, + "loss": 1.5508, + "step": 5758 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010087881631233428, + "loss": 1.666, + "step": 5759 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010085000336846617, + "loss": 1.623, + "step": 5760 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010082119035402675, + "loss": 1.584, + "step": 5761 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010079237727140817, + "loss": 1.6875, + "step": 5762 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010076356412300263, + "loss": 1.7031, + "step": 5763 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010073475091120236, + "loss": 1.5742, + "step": 5764 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010070593763839954, + "loss": 1.6465, + "step": 5765 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010067712430698644, + "loss": 1.6074, + "step": 5766 + }, + { + "epoch": 0.51, + "learning_rate": 0.001006483109193552, + "loss": 1.5508, + "step": 5767 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010061949747789815, + "loss": 1.5703, + "step": 5768 + }, + { + "epoch": 0.51, + "learning_rate": 0.001005906839850074, + "loss": 1.5254, + "step": 5769 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010056187044307527, + "loss": 1.5898, + "step": 5770 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010053305685449397, + "loss": 1.4492, + "step": 5771 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010050424322165575, + "loss": 1.5508, + "step": 5772 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010047542954695282, + "loss": 1.582, + "step": 5773 + }, + { + "epoch": 0.51, + "learning_rate": 0.001004466158327775, + "loss": 1.6113, + "step": 5774 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010041780208152199, + "loss": 1.5684, + "step": 5775 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010038898829557855, + "loss": 1.6816, + "step": 5776 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010036017447733942, + "loss": 1.5684, + "step": 5777 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010033136062919691, + "loss": 1.5742, + "step": 5778 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010030254675354327, + "loss": 1.7246, + "step": 5779 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010027373285277073, + "loss": 1.6738, + "step": 5780 + }, + { + "epoch": 0.51, + "learning_rate": 0.001002449189292716, + "loss": 1.6172, + "step": 5781 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010021610498543812, + "loss": 1.6133, + "step": 5782 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010018729102366254, + "loss": 1.6406, + "step": 5783 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010015847704633722, + "loss": 1.7559, + "step": 5784 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010012966305585434, + "loss": 1.6133, + "step": 5785 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010010084905460624, + "loss": 1.5723, + "step": 5786 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010007203504498515, + "loss": 1.7207, + "step": 5787 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010004322102938337, + "loss": 1.6953, + "step": 5788 + }, + { + "epoch": 0.51, + "learning_rate": 0.0010001440701019318, + "loss": 1.5801, + "step": 5789 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009998559298980685, + "loss": 1.584, + "step": 5790 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009995677897061666, + "loss": 1.666, + "step": 5791 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009992796495501485, + "loss": 1.7246, + "step": 5792 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009989915094539379, + "loss": 1.5898, + "step": 5793 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009987033694414567, + "loss": 1.752, + "step": 5794 + }, + { + "epoch": 0.52, + "learning_rate": 0.000998415229536628, + "loss": 1.5293, + "step": 5795 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009981270897633744, + "loss": 1.6309, + "step": 5796 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009978389501456189, + "loss": 1.5625, + "step": 5797 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009975508107072843, + "loss": 1.4766, + "step": 5798 + }, + { + "epoch": 0.52, + "learning_rate": 0.000997262671472293, + "loss": 1.5312, + "step": 5799 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009969745324645675, + "loss": 1.5137, + "step": 5800 + }, + { + "epoch": 0.52, + "learning_rate": 0.000996686393708031, + "loss": 1.6094, + "step": 5801 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009963982552266058, + "loss": 1.6289, + "step": 5802 + }, + { + "epoch": 0.52, + "learning_rate": 0.000996110117044215, + "loss": 1.5078, + "step": 5803 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009958219791847802, + "loss": 1.6895, + "step": 5804 + }, + { + "epoch": 0.52, + "learning_rate": 0.000995533841672225, + "loss": 1.582, + "step": 5805 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009952457045304718, + "loss": 1.5566, + "step": 5806 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009949575677834427, + "loss": 1.6211, + "step": 5807 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009946694314550604, + "loss": 1.6758, + "step": 5808 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009943812955692474, + "loss": 1.6289, + "step": 5809 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009940931601499262, + "loss": 1.6465, + "step": 5810 + }, + { + "epoch": 0.52, + "learning_rate": 0.000993805025221019, + "loss": 1.6562, + "step": 5811 + }, + { + "epoch": 0.52, + "learning_rate": 0.000993516890806448, + "loss": 1.5879, + "step": 5812 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009932287569301358, + "loss": 1.5273, + "step": 5813 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009929406236160049, + "loss": 1.6875, + "step": 5814 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009926524908879769, + "loss": 1.6152, + "step": 5815 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009923643587699737, + "loss": 1.4688, + "step": 5816 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009920762272859185, + "loss": 1.543, + "step": 5817 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009917880964597327, + "loss": 1.6836, + "step": 5818 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009914999663153384, + "loss": 1.5625, + "step": 5819 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009912118368766573, + "loss": 1.625, + "step": 5820 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009909237081676117, + "loss": 1.6582, + "step": 5821 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009906355802121233, + "loss": 1.7148, + "step": 5822 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009903474530341134, + "loss": 1.6406, + "step": 5823 + }, + { + "epoch": 0.52, + "learning_rate": 0.000990059326657504, + "loss": 1.4941, + "step": 5824 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009897712011062171, + "loss": 1.6934, + "step": 5825 + }, + { + "epoch": 0.52, + "learning_rate": 0.000989483076404174, + "loss": 1.5527, + "step": 5826 + }, + { + "epoch": 0.52, + "learning_rate": 0.000989194952575296, + "loss": 1.5977, + "step": 5827 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009889068296435044, + "loss": 1.7461, + "step": 5828 + }, + { + "epoch": 0.52, + "learning_rate": 0.000988618707632721, + "loss": 1.5566, + "step": 5829 + }, + { + "epoch": 0.52, + "learning_rate": 0.000988330586566867, + "loss": 1.6504, + "step": 5830 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009880424664698636, + "loss": 1.6172, + "step": 5831 + }, + { + "epoch": 0.52, + "learning_rate": 0.000987754347365631, + "loss": 1.6484, + "step": 5832 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009874662292780917, + "loss": 1.7188, + "step": 5833 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009871781122311658, + "loss": 1.5879, + "step": 5834 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009868899962487741, + "loss": 1.5547, + "step": 5835 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009866018813548373, + "loss": 1.5312, + "step": 5836 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009863137675732768, + "loss": 1.6133, + "step": 5837 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009860256549280128, + "loss": 1.5586, + "step": 5838 + }, + { + "epoch": 0.52, + "learning_rate": 0.000985737543442965, + "loss": 1.6113, + "step": 5839 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009854494331420552, + "loss": 1.7031, + "step": 5840 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009851613240492029, + "loss": 1.666, + "step": 5841 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009848732161883281, + "loss": 1.582, + "step": 5842 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009845851095833507, + "loss": 1.5918, + "step": 5843 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009842970042581918, + "loss": 1.6484, + "step": 5844 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009840089002367705, + "loss": 1.6328, + "step": 5845 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009837207975430069, + "loss": 1.5195, + "step": 5846 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009834326962008197, + "loss": 1.4902, + "step": 5847 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009831445962341298, + "loss": 1.5996, + "step": 5848 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009828564976668561, + "loss": 1.6992, + "step": 5849 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009825684005229176, + "loss": 1.6465, + "step": 5850 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009822803048262336, + "loss": 1.6738, + "step": 5851 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009819922106007238, + "loss": 1.793, + "step": 5852 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009817041178703065, + "loss": 1.5508, + "step": 5853 + }, + { + "epoch": 0.52, + "learning_rate": 0.000981416026658901, + "loss": 1.623, + "step": 5854 + }, + { + "epoch": 0.52, + "learning_rate": 0.000981127936990425, + "loss": 1.5156, + "step": 5855 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009808398488887985, + "loss": 1.5449, + "step": 5856 + }, + { + "epoch": 0.52, + "learning_rate": 0.000980551762377939, + "loss": 1.459, + "step": 5857 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009802636774817654, + "loss": 1.543, + "step": 5858 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009799755942241951, + "loss": 1.6035, + "step": 5859 + }, + { + "epoch": 0.52, + "learning_rate": 0.000979687512629147, + "loss": 1.582, + "step": 5860 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009793994327205386, + "loss": 1.6934, + "step": 5861 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009791113545222878, + "loss": 1.7031, + "step": 5862 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009788232780583116, + "loss": 1.5039, + "step": 5863 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009785352033525284, + "loss": 1.5918, + "step": 5864 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009782471304288552, + "loss": 1.6816, + "step": 5865 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009779590593112092, + "loss": 1.625, + "step": 5866 + }, + { + "epoch": 0.52, + "learning_rate": 0.000977670990023507, + "loss": 1.6113, + "step": 5867 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009773829225896663, + "loss": 1.5078, + "step": 5868 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009770948570336033, + "loss": 1.5898, + "step": 5869 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009768067933792347, + "loss": 1.7148, + "step": 5870 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009765187316504764, + "loss": 1.5566, + "step": 5871 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009762306718712457, + "loss": 1.6523, + "step": 5872 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009759426140654582, + "loss": 1.6055, + "step": 5873 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009756545582570297, + "loss": 1.6738, + "step": 5874 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009753665044698759, + "loss": 1.541, + "step": 5875 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009750784527279128, + "loss": 1.6348, + "step": 5876 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009747904030550556, + "loss": 1.5801, + "step": 5877 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009745023554752196, + "loss": 1.5918, + "step": 5878 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009742143100123197, + "loss": 1.752, + "step": 5879 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009739262666902711, + "loss": 1.5312, + "step": 5880 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009736382255329884, + "loss": 1.7617, + "step": 5881 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009733501865643859, + "loss": 1.6738, + "step": 5882 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009730621498083783, + "loss": 1.5586, + "step": 5883 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009727741152888799, + "loss": 1.623, + "step": 5884 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009724860830298044, + "loss": 1.7344, + "step": 5885 + }, + { + "epoch": 0.52, + "learning_rate": 0.000972198053055066, + "loss": 1.6016, + "step": 5886 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009719100253885773, + "loss": 1.666, + "step": 5887 + }, + { + "epoch": 0.52, + "learning_rate": 0.000971622000054253, + "loss": 1.6035, + "step": 5888 + }, + { + "epoch": 0.52, + "learning_rate": 0.000971333977076006, + "loss": 1.6035, + "step": 5889 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009710459564777489, + "loss": 1.6777, + "step": 5890 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009707579382833946, + "loss": 1.5801, + "step": 5891 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009704699225168563, + "loss": 1.5273, + "step": 5892 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009701819092020461, + "loss": 1.4531, + "step": 5893 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009698938983628763, + "loss": 1.5781, + "step": 5894 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009696058900232583, + "loss": 1.6953, + "step": 5895 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009693178842071051, + "loss": 1.5625, + "step": 5896 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009690298809383275, + "loss": 1.6074, + "step": 5897 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009687418802408374, + "loss": 1.6406, + "step": 5898 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009684538821385453, + "loss": 1.5391, + "step": 5899 + }, + { + "epoch": 0.52, + "learning_rate": 0.000968165886655363, + "loss": 1.5293, + "step": 5900 + }, + { + "epoch": 0.52, + "learning_rate": 0.0009678778938152008, + "loss": 1.582, + "step": 5901 + }, + { + "epoch": 0.53, + "learning_rate": 0.000967589903641969, + "loss": 1.5195, + "step": 5902 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009673019161595783, + "loss": 1.5469, + "step": 5903 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009670139313919389, + "loss": 1.5918, + "step": 5904 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009667259493629605, + "loss": 1.6016, + "step": 5905 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009664379700965528, + "loss": 1.6484, + "step": 5906 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009661499936166247, + "loss": 1.584, + "step": 5907 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009658620199470862, + "loss": 1.498, + "step": 5908 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009655740491118459, + "loss": 1.5977, + "step": 5909 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009652860811348125, + "loss": 1.6641, + "step": 5910 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009649981160398941, + "loss": 1.5918, + "step": 5911 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009647101538509997, + "loss": 1.4707, + "step": 5912 + }, + { + "epoch": 0.53, + "learning_rate": 0.000964422194592037, + "loss": 1.7246, + "step": 5913 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009641342382869135, + "loss": 1.6973, + "step": 5914 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009638462849595365, + "loss": 1.6484, + "step": 5915 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009635583346338141, + "loss": 1.5645, + "step": 5916 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009632703873336529, + "loss": 1.5898, + "step": 5917 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009629824430829592, + "loss": 1.543, + "step": 5918 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009626945019056404, + "loss": 1.5723, + "step": 5919 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009624065638256019, + "loss": 1.6191, + "step": 5920 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009621186288667504, + "loss": 1.5078, + "step": 5921 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009618306970529907, + "loss": 1.7188, + "step": 5922 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009615427684082293, + "loss": 1.6074, + "step": 5923 + }, + { + "epoch": 0.53, + "learning_rate": 0.000961254842956371, + "loss": 1.4473, + "step": 5924 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009609669207213207, + "loss": 1.5801, + "step": 5925 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009606790017269828, + "loss": 1.5957, + "step": 5926 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009603910859972624, + "loss": 1.6855, + "step": 5927 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009601031735560631, + "loss": 1.5078, + "step": 5928 + }, + { + "epoch": 0.53, + "learning_rate": 0.000959815264427289, + "loss": 1.5625, + "step": 5929 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009595273586348433, + "loss": 1.5605, + "step": 5930 + }, + { + "epoch": 0.53, + "learning_rate": 0.00095923945620263, + "loss": 1.5938, + "step": 5931 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009589515571545518, + "loss": 1.6445, + "step": 5932 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009586636615145113, + "loss": 1.5918, + "step": 5933 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009583757693064107, + "loss": 1.6738, + "step": 5934 + }, + { + "epoch": 0.53, + "learning_rate": 0.000958087880554153, + "loss": 1.5684, + "step": 5935 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009577999952816397, + "loss": 1.625, + "step": 5936 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009575121135127723, + "loss": 1.5801, + "step": 5937 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009572242352714522, + "loss": 1.6484, + "step": 5938 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009569363605815804, + "loss": 1.6602, + "step": 5939 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009566484894670579, + "loss": 1.6641, + "step": 5940 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009563606219517847, + "loss": 1.5488, + "step": 5941 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009560727580596608, + "loss": 1.7129, + "step": 5942 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009557848978145868, + "loss": 1.5312, + "step": 5943 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009554970412404618, + "loss": 1.5684, + "step": 5944 + }, + { + "epoch": 0.53, + "learning_rate": 0.000955209188361185, + "loss": 1.6367, + "step": 5945 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009549213392006548, + "loss": 1.7324, + "step": 5946 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009546334937827709, + "loss": 1.5156, + "step": 5947 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009543456521314312, + "loss": 1.5645, + "step": 5948 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009540578142705334, + "loss": 1.5547, + "step": 5949 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009537699802239749, + "loss": 1.5957, + "step": 5950 + }, + { + "epoch": 0.53, + "learning_rate": 0.000953482150015654, + "loss": 1.6387, + "step": 5951 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009531943236694672, + "loss": 1.5352, + "step": 5952 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009529065012093113, + "loss": 1.5332, + "step": 5953 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009526186826590821, + "loss": 1.5762, + "step": 5954 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009523308680426766, + "loss": 1.5625, + "step": 5955 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009520430573839903, + "loss": 1.6367, + "step": 5956 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009517552507069184, + "loss": 1.6387, + "step": 5957 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009514674480353558, + "loss": 1.7227, + "step": 5958 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009511796493931978, + "loss": 1.4629, + "step": 5959 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009508918548043385, + "loss": 1.6426, + "step": 5960 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009506040642926717, + "loss": 1.668, + "step": 5961 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009503162778820917, + "loss": 1.5977, + "step": 5962 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009500284955964916, + "loss": 1.6582, + "step": 5963 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009497407174597647, + "loss": 1.5898, + "step": 5964 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009494529434958033, + "loss": 1.5117, + "step": 5965 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009491651737284998, + "loss": 1.5977, + "step": 5966 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009488774081817467, + "loss": 1.6387, + "step": 5967 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009485896468794356, + "loss": 1.5059, + "step": 5968 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009483018898454574, + "loss": 1.582, + "step": 5969 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009480141371037029, + "loss": 1.709, + "step": 5970 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009477263886780634, + "loss": 1.6172, + "step": 5971 + }, + { + "epoch": 0.53, + "learning_rate": 0.000947438644592429, + "loss": 1.5898, + "step": 5972 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009471509048706895, + "loss": 1.6367, + "step": 5973 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009468631695367337, + "loss": 1.5605, + "step": 5974 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009465754386144518, + "loss": 1.623, + "step": 5975 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009462877121277323, + "loss": 1.5762, + "step": 5976 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009459999901004635, + "loss": 1.5762, + "step": 5977 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009457122725565334, + "loss": 1.6855, + "step": 5978 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009454245595198298, + "loss": 1.5059, + "step": 5979 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009451368510142402, + "loss": 1.623, + "step": 5980 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009448491470636509, + "loss": 1.748, + "step": 5981 + }, + { + "epoch": 0.53, + "learning_rate": 0.000944561447691949, + "loss": 1.584, + "step": 5982 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009442737529230206, + "loss": 1.6172, + "step": 5983 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009439860627807516, + "loss": 1.5215, + "step": 5984 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009436983772890271, + "loss": 1.541, + "step": 5985 + }, + { + "epoch": 0.53, + "learning_rate": 0.000943410696471732, + "loss": 1.5801, + "step": 5986 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009431230203527515, + "loss": 1.6875, + "step": 5987 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009428353489559696, + "loss": 1.7871, + "step": 5988 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009425476823052702, + "loss": 1.6094, + "step": 5989 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009422600204245362, + "loss": 1.6543, + "step": 5990 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009419723633376515, + "loss": 1.5352, + "step": 5991 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009416847110684985, + "loss": 1.5488, + "step": 5992 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009413970636409593, + "loss": 1.6367, + "step": 5993 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009411094210789154, + "loss": 1.6836, + "step": 5994 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009408217834062492, + "loss": 1.5957, + "step": 5995 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009405341506468411, + "loss": 1.5195, + "step": 5996 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009402465228245719, + "loss": 1.5938, + "step": 5997 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009399588999633216, + "loss": 1.582, + "step": 5998 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009396712820869706, + "loss": 1.6152, + "step": 5999 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009393836692193979, + "loss": 1.5137, + "step": 6000 + }, + { + "epoch": 0.53, + "learning_rate": 0.000939096061384482, + "loss": 1.6602, + "step": 6001 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009388084586061026, + "loss": 1.4844, + "step": 6002 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009385208609081373, + "loss": 1.5273, + "step": 6003 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009382332683144637, + "loss": 1.5254, + "step": 6004 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009379456808489588, + "loss": 1.5195, + "step": 6005 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009376580985355005, + "loss": 1.6934, + "step": 6006 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009373705213979645, + "loss": 1.6074, + "step": 6007 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009370829494602271, + "loss": 1.5625, + "step": 6008 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009367953827461632, + "loss": 1.5449, + "step": 6009 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009365078212796492, + "loss": 1.6348, + "step": 6010 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009362202650845591, + "loss": 1.4473, + "step": 6011 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009359327141847673, + "loss": 1.5332, + "step": 6012 + }, + { + "epoch": 0.53, + "learning_rate": 0.0009356451686041472, + "loss": 1.4941, + "step": 6013 + }, + { + "epoch": 0.54, + "learning_rate": 0.000935357628366573, + "loss": 1.5781, + "step": 6014 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009350700934959174, + "loss": 1.6816, + "step": 6015 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009347825640160526, + "loss": 1.541, + "step": 6016 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009344950399508508, + "loss": 1.5312, + "step": 6017 + }, + { + "epoch": 0.54, + "learning_rate": 0.000934207521324184, + "loss": 1.6328, + "step": 6018 + }, + { + "epoch": 0.54, + "learning_rate": 0.000933920008159923, + "loss": 1.6953, + "step": 6019 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009336325004819383, + "loss": 1.5938, + "step": 6020 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009333449983141007, + "loss": 1.4961, + "step": 6021 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009330575016802797, + "loss": 1.5938, + "step": 6022 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009327700106043449, + "loss": 1.5605, + "step": 6023 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009324825251101649, + "loss": 1.5078, + "step": 6024 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009321950452216077, + "loss": 1.6328, + "step": 6025 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009319075709625422, + "loss": 1.6406, + "step": 6026 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009316201023568355, + "loss": 1.5391, + "step": 6027 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009313326394283546, + "loss": 1.4707, + "step": 6028 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009310451822009656, + "loss": 1.5566, + "step": 6029 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009307577306985354, + "loss": 1.625, + "step": 6030 + }, + { + "epoch": 0.54, + "learning_rate": 0.000930470284944929, + "loss": 1.5762, + "step": 6031 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009301828449640117, + "loss": 1.4375, + "step": 6032 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009298954107796477, + "loss": 1.5117, + "step": 6033 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009296079824157022, + "loss": 1.5957, + "step": 6034 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009293205598960383, + "loss": 1.7051, + "step": 6035 + }, + { + "epoch": 0.54, + "learning_rate": 0.000929033143244519, + "loss": 1.6973, + "step": 6036 + }, + { + "epoch": 0.54, + "learning_rate": 0.000928745732485007, + "loss": 1.4766, + "step": 6037 + }, + { + "epoch": 0.54, + "learning_rate": 0.000928458327641365, + "loss": 1.7344, + "step": 6038 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009281709287374542, + "loss": 1.6191, + "step": 6039 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009278835357971359, + "loss": 1.6738, + "step": 6040 + }, + { + "epoch": 0.54, + "learning_rate": 0.000927596148844271, + "loss": 1.5703, + "step": 6041 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009273087679027199, + "loss": 1.6113, + "step": 6042 + }, + { + "epoch": 0.54, + "learning_rate": 0.000927021392996342, + "loss": 1.5352, + "step": 6043 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009267340241489968, + "loss": 1.6074, + "step": 6044 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009264466613845426, + "loss": 1.7109, + "step": 6045 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009261593047268381, + "loss": 1.666, + "step": 6046 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009258719541997411, + "loss": 1.5996, + "step": 6047 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009255846098271085, + "loss": 1.5645, + "step": 6048 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009252972716327967, + "loss": 1.6836, + "step": 6049 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009250099396406626, + "loss": 1.6875, + "step": 6050 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009247226138745616, + "loss": 1.6074, + "step": 6051 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009244352943583488, + "loss": 1.5918, + "step": 6052 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009241479811158785, + "loss": 1.7422, + "step": 6053 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009238606741710055, + "loss": 1.7852, + "step": 6054 + }, + { + "epoch": 0.54, + "learning_rate": 0.000923573373547583, + "loss": 1.418, + "step": 6055 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009232860792694642, + "loss": 1.7559, + "step": 6056 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009229987913605014, + "loss": 1.6445, + "step": 6057 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009227115098445469, + "loss": 1.5742, + "step": 6058 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009224242347454521, + "loss": 1.5527, + "step": 6059 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009221369660870676, + "loss": 1.6895, + "step": 6060 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009218497038932443, + "loss": 1.6855, + "step": 6061 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009215624481878321, + "loss": 1.5547, + "step": 6062 + }, + { + "epoch": 0.54, + "learning_rate": 0.00092127519899468, + "loss": 1.6211, + "step": 6063 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009209879563376371, + "loss": 1.6309, + "step": 6064 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009207007202405511, + "loss": 1.5684, + "step": 6065 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009204134907272705, + "loss": 1.6172, + "step": 6066 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009201262678216421, + "loss": 1.6484, + "step": 6067 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009198390515475124, + "loss": 1.5957, + "step": 6068 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009195518419287274, + "loss": 1.5938, + "step": 6069 + }, + { + "epoch": 0.54, + "learning_rate": 0.000919264638989133, + "loss": 1.5312, + "step": 6070 + }, + { + "epoch": 0.54, + "learning_rate": 0.000918977442752574, + "loss": 1.6602, + "step": 6071 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009186902532428948, + "loss": 1.5957, + "step": 6072 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009184030704839387, + "loss": 1.4512, + "step": 6073 + }, + { + "epoch": 0.54, + "learning_rate": 0.00091811589449955, + "loss": 1.5293, + "step": 6074 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009178287253135709, + "loss": 1.6973, + "step": 6075 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009175415629498434, + "loss": 1.5469, + "step": 6076 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009172544074322092, + "loss": 1.5918, + "step": 6077 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009169672587845096, + "loss": 1.3848, + "step": 6078 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009166801170305848, + "loss": 1.5117, + "step": 6079 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009163929821942745, + "loss": 1.6621, + "step": 6080 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009161058542994182, + "loss": 1.6797, + "step": 6081 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009158187333698549, + "loss": 1.584, + "step": 6082 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009155316194294224, + "loss": 1.6758, + "step": 6083 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009152445125019584, + "loss": 1.6113, + "step": 6084 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009149574126112995, + "loss": 1.5859, + "step": 6085 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009146703197812827, + "loss": 1.5918, + "step": 6086 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009143832340357439, + "loss": 1.6816, + "step": 6087 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009140961553985173, + "loss": 1.5898, + "step": 6088 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009138090838934388, + "loss": 1.6426, + "step": 6089 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009135220195443419, + "loss": 1.5078, + "step": 6090 + }, + { + "epoch": 0.54, + "learning_rate": 0.00091323496237506, + "loss": 1.5664, + "step": 6091 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009129479124094256, + "loss": 1.5801, + "step": 6092 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009126608696712718, + "loss": 1.6191, + "step": 6093 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009123738341844298, + "loss": 1.627, + "step": 6094 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009120868059727307, + "loss": 1.7266, + "step": 6095 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009117997850600047, + "loss": 1.6562, + "step": 6096 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009115127714700821, + "loss": 1.6035, + "step": 6097 + }, + { + "epoch": 0.54, + "learning_rate": 0.000911225765226792, + "loss": 1.6465, + "step": 6098 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009109387663539627, + "loss": 1.541, + "step": 6099 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009106517748754225, + "loss": 1.6406, + "step": 6100 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009103647908149989, + "loss": 1.582, + "step": 6101 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009100778141965187, + "loss": 1.7305, + "step": 6102 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009097908450438077, + "loss": 1.5312, + "step": 6103 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009095038833806915, + "loss": 1.6543, + "step": 6104 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009092169292309956, + "loss": 1.7246, + "step": 6105 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009089299826185438, + "loss": 1.6133, + "step": 6106 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009086430435671599, + "loss": 1.6133, + "step": 6107 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009083561121006665, + "loss": 1.5312, + "step": 6108 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009080691882428869, + "loss": 1.5137, + "step": 6109 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009077822720176426, + "loss": 1.498, + "step": 6110 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009074953634487545, + "loss": 1.5195, + "step": 6111 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009072084625600427, + "loss": 1.4961, + "step": 6112 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009069215693753282, + "loss": 1.5586, + "step": 6113 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009066346839184296, + "loss": 1.6836, + "step": 6114 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009063478062131656, + "loss": 1.5352, + "step": 6115 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009060609362833539, + "loss": 1.7188, + "step": 6116 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009057740741528121, + "loss": 1.6484, + "step": 6117 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009054872198453568, + "loss": 1.5488, + "step": 6118 + }, + { + "epoch": 0.54, + "learning_rate": 0.000905200373384804, + "loss": 1.623, + "step": 6119 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009049135347949689, + "loss": 1.7109, + "step": 6120 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009046267040996667, + "loss": 1.6641, + "step": 6121 + }, + { + "epoch": 0.54, + "learning_rate": 0.000904339881322711, + "loss": 1.6211, + "step": 6122 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009040530664879153, + "loss": 1.5547, + "step": 6123 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009037662596190921, + "loss": 1.6035, + "step": 6124 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009034794607400541, + "loss": 1.6094, + "step": 6125 + }, + { + "epoch": 0.54, + "learning_rate": 0.0009031926698746123, + "loss": 1.6094, + "step": 6126 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009029058870465776, + "loss": 1.6523, + "step": 6127 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009026191122797595, + "loss": 1.584, + "step": 6128 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009023323455979683, + "loss": 1.6172, + "step": 6129 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009020455870250123, + "loss": 1.5645, + "step": 6130 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009017588365846999, + "loss": 1.5215, + "step": 6131 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009014720943008374, + "loss": 1.6504, + "step": 6132 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009011853601972328, + "loss": 1.543, + "step": 6133 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009008986342976919, + "loss": 1.5527, + "step": 6134 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009006119166260195, + "loss": 1.6348, + "step": 6135 + }, + { + "epoch": 0.55, + "learning_rate": 0.0009003252072060204, + "loss": 1.627, + "step": 6136 + }, + { + "epoch": 0.55, + "learning_rate": 0.000900038506061499, + "loss": 1.5078, + "step": 6137 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008997518132162584, + "loss": 1.6016, + "step": 6138 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008994651286941011, + "loss": 1.5938, + "step": 6139 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008991784525188288, + "loss": 1.5664, + "step": 6140 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008988917847142433, + "loss": 1.6055, + "step": 6141 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008986051253041448, + "loss": 1.5859, + "step": 6142 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008983184743123333, + "loss": 1.5586, + "step": 6143 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008980318317626072, + "loss": 1.6191, + "step": 6144 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008977451976787662, + "loss": 1.5059, + "step": 6145 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008974585720846073, + "loss": 1.6895, + "step": 6146 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008971719550039275, + "loss": 1.5488, + "step": 6147 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008968853464605227, + "loss": 1.6328, + "step": 6148 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008965987464781897, + "loss": 1.5938, + "step": 6149 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008963121550807225, + "loss": 1.5586, + "step": 6150 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008960255722919156, + "loss": 1.623, + "step": 6151 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008957389981355619, + "loss": 1.5605, + "step": 6152 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008954524326354552, + "loss": 1.5605, + "step": 6153 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008951658758153869, + "loss": 1.623, + "step": 6154 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008948793276991484, + "loss": 1.623, + "step": 6155 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008945927883105301, + "loss": 1.584, + "step": 6156 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008943062576733222, + "loss": 1.6836, + "step": 6157 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008940197358113135, + "loss": 1.5195, + "step": 6158 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008937332227482929, + "loss": 1.6504, + "step": 6159 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008934467185080474, + "loss": 1.582, + "step": 6160 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008931602231143647, + "loss": 1.5547, + "step": 6161 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008928737365910307, + "loss": 1.4883, + "step": 6162 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008925872589618307, + "loss": 1.5742, + "step": 6163 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008923007902505492, + "loss": 1.4707, + "step": 6164 + }, + { + "epoch": 0.55, + "learning_rate": 0.000892014330480971, + "loss": 1.668, + "step": 6165 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008917278796768791, + "loss": 1.625, + "step": 6166 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008914414378620556, + "loss": 1.6895, + "step": 6167 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008911550050602822, + "loss": 1.5215, + "step": 6168 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008908685812953408, + "loss": 1.6406, + "step": 6169 + }, + { + "epoch": 0.55, + "learning_rate": 0.000890582166591011, + "loss": 1.5371, + "step": 6170 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008902957609710719, + "loss": 1.7207, + "step": 6171 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008900093644593033, + "loss": 1.6289, + "step": 6172 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008897229770794828, + "loss": 1.7109, + "step": 6173 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008894365988553874, + "loss": 1.582, + "step": 6174 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008891502298107937, + "loss": 1.668, + "step": 6175 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008888638699694775, + "loss": 1.4941, + "step": 6176 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008885775193552137, + "loss": 1.5352, + "step": 6177 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008882911779917764, + "loss": 1.6621, + "step": 6178 + }, + { + "epoch": 0.55, + "learning_rate": 0.000888004845902939, + "loss": 1.5645, + "step": 6179 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008877185231124747, + "loss": 1.6797, + "step": 6180 + }, + { + "epoch": 0.55, + "learning_rate": 0.000887432209644155, + "loss": 1.4434, + "step": 6181 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008871459055217509, + "loss": 1.5859, + "step": 6182 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008868596107690324, + "loss": 1.5723, + "step": 6183 + }, + { + "epoch": 0.55, + "learning_rate": 0.00088657332540977, + "loss": 1.7012, + "step": 6184 + }, + { + "epoch": 0.55, + "learning_rate": 0.000886287049467732, + "loss": 1.5234, + "step": 6185 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008860007829666862, + "loss": 1.6641, + "step": 6186 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008857145259303997, + "loss": 1.5039, + "step": 6187 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008854282783826396, + "loss": 1.6973, + "step": 6188 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008851420403471711, + "loss": 1.6367, + "step": 6189 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008848558118477592, + "loss": 1.4746, + "step": 6190 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008845695929081673, + "loss": 1.5977, + "step": 6191 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008842833835521598, + "loss": 1.5352, + "step": 6192 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008839971838034984, + "loss": 1.6484, + "step": 6193 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008837109936859451, + "loss": 1.4746, + "step": 6194 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008834248132232603, + "loss": 1.5762, + "step": 6195 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008831386424392047, + "loss": 1.7109, + "step": 6196 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008828524813575372, + "loss": 1.4785, + "step": 6197 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008825663300020162, + "loss": 1.5801, + "step": 6198 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008822801883963993, + "loss": 1.7168, + "step": 6199 + }, + { + "epoch": 0.55, + "learning_rate": 0.000881994056564444, + "loss": 1.6543, + "step": 6200 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008817079345299056, + "loss": 1.6367, + "step": 6201 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008814218223165397, + "loss": 1.6406, + "step": 6202 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008811357199481001, + "loss": 1.6172, + "step": 6203 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008808496274483413, + "loss": 1.6133, + "step": 6204 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008805635448410155, + "loss": 1.6602, + "step": 6205 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008802774721498749, + "loss": 1.7227, + "step": 6206 + }, + { + "epoch": 0.55, + "learning_rate": 0.00087999140939867, + "loss": 1.6621, + "step": 6207 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008797053566111521, + "loss": 1.4785, + "step": 6208 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008794193138110701, + "loss": 1.6426, + "step": 6209 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008791332810221726, + "loss": 1.5938, + "step": 6210 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008788472582682073, + "loss": 1.4512, + "step": 6211 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008785612455729215, + "loss": 1.5469, + "step": 6212 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008782752429600614, + "loss": 1.5137, + "step": 6213 + }, + { + "epoch": 0.55, + "learning_rate": 0.000877989250453372, + "loss": 1.5137, + "step": 6214 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008777032680765979, + "loss": 1.7031, + "step": 6215 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008774172958534827, + "loss": 1.5938, + "step": 6216 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008771313338077694, + "loss": 1.6484, + "step": 6217 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008768453819631993, + "loss": 1.5859, + "step": 6218 + }, + { + "epoch": 0.55, + "learning_rate": 0.000876559440343514, + "loss": 1.6289, + "step": 6219 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008762735089724538, + "loss": 1.5684, + "step": 6220 + }, + { + "epoch": 0.55, + "learning_rate": 0.000875987587873758, + "loss": 1.6562, + "step": 6221 + }, + { + "epoch": 0.55, + "learning_rate": 0.000875701677071165, + "loss": 1.541, + "step": 6222 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008754157765884122, + "loss": 1.5762, + "step": 6223 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008751298864492371, + "loss": 1.6309, + "step": 6224 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008748440066773755, + "loss": 1.5117, + "step": 6225 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008745581372965624, + "loss": 1.6836, + "step": 6226 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008742722783305312, + "loss": 1.625, + "step": 6227 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008739864298030168, + "loss": 1.5371, + "step": 6228 + }, + { + "epoch": 0.55, + "learning_rate": 0.000873700591737751, + "loss": 1.6426, + "step": 6229 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008734147641584652, + "loss": 1.623, + "step": 6230 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008731289470888901, + "loss": 1.6523, + "step": 6231 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008728431405527563, + "loss": 1.6621, + "step": 6232 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008725573445737923, + "loss": 1.5371, + "step": 6233 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008722715591757265, + "loss": 1.5156, + "step": 6234 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008719857843822858, + "loss": 1.5684, + "step": 6235 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008717000202171969, + "loss": 1.627, + "step": 6236 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008714142667041851, + "loss": 1.582, + "step": 6237 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008711285238669754, + "loss": 1.6016, + "step": 6238 + }, + { + "epoch": 0.56, + "learning_rate": 0.000870842791729291, + "loss": 1.5117, + "step": 6239 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008705570703148553, + "loss": 1.6035, + "step": 6240 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008702713596473899, + "loss": 1.5391, + "step": 6241 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008699856597506161, + "loss": 1.5781, + "step": 6242 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008696999706482533, + "loss": 1.7031, + "step": 6243 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008694142923640219, + "loss": 1.543, + "step": 6244 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008691286249216398, + "loss": 1.5938, + "step": 6245 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008688429683448245, + "loss": 1.5723, + "step": 6246 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008685573226572918, + "loss": 1.6816, + "step": 6247 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008682716878827588, + "loss": 1.4785, + "step": 6248 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008679860640449394, + "loss": 1.5996, + "step": 6249 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008677004511675477, + "loss": 1.584, + "step": 6250 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008674148492742961, + "loss": 1.6973, + "step": 6251 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008671292583888974, + "loss": 1.7441, + "step": 6252 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008668436785350626, + "loss": 1.627, + "step": 6253 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008665581097365016, + "loss": 1.6699, + "step": 6254 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008662725520169236, + "loss": 1.4648, + "step": 6255 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008659870054000374, + "loss": 1.5234, + "step": 6256 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008657014699095499, + "loss": 1.6953, + "step": 6257 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008654159455691679, + "loss": 1.748, + "step": 6258 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008651304324025973, + "loss": 1.6504, + "step": 6259 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008648449304335425, + "loss": 1.6113, + "step": 6260 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008645594396857073, + "loss": 1.6367, + "step": 6261 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008642739601827938, + "loss": 1.5859, + "step": 6262 + }, + { + "epoch": 0.56, + "learning_rate": 0.000863988491948505, + "loss": 1.6328, + "step": 6263 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008637030350065415, + "loss": 1.5762, + "step": 6264 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008634175893806029, + "loss": 1.7266, + "step": 6265 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008631321550943882, + "loss": 1.5938, + "step": 6266 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008628467321715963, + "loss": 1.5664, + "step": 6267 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008625613206359237, + "loss": 1.6562, + "step": 6268 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008622759205110669, + "loss": 1.5742, + "step": 6269 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008619905318207207, + "loss": 1.5039, + "step": 6270 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008617051545885804, + "loss": 1.5723, + "step": 6271 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008614197888383386, + "loss": 1.5352, + "step": 6272 + }, + { + "epoch": 0.56, + "learning_rate": 0.000861134434593688, + "loss": 1.7227, + "step": 6273 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008608490918783197, + "loss": 1.6289, + "step": 6274 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008605637607159249, + "loss": 1.7578, + "step": 6275 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008602784411301927, + "loss": 1.5723, + "step": 6276 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008599931331448117, + "loss": 1.6465, + "step": 6277 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008597078367834695, + "loss": 1.5391, + "step": 6278 + }, + { + "epoch": 0.56, + "learning_rate": 0.000859422552069853, + "loss": 1.584, + "step": 6279 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008591372790276478, + "loss": 1.5508, + "step": 6280 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008588520176805388, + "loss": 1.5859, + "step": 6281 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008585667680522089, + "loss": 1.5469, + "step": 6282 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008582815301663422, + "loss": 1.5098, + "step": 6283 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008579963040466197, + "loss": 1.5195, + "step": 6284 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008577110897167225, + "loss": 1.582, + "step": 6285 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008574258872003299, + "loss": 1.5547, + "step": 6286 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008571406965211215, + "loss": 1.498, + "step": 6287 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008568555177027753, + "loss": 1.5391, + "step": 6288 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008565703507689675, + "loss": 1.6074, + "step": 6289 + }, + { + "epoch": 0.56, + "learning_rate": 0.000856285195743374, + "loss": 1.4629, + "step": 6290 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008560000526496706, + "loss": 1.6152, + "step": 6291 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008557149215115308, + "loss": 1.6074, + "step": 6292 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008554298023526273, + "loss": 1.6094, + "step": 6293 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008551446951966321, + "loss": 1.5508, + "step": 6294 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008548596000672164, + "loss": 1.6328, + "step": 6295 + }, + { + "epoch": 0.56, + "learning_rate": 0.00085457451698805, + "loss": 1.7188, + "step": 6296 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008542894459828021, + "loss": 1.5625, + "step": 6297 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008540043870751402, + "loss": 1.5508, + "step": 6298 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008537193402887316, + "loss": 1.6641, + "step": 6299 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008534343056472424, + "loss": 1.502, + "step": 6300 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008531492831743372, + "loss": 1.6348, + "step": 6301 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008528642728936795, + "loss": 1.8203, + "step": 6302 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008525792748289333, + "loss": 1.6289, + "step": 6303 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008522942890037599, + "loss": 1.5039, + "step": 6304 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008520093154418203, + "loss": 1.7344, + "step": 6305 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008517243541667737, + "loss": 1.6699, + "step": 6306 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008514394052022801, + "loss": 1.5859, + "step": 6307 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008511544685719968, + "loss": 1.6211, + "step": 6308 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008508695442995805, + "loss": 1.5117, + "step": 6309 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008505846324086865, + "loss": 1.668, + "step": 6310 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008502997329229707, + "loss": 1.5195, + "step": 6311 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008500148458660861, + "loss": 1.5957, + "step": 6312 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008497299712616856, + "loss": 1.5938, + "step": 6313 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008494451091334204, + "loss": 1.6328, + "step": 6314 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008491602595049418, + "loss": 1.6582, + "step": 6315 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008488754223998987, + "loss": 1.6641, + "step": 6316 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008485905978419402, + "loss": 1.4863, + "step": 6317 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008483057858547134, + "loss": 1.4844, + "step": 6318 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008480209864618651, + "loss": 1.6504, + "step": 6319 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008477361996870405, + "loss": 1.5625, + "step": 6320 + }, + { + "epoch": 0.56, + "learning_rate": 0.000847451425553884, + "loss": 1.5645, + "step": 6321 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008471666640860384, + "loss": 1.6074, + "step": 6322 + }, + { + "epoch": 0.56, + "learning_rate": 0.000846881915307147, + "loss": 1.6152, + "step": 6323 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008465971792408503, + "loss": 1.6816, + "step": 6324 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008463124559107887, + "loss": 1.5762, + "step": 6325 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008460277453406008, + "loss": 1.6367, + "step": 6326 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008457430475539254, + "loss": 1.5371, + "step": 6327 + }, + { + "epoch": 0.56, + "learning_rate": 0.000845458362574399, + "loss": 1.457, + "step": 6328 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008451736904256577, + "loss": 1.707, + "step": 6329 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008448890311313357, + "loss": 1.582, + "step": 6330 + }, + { + "epoch": 0.56, + "learning_rate": 0.000844604384715068, + "loss": 1.5664, + "step": 6331 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008443197512004863, + "loss": 1.7148, + "step": 6332 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008440351306112227, + "loss": 1.6406, + "step": 6333 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008437505229709073, + "loss": 1.6992, + "step": 6334 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008434659283031703, + "loss": 1.6172, + "step": 6335 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008431813466316393, + "loss": 1.5156, + "step": 6336 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008428967779799425, + "loss": 1.6465, + "step": 6337 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008426122223717053, + "loss": 1.4766, + "step": 6338 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008423276798305534, + "loss": 1.6719, + "step": 6339 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008420431503801109, + "loss": 1.6914, + "step": 6340 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008417586340440001, + "loss": 1.7012, + "step": 6341 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008414741308458439, + "loss": 1.5605, + "step": 6342 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008411896408092626, + "loss": 1.5547, + "step": 6343 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008409051639578761, + "loss": 1.6328, + "step": 6344 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008406207003153023, + "loss": 1.4746, + "step": 6345 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008403362499051598, + "loss": 1.623, + "step": 6346 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008400518127510647, + "loss": 1.6133, + "step": 6347 + }, + { + "epoch": 0.56, + "learning_rate": 0.000839767388876632, + "loss": 1.5977, + "step": 6348 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008394829783054758, + "loss": 1.5566, + "step": 6349 + }, + { + "epoch": 0.56, + "learning_rate": 0.00083919858106121, + "loss": 1.6191, + "step": 6350 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008389141971674461, + "loss": 1.4961, + "step": 6351 + }, + { + "epoch": 0.57, + "learning_rate": 0.000838629826647795, + "loss": 1.5879, + "step": 6352 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008383454695258665, + "loss": 1.6914, + "step": 6353 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008380611258252694, + "loss": 1.5117, + "step": 6354 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008377767955696114, + "loss": 1.5723, + "step": 6355 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008374924787824986, + "loss": 1.4941, + "step": 6356 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008372081754875364, + "loss": 1.6523, + "step": 6357 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008369238857083293, + "loss": 1.5684, + "step": 6358 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008366396094684804, + "loss": 1.6895, + "step": 6359 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008363553467915914, + "loss": 1.6094, + "step": 6360 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008360710977012628, + "loss": 1.4824, + "step": 6361 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008357868622210953, + "loss": 1.6348, + "step": 6362 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008355026403746869, + "loss": 1.6699, + "step": 6363 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008352184321856352, + "loss": 1.6758, + "step": 6364 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008349342376775359, + "loss": 1.6406, + "step": 6365 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008346500568739852, + "loss": 1.6621, + "step": 6366 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008343658897985766, + "loss": 1.543, + "step": 6367 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008340817364749032, + "loss": 1.6328, + "step": 6368 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008337975969265561, + "loss": 1.5859, + "step": 6369 + }, + { + "epoch": 0.57, + "learning_rate": 0.000833513471177127, + "loss": 1.6797, + "step": 6370 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008332293592502047, + "loss": 1.5859, + "step": 6371 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008329452611693778, + "loss": 1.5801, + "step": 6372 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008326611769582331, + "loss": 1.5703, + "step": 6373 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008323771066403571, + "loss": 1.7305, + "step": 6374 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008320930502393341, + "loss": 1.5586, + "step": 6375 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008318090077787485, + "loss": 1.5918, + "step": 6376 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008315249792821824, + "loss": 1.627, + "step": 6377 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008312409647732175, + "loss": 1.5176, + "step": 6378 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008309569642754339, + "loss": 1.6426, + "step": 6379 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008306729778124105, + "loss": 1.6953, + "step": 6380 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008303890054077252, + "loss": 1.6016, + "step": 6381 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008301050470849551, + "loss": 1.6758, + "step": 6382 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008298211028676758, + "loss": 1.6602, + "step": 6383 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008295371727794614, + "loss": 1.5605, + "step": 6384 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008292532568438849, + "loss": 1.4355, + "step": 6385 + }, + { + "epoch": 0.57, + "learning_rate": 0.000828969355084519, + "loss": 1.6562, + "step": 6386 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008286854675249343, + "loss": 1.5684, + "step": 6387 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008284015941887004, + "loss": 1.6543, + "step": 6388 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008281177350993855, + "loss": 1.5625, + "step": 6389 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008278338902805578, + "loss": 1.6211, + "step": 6390 + }, + { + "epoch": 0.57, + "learning_rate": 0.000827550059755783, + "loss": 1.5156, + "step": 6391 + }, + { + "epoch": 0.57, + "learning_rate": 0.000827266243548626, + "loss": 1.6621, + "step": 6392 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008269824416826504, + "loss": 1.6562, + "step": 6393 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008266986541814192, + "loss": 1.6367, + "step": 6394 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008264148810684935, + "loss": 1.5098, + "step": 6395 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008261311223674337, + "loss": 1.6543, + "step": 6396 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008258473781017986, + "loss": 1.5938, + "step": 6397 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008255636482951463, + "loss": 1.6914, + "step": 6398 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008252799329710331, + "loss": 1.5742, + "step": 6399 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008249962321530146, + "loss": 1.7109, + "step": 6400 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008247125458646445, + "loss": 1.6484, + "step": 6401 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008244288741294767, + "loss": 1.5469, + "step": 6402 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008241452169710624, + "loss": 1.6699, + "step": 6403 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008238615744129523, + "loss": 1.5352, + "step": 6404 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008235779464786953, + "loss": 1.5898, + "step": 6405 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008232943331918402, + "loss": 1.6191, + "step": 6406 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008230107345759339, + "loss": 1.543, + "step": 6407 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008227271506545218, + "loss": 1.5859, + "step": 6408 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008224435814511481, + "loss": 1.6328, + "step": 6409 + }, + { + "epoch": 0.57, + "learning_rate": 0.000822160026989357, + "loss": 1.584, + "step": 6410 + }, + { + "epoch": 0.57, + "learning_rate": 0.00082187648729269, + "loss": 1.5215, + "step": 6411 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008215929623846878, + "loss": 1.6836, + "step": 6412 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008213094522888899, + "loss": 1.5625, + "step": 6413 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008210259570288353, + "loss": 1.6094, + "step": 6414 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008207424766280604, + "loss": 1.5137, + "step": 6415 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008204590111101017, + "loss": 1.6016, + "step": 6416 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008201755604984932, + "loss": 1.582, + "step": 6417 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008198921248167691, + "loss": 1.498, + "step": 6418 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008196087040884611, + "loss": 1.5625, + "step": 6419 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008193252983371002, + "loss": 1.627, + "step": 6420 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008190419075862158, + "loss": 1.6562, + "step": 6421 + }, + { + "epoch": 0.57, + "learning_rate": 0.000818758531859337, + "loss": 1.5879, + "step": 6422 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008184751711799907, + "loss": 1.4902, + "step": 6423 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008181918255717025, + "loss": 1.5645, + "step": 6424 + }, + { + "epoch": 0.57, + "learning_rate": 0.000817908495057998, + "loss": 1.6875, + "step": 6425 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008176251796623998, + "loss": 1.6094, + "step": 6426 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008173418794084306, + "loss": 1.6797, + "step": 6427 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008170585943196107, + "loss": 1.6133, + "step": 6428 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008167753244194605, + "loss": 1.6387, + "step": 6429 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008164920697314984, + "loss": 1.6777, + "step": 6430 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008162088302792412, + "loss": 1.5215, + "step": 6431 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008159256060862046, + "loss": 1.5098, + "step": 6432 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008156423971759037, + "loss": 1.6074, + "step": 6433 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008153592035718516, + "loss": 1.6309, + "step": 6434 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008150760252975607, + "loss": 1.543, + "step": 6435 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008147928623765412, + "loss": 1.623, + "step": 6436 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008145097148323034, + "loss": 1.6465, + "step": 6437 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008142265826883551, + "loss": 1.5664, + "step": 6438 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008139434659682034, + "loss": 1.6074, + "step": 6439 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008136603646953535, + "loss": 1.752, + "step": 6440 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008133772788933107, + "loss": 1.7012, + "step": 6441 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008130942085855777, + "loss": 1.6035, + "step": 6442 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008128111537956564, + "loss": 1.582, + "step": 6443 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008125281145470468, + "loss": 1.5605, + "step": 6444 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008122450908632493, + "loss": 1.6191, + "step": 6445 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008119620827677611, + "loss": 1.5918, + "step": 6446 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008116790902840793, + "loss": 1.6016, + "step": 6447 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008113961134356984, + "loss": 1.7344, + "step": 6448 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008111131522461137, + "loss": 1.6543, + "step": 6449 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008108302067388174, + "loss": 1.6309, + "step": 6450 + }, + { + "epoch": 0.57, + "learning_rate": 0.000810547276937301, + "loss": 1.6387, + "step": 6451 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008102643628650544, + "loss": 1.5977, + "step": 6452 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008099814645455671, + "loss": 1.6348, + "step": 6453 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008096985820023262, + "loss": 1.6113, + "step": 6454 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008094157152588182, + "loss": 1.6777, + "step": 6455 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008091328643385277, + "loss": 1.5801, + "step": 6456 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008088500292649388, + "loss": 1.6309, + "step": 6457 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008085672100615338, + "loss": 1.6641, + "step": 6458 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008082844067517934, + "loss": 1.6406, + "step": 6459 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008080016193591969, + "loss": 1.5781, + "step": 6460 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008077188479072237, + "loss": 1.5859, + "step": 6461 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008074360924193502, + "loss": 1.6309, + "step": 6462 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008071533529190522, + "loss": 1.6777, + "step": 6463 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008068706294298037, + "loss": 1.582, + "step": 6464 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008065879219750785, + "loss": 1.6348, + "step": 6465 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008063052305783481, + "loss": 1.5879, + "step": 6466 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008060225552630827, + "loss": 1.6855, + "step": 6467 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008057398960527509, + "loss": 1.6367, + "step": 6468 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008054572529708214, + "loss": 1.6094, + "step": 6469 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008051746260407601, + "loss": 1.5547, + "step": 6470 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008048920152860321, + "loss": 1.5273, + "step": 6471 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008046094207301007, + "loss": 1.543, + "step": 6472 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008043268423964289, + "loss": 1.5527, + "step": 6473 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008040442803084772, + "loss": 1.5879, + "step": 6474 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008037617344897056, + "loss": 1.6113, + "step": 6475 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008034792049635721, + "loss": 1.6113, + "step": 6476 + }, + { + "epoch": 0.58, + "learning_rate": 0.000803196691753534, + "loss": 1.5371, + "step": 6477 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008029141948830465, + "loss": 1.6289, + "step": 6478 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008026317143755643, + "loss": 1.6484, + "step": 6479 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008023492502545394, + "loss": 1.7617, + "step": 6480 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008020668025434246, + "loss": 1.5781, + "step": 6481 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008017843712656692, + "loss": 1.6699, + "step": 6482 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008015019564447223, + "loss": 1.6035, + "step": 6483 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008012195581040306, + "loss": 1.5352, + "step": 6484 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008009371762670414, + "loss": 1.5391, + "step": 6485 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008006548109571986, + "loss": 1.5371, + "step": 6486 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008003724621979458, + "loss": 1.5586, + "step": 6487 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008000901300127244, + "loss": 1.627, + "step": 6488 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007998078144249758, + "loss": 1.5352, + "step": 6489 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007995255154581388, + "loss": 1.5527, + "step": 6490 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007992432331356511, + "loss": 1.5781, + "step": 6491 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007989609674809491, + "loss": 1.5781, + "step": 6492 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007986787185174682, + "loss": 1.5527, + "step": 6493 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007983964862686415, + "loss": 1.5352, + "step": 6494 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007981142707579019, + "loss": 1.6016, + "step": 6495 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007978320720086797, + "loss": 1.6445, + "step": 6496 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007975498900444049, + "loss": 1.7559, + "step": 6497 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007972677248885053, + "loss": 1.584, + "step": 6498 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007969855765644076, + "loss": 1.5078, + "step": 6499 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007967034450955369, + "loss": 1.5254, + "step": 6500 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007964213305053176, + "loss": 1.7676, + "step": 6501 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007961392328171721, + "loss": 1.5645, + "step": 6502 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007958571520545213, + "loss": 1.5723, + "step": 6503 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007955750882407844, + "loss": 1.5723, + "step": 6504 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007952930413993808, + "loss": 1.6426, + "step": 6505 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007950110115537267, + "loss": 1.5293, + "step": 6506 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007947289987272377, + "loss": 1.5918, + "step": 6507 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007944470029433274, + "loss": 1.6602, + "step": 6508 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007941650242254092, + "loss": 1.8164, + "step": 6509 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007938830625968942, + "loss": 1.6855, + "step": 6510 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007936011180811916, + "loss": 1.5508, + "step": 6511 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007933191907017105, + "loss": 1.5234, + "step": 6512 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007930372804818572, + "loss": 1.6816, + "step": 6513 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007927553874450379, + "loss": 1.4609, + "step": 6514 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007924735116146559, + "loss": 1.4492, + "step": 6515 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007921916530141148, + "loss": 1.6426, + "step": 6516 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007919098116668152, + "loss": 1.5254, + "step": 6517 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007916279875961571, + "loss": 1.5938, + "step": 6518 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007913461808255386, + "loss": 1.6055, + "step": 6519 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007910643913783573, + "loss": 1.6074, + "step": 6520 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007907826192780083, + "loss": 1.625, + "step": 6521 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007905008645478857, + "loss": 1.6777, + "step": 6522 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007902191272113817, + "loss": 1.5645, + "step": 6523 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007899374072918884, + "loss": 1.4746, + "step": 6524 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007896557048127949, + "loss": 1.6172, + "step": 6525 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007893740197974897, + "loss": 1.5723, + "step": 6526 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007890923522693591, + "loss": 1.4668, + "step": 6527 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007888107022517894, + "loss": 1.6133, + "step": 6528 + }, + { + "epoch": 0.58, + "learning_rate": 0.000788529069768164, + "loss": 1.6367, + "step": 6529 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007882474548418654, + "loss": 1.6152, + "step": 6530 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007879658574962746, + "loss": 1.5234, + "step": 6531 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007876842777547714, + "loss": 1.5078, + "step": 6532 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007874027156407334, + "loss": 1.6523, + "step": 6533 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007871211711775379, + "loss": 1.5957, + "step": 6534 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007868396443885593, + "loss": 1.6289, + "step": 6535 + }, + { + "epoch": 0.58, + "learning_rate": 0.000786558135297172, + "loss": 1.498, + "step": 6536 + }, + { + "epoch": 0.58, + "learning_rate": 0.000786276643926748, + "loss": 1.707, + "step": 6537 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007859951703006581, + "loss": 1.6738, + "step": 6538 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007857137144422708, + "loss": 1.5977, + "step": 6539 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007854322763749551, + "loss": 1.5977, + "step": 6540 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007851508561220769, + "loss": 1.5547, + "step": 6541 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007848694537070009, + "loss": 1.6445, + "step": 6542 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007845880691530901, + "loss": 1.4844, + "step": 6543 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007843067024837074, + "loss": 1.6641, + "step": 6544 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007840253537222125, + "loss": 1.6133, + "step": 6545 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007837440228919645, + "loss": 1.5176, + "step": 6546 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007834627100163204, + "loss": 1.5078, + "step": 6547 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007831814151186369, + "loss": 1.5156, + "step": 6548 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007829001382222682, + "loss": 1.5586, + "step": 6549 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007826188793505671, + "loss": 1.5566, + "step": 6550 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007823376385268848, + "loss": 1.5684, + "step": 6551 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007820564157745717, + "loss": 1.6953, + "step": 6552 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007817752111169761, + "loss": 1.5664, + "step": 6553 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007814940245774449, + "loss": 1.6113, + "step": 6554 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007812128561793235, + "loss": 1.5898, + "step": 6555 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007809317059459562, + "loss": 1.666, + "step": 6556 + }, + { + "epoch": 0.58, + "learning_rate": 0.000780650573900685, + "loss": 1.707, + "step": 6557 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007803694600668513, + "loss": 1.5996, + "step": 6558 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007800883644677936, + "loss": 1.5859, + "step": 6559 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007798072871268509, + "loss": 1.668, + "step": 6560 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007795262280673591, + "loss": 1.6074, + "step": 6561 + }, + { + "epoch": 0.58, + "learning_rate": 0.000779245187312653, + "loss": 1.6543, + "step": 6562 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007789641648860657, + "loss": 1.4141, + "step": 6563 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007786831608109297, + "loss": 1.6562, + "step": 6564 + }, + { + "epoch": 0.58, + "learning_rate": 0.000778402175110575, + "loss": 1.5449, + "step": 6565 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007781212078083302, + "loss": 1.4883, + "step": 6566 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007778402589275223, + "loss": 1.582, + "step": 6567 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007775593284914778, + "loss": 1.6328, + "step": 6568 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007772784165235205, + "loss": 1.4941, + "step": 6569 + }, + { + "epoch": 0.58, + "learning_rate": 0.000776997523046973, + "loss": 1.6328, + "step": 6570 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007767166480851562, + "loss": 1.5938, + "step": 6571 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007764357916613899, + "loss": 1.6113, + "step": 6572 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007761549537989924, + "loss": 1.4844, + "step": 6573 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007758741345212799, + "loss": 1.5488, + "step": 6574 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007755933338515673, + "loss": 1.6191, + "step": 6575 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007753125518131683, + "loss": 1.6406, + "step": 6576 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007750317884293947, + "loss": 1.6914, + "step": 6577 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007747510437235565, + "loss": 1.6621, + "step": 6578 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007744703177189624, + "loss": 1.6348, + "step": 6579 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007741896104389203, + "loss": 1.6719, + "step": 6580 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007739089219067355, + "loss": 1.5293, + "step": 6581 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007736282521457121, + "loss": 1.6699, + "step": 6582 + }, + { + "epoch": 0.59, + "learning_rate": 0.000773347601179152, + "loss": 1.5098, + "step": 6583 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007730669690303575, + "loss": 1.6445, + "step": 6584 + }, + { + "epoch": 0.59, + "learning_rate": 0.000772786355722627, + "loss": 1.6113, + "step": 6585 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007725057612792589, + "loss": 1.5449, + "step": 6586 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007722251857235487, + "loss": 1.6055, + "step": 6587 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007719446290787922, + "loss": 1.6777, + "step": 6588 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007716640913682819, + "loss": 1.4824, + "step": 6589 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007713835726153096, + "loss": 1.6699, + "step": 6590 + }, + { + "epoch": 0.59, + "learning_rate": 0.000771103072843165, + "loss": 1.752, + "step": 6591 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007708225920751366, + "loss": 1.5508, + "step": 6592 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007705421303345118, + "loss": 1.7344, + "step": 6593 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007702616876445752, + "loss": 1.5254, + "step": 6594 + }, + { + "epoch": 0.59, + "learning_rate": 0.000769981264028611, + "loss": 1.6504, + "step": 6595 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007697008595099011, + "loss": 1.6562, + "step": 6596 + }, + { + "epoch": 0.59, + "learning_rate": 0.000769420474111726, + "loss": 1.6523, + "step": 6597 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007691401078573642, + "loss": 1.5312, + "step": 6598 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007688597607700939, + "loss": 1.7168, + "step": 6599 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007685794328731904, + "loss": 1.5859, + "step": 6600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007682991241899279, + "loss": 1.5625, + "step": 6601 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007680188347435787, + "loss": 1.5684, + "step": 6602 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007677385645574143, + "loss": 1.6562, + "step": 6603 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007674583136547038, + "loss": 1.623, + "step": 6604 + }, + { + "epoch": 0.59, + "learning_rate": 0.000767178082058715, + "loss": 1.6094, + "step": 6605 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007668978697927137, + "loss": 1.7148, + "step": 6606 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007666176768799651, + "loss": 1.5137, + "step": 6607 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007663375033437319, + "loss": 1.502, + "step": 6608 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007660573492072754, + "loss": 1.4668, + "step": 6609 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007657772144938551, + "loss": 1.6719, + "step": 6610 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007654970992267297, + "loss": 1.5645, + "step": 6611 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007652170034291549, + "loss": 1.6602, + "step": 6612 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007649369271243864, + "loss": 1.5781, + "step": 6613 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007646568703356769, + "loss": 1.6621, + "step": 6614 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007643768330862786, + "loss": 1.6094, + "step": 6615 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007640968153994411, + "loss": 1.5977, + "step": 6616 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007638168172984129, + "loss": 1.502, + "step": 6617 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007635368388064403, + "loss": 1.6484, + "step": 6618 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007632568799467696, + "loss": 1.623, + "step": 6619 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007629769407426435, + "loss": 1.5703, + "step": 6620 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007626970212173041, + "loss": 1.5605, + "step": 6621 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007624171213939911, + "loss": 1.5605, + "step": 6622 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007621372412959442, + "loss": 1.6074, + "step": 6623 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007618573809463998, + "loss": 1.5762, + "step": 6624 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007615775403685931, + "loss": 1.5684, + "step": 6625 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007612977195857577, + "loss": 1.5977, + "step": 6626 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007610179186211263, + "loss": 1.4355, + "step": 6627 + }, + { + "epoch": 0.59, + "learning_rate": 0.000760738137497929, + "loss": 1.4941, + "step": 6628 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007604583762393944, + "loss": 1.666, + "step": 6629 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007601786348687495, + "loss": 1.625, + "step": 6630 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007598989134092203, + "loss": 1.5996, + "step": 6631 + }, + { + "epoch": 0.59, + "learning_rate": 0.00075961921188403, + "loss": 1.6543, + "step": 6632 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007593395303164012, + "loss": 1.5996, + "step": 6633 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007590598687295541, + "loss": 1.5508, + "step": 6634 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007587802271467079, + "loss": 1.541, + "step": 6635 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007585006055910796, + "loss": 1.6387, + "step": 6636 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007582210040858845, + "loss": 1.623, + "step": 6637 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007579414226543365, + "loss": 1.5918, + "step": 6638 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007576618613196482, + "loss": 1.5137, + "step": 6639 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007573823201050299, + "loss": 1.4922, + "step": 6640 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007571027990336903, + "loss": 1.6836, + "step": 6641 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007568232981288363, + "loss": 1.5742, + "step": 6642 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007565438174136742, + "loss": 1.6152, + "step": 6643 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007562643569114074, + "loss": 1.6348, + "step": 6644 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007559849166452379, + "loss": 1.6426, + "step": 6645 + }, + { + "epoch": 0.59, + "learning_rate": 0.000755705496638366, + "loss": 1.6504, + "step": 6646 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007554260969139912, + "loss": 1.7012, + "step": 6647 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007551467174953102, + "loss": 1.6367, + "step": 6648 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007548673584055184, + "loss": 1.7227, + "step": 6649 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007545880196678093, + "loss": 1.457, + "step": 6650 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007543087013053751, + "loss": 1.543, + "step": 6651 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007540294033414064, + "loss": 1.6895, + "step": 6652 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007537501257990917, + "loss": 1.5977, + "step": 6653 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007534708687016178, + "loss": 1.5762, + "step": 6654 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007531916320721703, + "loss": 1.5215, + "step": 6655 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007529124159339324, + "loss": 1.4668, + "step": 6656 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007526332203100861, + "loss": 1.6348, + "step": 6657 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007523540452238111, + "loss": 1.5742, + "step": 6658 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007520748906982868, + "loss": 1.5293, + "step": 6659 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007517957567566893, + "loss": 1.5059, + "step": 6660 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007515166434221938, + "loss": 1.623, + "step": 6661 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007512375507179733, + "loss": 1.6445, + "step": 6662 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007509584786672, + "loss": 1.4355, + "step": 6663 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007506794272930435, + "loss": 1.5957, + "step": 6664 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007504003966186721, + "loss": 1.7168, + "step": 6665 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007501213866672516, + "loss": 1.6074, + "step": 6666 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007498423974619477, + "loss": 1.5996, + "step": 6667 + }, + { + "epoch": 0.59, + "learning_rate": 0.000749563429025923, + "loss": 1.5859, + "step": 6668 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007492844813823387, + "loss": 1.6602, + "step": 6669 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007490055545543543, + "loss": 1.5078, + "step": 6670 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007487266485651278, + "loss": 1.709, + "step": 6671 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007484477634378155, + "loss": 1.4707, + "step": 6672 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007481688991955715, + "loss": 1.5469, + "step": 6673 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007478900558615483, + "loss": 1.541, + "step": 6674 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007476112334588974, + "loss": 1.6387, + "step": 6675 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007473324320107673, + "loss": 1.6777, + "step": 6676 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007470536515403054, + "loss": 1.5547, + "step": 6677 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007467748920706583, + "loss": 1.7363, + "step": 6678 + }, + { + "epoch": 0.59, + "learning_rate": 0.000746496153624969, + "loss": 1.627, + "step": 6679 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007462174362263803, + "loss": 1.6113, + "step": 6680 + }, + { + "epoch": 0.59, + "learning_rate": 0.000745938739898032, + "loss": 1.4277, + "step": 6681 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007456600646630633, + "loss": 1.6973, + "step": 6682 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007453814105446113, + "loss": 1.6367, + "step": 6683 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007451027775658108, + "loss": 1.5039, + "step": 6684 + }, + { + "epoch": 0.59, + "learning_rate": 0.000744824165749795, + "loss": 1.5332, + "step": 6685 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007445455751196964, + "loss": 1.5801, + "step": 6686 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007442670056986445, + "loss": 1.7109, + "step": 6687 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007439884575097674, + "loss": 1.6387, + "step": 6688 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007437099305761914, + "loss": 1.623, + "step": 6689 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007434314249210414, + "loss": 1.668, + "step": 6690 + }, + { + "epoch": 0.6, + "learning_rate": 0.00074315294056744, + "loss": 1.6172, + "step": 6691 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007428744775385085, + "loss": 1.6562, + "step": 6692 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007425960358573662, + "loss": 1.6133, + "step": 6693 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007423176155471308, + "loss": 1.541, + "step": 6694 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007420392166309178, + "loss": 1.5664, + "step": 6695 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007417608391318415, + "loss": 1.5879, + "step": 6696 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007414824830730135, + "loss": 1.5234, + "step": 6697 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007412041484775451, + "loss": 1.6797, + "step": 6698 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007409258353685447, + "loss": 1.7012, + "step": 6699 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007406475437691188, + "loss": 1.6113, + "step": 6700 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007403692737023725, + "loss": 1.5645, + "step": 6701 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007400910251914098, + "loss": 1.6152, + "step": 6702 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007398127982593319, + "loss": 1.5801, + "step": 6703 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007395345929292383, + "loss": 1.6289, + "step": 6704 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007392564092242266, + "loss": 1.7617, + "step": 6705 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007389782471673938, + "loss": 1.5527, + "step": 6706 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007387001067818339, + "loss": 1.5801, + "step": 6707 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007384219880906395, + "loss": 1.7109, + "step": 6708 + }, + { + "epoch": 0.6, + "learning_rate": 0.000738143891116901, + "loss": 1.541, + "step": 6709 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007378658158837074, + "loss": 1.5273, + "step": 6710 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007375877624141463, + "loss": 1.5605, + "step": 6711 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007373097307313028, + "loss": 1.6445, + "step": 6712 + }, + { + "epoch": 0.6, + "learning_rate": 0.00073703172085826, + "loss": 1.5957, + "step": 6713 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007367537328181002, + "loss": 1.5879, + "step": 6714 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007364757666339032, + "loss": 1.5176, + "step": 6715 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007361978223287466, + "loss": 1.6152, + "step": 6716 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007359198999257069, + "loss": 1.6934, + "step": 6717 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007356419994478589, + "loss": 1.6348, + "step": 6718 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007353641209182751, + "loss": 1.6211, + "step": 6719 + }, + { + "epoch": 0.6, + "learning_rate": 0.000735086264360026, + "loss": 1.5078, + "step": 6720 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007348084297961805, + "loss": 1.6973, + "step": 6721 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007345306172498063, + "loss": 1.6328, + "step": 6722 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007342528267439685, + "loss": 1.5469, + "step": 6723 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007339750583017304, + "loss": 1.7539, + "step": 6724 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007336973119461535, + "loss": 1.6602, + "step": 6725 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007334195877002983, + "loss": 1.6699, + "step": 6726 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007331418855872222, + "loss": 1.6074, + "step": 6727 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007328642056299819, + "loss": 1.5117, + "step": 6728 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007325865478516309, + "loss": 1.5352, + "step": 6729 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007323089122752225, + "loss": 1.4961, + "step": 6730 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007320312989238068, + "loss": 1.5762, + "step": 6731 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007317537078204329, + "loss": 1.543, + "step": 6732 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007314761389881475, + "loss": 1.5977, + "step": 6733 + }, + { + "epoch": 0.6, + "learning_rate": 0.000731198592449996, + "loss": 1.6211, + "step": 6734 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007309210682290212, + "loss": 1.5078, + "step": 6735 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007306435663482651, + "loss": 1.6973, + "step": 6736 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007303660868307662, + "loss": 1.7148, + "step": 6737 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007300886296995633, + "loss": 1.5293, + "step": 6738 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007298111949776918, + "loss": 1.6445, + "step": 6739 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007295337826881855, + "loss": 1.627, + "step": 6740 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007292563928540763, + "loss": 1.6211, + "step": 6741 + }, + { + "epoch": 0.6, + "learning_rate": 0.000728979025498395, + "loss": 1.625, + "step": 6742 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007287016806441698, + "loss": 1.3457, + "step": 6743 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007284243583144272, + "loss": 1.5547, + "step": 6744 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007281470585321911, + "loss": 1.6016, + "step": 6745 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007278697813204853, + "loss": 1.6621, + "step": 6746 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007275925267023303, + "loss": 1.5293, + "step": 6747 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007273152947007451, + "loss": 1.5645, + "step": 6748 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007270380853387465, + "loss": 1.4922, + "step": 6749 + }, + { + "epoch": 0.6, + "learning_rate": 0.00072676089863935, + "loss": 1.6328, + "step": 6750 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007264837346255692, + "loss": 1.6426, + "step": 6751 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007262065933204153, + "loss": 1.5957, + "step": 6752 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007259294747468979, + "loss": 1.5703, + "step": 6753 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007256523789280249, + "loss": 1.6543, + "step": 6754 + }, + { + "epoch": 0.6, + "learning_rate": 0.000725375305886802, + "loss": 1.5742, + "step": 6755 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007250982556462331, + "loss": 1.6738, + "step": 6756 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007248212282293197, + "loss": 1.5918, + "step": 6757 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007245442236590631, + "loss": 1.6055, + "step": 6758 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007242672419584609, + "loss": 1.5684, + "step": 6759 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007239902831505093, + "loss": 1.5371, + "step": 6760 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007237133472582026, + "loss": 1.6133, + "step": 6761 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007234364343045341, + "loss": 1.6113, + "step": 6762 + }, + { + "epoch": 0.6, + "learning_rate": 0.000723159544312494, + "loss": 1.5898, + "step": 6763 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007228826773050705, + "loss": 1.5449, + "step": 6764 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007226058333052514, + "loss": 1.5195, + "step": 6765 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007223290123360214, + "loss": 1.5527, + "step": 6766 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007220522144203629, + "loss": 1.6035, + "step": 6767 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007217754395812573, + "loss": 1.5938, + "step": 6768 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007214986878416839, + "loss": 1.5762, + "step": 6769 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007212219592246198, + "loss": 1.6973, + "step": 6770 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007209452537530406, + "loss": 1.6094, + "step": 6771 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007206685714499191, + "loss": 1.7344, + "step": 6772 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007203919123382277, + "loss": 1.6113, + "step": 6773 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007201152764409352, + "loss": 1.6465, + "step": 6774 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007198386637810095, + "loss": 1.5527, + "step": 6775 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007195620743814159, + "loss": 1.5215, + "step": 6776 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007192855082651191, + "loss": 1.5605, + "step": 6777 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007190089654550802, + "loss": 1.6016, + "step": 6778 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007187324459742595, + "loss": 1.5742, + "step": 6779 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007184559498456143, + "loss": 1.5059, + "step": 6780 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007181794770921013, + "loss": 1.7207, + "step": 6781 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007179030277366747, + "loss": 1.6445, + "step": 6782 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007176266018022861, + "loss": 1.6523, + "step": 6783 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007173501993118857, + "loss": 1.627, + "step": 6784 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007170738202884224, + "loss": 1.5488, + "step": 6785 + }, + { + "epoch": 0.6, + "learning_rate": 0.000716797464754842, + "loss": 1.7285, + "step": 6786 + }, + { + "epoch": 0.6, + "learning_rate": 0.000716521132734089, + "loss": 1.6504, + "step": 6787 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007162448242491055, + "loss": 1.5605, + "step": 6788 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007159685393228324, + "loss": 1.666, + "step": 6789 + }, + { + "epoch": 0.6, + "learning_rate": 0.000715692277978208, + "loss": 1.668, + "step": 6790 + }, + { + "epoch": 0.6, + "learning_rate": 0.000715416040238169, + "loss": 1.6445, + "step": 6791 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007151398261256494, + "loss": 1.6719, + "step": 6792 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007148636356635827, + "loss": 1.6855, + "step": 6793 + }, + { + "epoch": 0.6, + "learning_rate": 0.000714587468874899, + "loss": 1.6367, + "step": 6794 + }, + { + "epoch": 0.6, + "learning_rate": 0.000714311325782527, + "loss": 1.5332, + "step": 6795 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007140352064093932, + "loss": 1.5801, + "step": 6796 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007137591107784229, + "loss": 1.6523, + "step": 6797 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007134830389125387, + "loss": 1.5039, + "step": 6798 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007132069908346614, + "loss": 1.4941, + "step": 6799 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007129309665677092, + "loss": 1.6113, + "step": 6800 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007126549661346, + "loss": 1.5586, + "step": 6801 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007123789895582482, + "loss": 1.4824, + "step": 6802 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007121030368615666, + "loss": 1.5059, + "step": 6803 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007118271080674658, + "loss": 1.5332, + "step": 6804 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007115512031988556, + "loss": 1.7539, + "step": 6805 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007112753222786423, + "loss": 1.582, + "step": 6806 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007109994653297312, + "loss": 1.5293, + "step": 6807 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007107236323750248, + "loss": 1.7109, + "step": 6808 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007104478234374243, + "loss": 1.5898, + "step": 6809 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007101720385398289, + "loss": 1.5352, + "step": 6810 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007098962777051355, + "loss": 1.6406, + "step": 6811 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007096205409562387, + "loss": 1.7129, + "step": 6812 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007093448283160317, + "loss": 1.5938, + "step": 6813 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007090691398074059, + "loss": 1.584, + "step": 6814 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007087934754532498, + "loss": 1.5039, + "step": 6815 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007085178352764501, + "loss": 1.6191, + "step": 6816 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007082422192998925, + "loss": 1.625, + "step": 6817 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007079666275464595, + "loss": 1.5586, + "step": 6818 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007076910600390325, + "loss": 1.5859, + "step": 6819 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007074155168004894, + "loss": 1.7168, + "step": 6820 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007071399978537083, + "loss": 1.7207, + "step": 6821 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007068645032215635, + "loss": 1.6797, + "step": 6822 + }, + { + "epoch": 0.61, + "learning_rate": 0.000706589032926928, + "loss": 1.625, + "step": 6823 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007063135869926722, + "loss": 1.4648, + "step": 6824 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007060381654416659, + "loss": 1.5488, + "step": 6825 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007057627682967753, + "loss": 1.5996, + "step": 6826 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007054873955808653, + "loss": 1.6465, + "step": 6827 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007052120473167985, + "loss": 1.5098, + "step": 6828 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007049367235274355, + "loss": 1.5938, + "step": 6829 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007046614242356358, + "loss": 1.5879, + "step": 6830 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007043861494642554, + "loss": 1.4902, + "step": 6831 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007041108992361486, + "loss": 1.6172, + "step": 6832 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007038356735741689, + "loss": 1.5977, + "step": 6833 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007035604725011664, + "loss": 1.4648, + "step": 6834 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007032852960399895, + "loss": 1.6504, + "step": 6835 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007030101442134843, + "loss": 1.6484, + "step": 6836 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007027350170444961, + "loss": 1.5098, + "step": 6837 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007024599145558669, + "loss": 1.543, + "step": 6838 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007021848367704369, + "loss": 1.6504, + "step": 6839 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007019097837110441, + "loss": 1.5488, + "step": 6840 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007016347554005254, + "loss": 1.6113, + "step": 6841 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007013597518617146, + "loss": 1.5195, + "step": 6842 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007010847731174439, + "loss": 1.5508, + "step": 6843 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007008098191905429, + "loss": 1.5703, + "step": 6844 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007005348901038403, + "loss": 1.584, + "step": 6845 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007002599858801617, + "loss": 1.5605, + "step": 6846 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006999851065423308, + "loss": 1.5859, + "step": 6847 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006997102521131697, + "loss": 1.6816, + "step": 6848 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006994354226154978, + "loss": 1.75, + "step": 6849 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006991606180721332, + "loss": 1.6191, + "step": 6850 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006988858385058909, + "loss": 1.5762, + "step": 6851 + }, + { + "epoch": 0.61, + "learning_rate": 0.000698611083939585, + "loss": 1.4355, + "step": 6852 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006983363543960268, + "loss": 1.6406, + "step": 6853 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006980616498980253, + "loss": 1.752, + "step": 6854 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006977869704683877, + "loss": 1.5996, + "step": 6855 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006975123161299199, + "loss": 1.6855, + "step": 6856 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006972376869054246, + "loss": 1.5566, + "step": 6857 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006969630828177029, + "loss": 1.7012, + "step": 6858 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006966885038895534, + "loss": 1.4062, + "step": 6859 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006964139501437736, + "loss": 1.5215, + "step": 6860 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006961394216031578, + "loss": 1.5039, + "step": 6861 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006958649182904988, + "loss": 1.7383, + "step": 6862 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006955904402285868, + "loss": 1.6172, + "step": 6863 + }, + { + "epoch": 0.61, + "learning_rate": 0.000695315987440211, + "loss": 1.5586, + "step": 6864 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006950415599481575, + "loss": 1.6699, + "step": 6865 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006947671577752105, + "loss": 1.666, + "step": 6866 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006944927809441519, + "loss": 1.5742, + "step": 6867 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006942184294777622, + "loss": 1.6133, + "step": 6868 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006939441033988193, + "loss": 1.4258, + "step": 6869 + }, + { + "epoch": 0.61, + "learning_rate": 0.000693669802730099, + "loss": 1.543, + "step": 6870 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006933955274943749, + "loss": 1.7617, + "step": 6871 + }, + { + "epoch": 0.61, + "learning_rate": 0.000693121277714419, + "loss": 1.6094, + "step": 6872 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006928470534130005, + "loss": 1.5664, + "step": 6873 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006925728546128871, + "loss": 1.6895, + "step": 6874 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006922986813368432, + "loss": 1.6367, + "step": 6875 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006920245336076334, + "loss": 1.6953, + "step": 6876 + }, + { + "epoch": 0.61, + "learning_rate": 0.000691750411448018, + "loss": 1.5254, + "step": 6877 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006914763148807559, + "loss": 1.4355, + "step": 6878 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006912022439286036, + "loss": 1.5859, + "step": 6879 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006909281986143166, + "loss": 1.4785, + "step": 6880 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006906541789606468, + "loss": 1.6191, + "step": 6881 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006903801849903451, + "loss": 1.6113, + "step": 6882 + }, + { + "epoch": 0.61, + "learning_rate": 0.000690106216726159, + "loss": 1.7051, + "step": 6883 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006898322741908356, + "loss": 1.6406, + "step": 6884 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006895583574071185, + "loss": 1.5742, + "step": 6885 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006892844663977496, + "loss": 1.498, + "step": 6886 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006890106011854684, + "loss": 1.5527, + "step": 6887 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006887367617930127, + "loss": 1.6152, + "step": 6888 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006884629482431181, + "loss": 1.5469, + "step": 6889 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006881891605585177, + "loss": 1.5781, + "step": 6890 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006879153987619427, + "loss": 1.6758, + "step": 6891 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006876416628761224, + "loss": 1.582, + "step": 6892 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006873679529237833, + "loss": 1.6973, + "step": 6893 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006870942689276503, + "loss": 1.6523, + "step": 6894 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006868206109104454, + "loss": 1.6562, + "step": 6895 + }, + { + "epoch": 0.61, + "learning_rate": 0.00068654697889489, + "loss": 1.6777, + "step": 6896 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006862733729037017, + "loss": 1.582, + "step": 6897 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006859997929595965, + "loss": 1.5469, + "step": 6898 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006857262390852884, + "loss": 1.5488, + "step": 6899 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006854527113034894, + "loss": 1.5449, + "step": 6900 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006851792096369089, + "loss": 1.625, + "step": 6901 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006849057341082545, + "loss": 1.6484, + "step": 6902 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006846322847402309, + "loss": 1.5117, + "step": 6903 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006843588615555418, + "loss": 1.7148, + "step": 6904 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006840854645768881, + "loss": 1.6797, + "step": 6905 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006838120938269681, + "loss": 1.5371, + "step": 6906 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006835387493284784, + "loss": 1.6289, + "step": 6907 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006832654311041134, + "loss": 1.6699, + "step": 6908 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006829921391765657, + "loss": 1.5352, + "step": 6909 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006827188735685249, + "loss": 1.6777, + "step": 6910 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006824456343026787, + "loss": 1.6875, + "step": 6911 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006821724214017131, + "loss": 1.4297, + "step": 6912 + }, + { + "epoch": 0.61, + "learning_rate": 0.0006818992348883115, + "loss": 1.6055, + "step": 6913 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006816260747851548, + "loss": 1.5664, + "step": 6914 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006813529411149221, + "loss": 1.6738, + "step": 6915 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006810798339002906, + "loss": 1.6016, + "step": 6916 + }, + { + "epoch": 0.62, + "learning_rate": 0.000680806753163935, + "loss": 1.541, + "step": 6917 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006805336989285275, + "loss": 1.5859, + "step": 6918 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006802606712167379, + "loss": 1.5859, + "step": 6919 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006799876700512354, + "loss": 1.6953, + "step": 6920 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006797146954546852, + "loss": 1.5977, + "step": 6921 + }, + { + "epoch": 0.62, + "learning_rate": 0.000679441747449751, + "loss": 1.6016, + "step": 6922 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006791688260590938, + "loss": 1.666, + "step": 6923 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006788959313053737, + "loss": 1.582, + "step": 6924 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006786230632112474, + "loss": 1.6836, + "step": 6925 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006783502217993695, + "loss": 1.584, + "step": 6926 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006780774070923925, + "loss": 1.5645, + "step": 6927 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006778046191129673, + "loss": 1.7266, + "step": 6928 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006775318578837418, + "loss": 1.6602, + "step": 6929 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006772591234273619, + "loss": 1.6836, + "step": 6930 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006769864157664712, + "loss": 1.5176, + "step": 6931 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006767137349237115, + "loss": 1.502, + "step": 6932 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006764410809217219, + "loss": 1.5273, + "step": 6933 + }, + { + "epoch": 0.62, + "learning_rate": 0.000676168453783139, + "loss": 1.4668, + "step": 6934 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006758958535305984, + "loss": 1.5176, + "step": 6935 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006756232801867325, + "loss": 1.4531, + "step": 6936 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006753507337741713, + "loss": 1.6875, + "step": 6937 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006750782143155426, + "loss": 1.5566, + "step": 6938 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006748057218334732, + "loss": 1.6172, + "step": 6939 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006745332563505862, + "loss": 1.5391, + "step": 6940 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006742608178895031, + "loss": 1.5742, + "step": 6941 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006739884064728426, + "loss": 1.666, + "step": 6942 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006737160221232224, + "loss": 1.5371, + "step": 6943 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006734436648632567, + "loss": 1.543, + "step": 6944 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006731713347155579, + "loss": 1.5586, + "step": 6945 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006728990317027359, + "loss": 1.5898, + "step": 6946 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006726267558473991, + "loss": 1.459, + "step": 6947 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006723545071721528, + "loss": 1.5801, + "step": 6948 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006720822856996009, + "loss": 1.668, + "step": 6949 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006718100914523436, + "loss": 1.707, + "step": 6950 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006715379244529807, + "loss": 1.5605, + "step": 6951 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006712657847241083, + "loss": 1.6055, + "step": 6952 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006709936722883209, + "loss": 1.4902, + "step": 6953 + }, + { + "epoch": 0.62, + "learning_rate": 0.00067072158716821, + "loss": 1.6836, + "step": 6954 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006704495293863664, + "loss": 1.625, + "step": 6955 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006701774989653771, + "loss": 1.6387, + "step": 6956 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006699054959278276, + "loss": 1.5957, + "step": 6957 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006696335202963002, + "loss": 1.5762, + "step": 6958 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006693615720933765, + "loss": 1.5938, + "step": 6959 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006690896513416347, + "loss": 1.7656, + "step": 6960 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006688177580636507, + "loss": 1.4902, + "step": 6961 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006685458922819982, + "loss": 1.5645, + "step": 6962 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006682740540192493, + "loss": 1.459, + "step": 6963 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006680022432979734, + "loss": 1.6289, + "step": 6964 + }, + { + "epoch": 0.62, + "learning_rate": 0.000667730460140737, + "loss": 1.8066, + "step": 6965 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006674587045701051, + "loss": 1.541, + "step": 6966 + }, + { + "epoch": 0.62, + "learning_rate": 0.00066718697660864, + "loss": 1.7539, + "step": 6967 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006669152762789021, + "loss": 1.625, + "step": 6968 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006666436036034493, + "loss": 1.6562, + "step": 6969 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006663719586048367, + "loss": 1.4238, + "step": 6970 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006661003413056182, + "loss": 1.7441, + "step": 6971 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006658287517283443, + "loss": 1.6621, + "step": 6972 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006655571898955638, + "loss": 1.5664, + "step": 6973 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006652856558298227, + "loss": 1.5605, + "step": 6974 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006650141495536658, + "loss": 1.5762, + "step": 6975 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006647426710896345, + "loss": 1.5664, + "step": 6976 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006644712204602681, + "loss": 1.5742, + "step": 6977 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006641997976881035, + "loss": 1.5781, + "step": 6978 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006639284027956762, + "loss": 1.502, + "step": 6979 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006636570358055183, + "loss": 1.5957, + "step": 6980 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006633856967401601, + "loss": 1.6016, + "step": 6981 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006631143856221289, + "loss": 1.541, + "step": 6982 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006628431024739513, + "loss": 1.5293, + "step": 6983 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006625718473181499, + "loss": 1.6016, + "step": 6984 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006623006201772456, + "loss": 1.7539, + "step": 6985 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006620294210737569, + "loss": 1.5293, + "step": 6986 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006617582500302, + "loss": 1.5898, + "step": 6987 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006614871070690894, + "loss": 1.5938, + "step": 6988 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006612159922129362, + "loss": 1.4766, + "step": 6989 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006609449054842495, + "loss": 1.5684, + "step": 6990 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006606738469055367, + "loss": 1.7344, + "step": 6991 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006604028164993021, + "loss": 1.6387, + "step": 6992 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006601318142880479, + "loss": 1.5898, + "step": 6993 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006598608402942736, + "loss": 1.7051, + "step": 6994 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006595898945404777, + "loss": 1.6738, + "step": 6995 + }, + { + "epoch": 0.62, + "learning_rate": 0.000659318977049155, + "loss": 1.5527, + "step": 6996 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006590480878427982, + "loss": 1.6465, + "step": 6997 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006587772269438976, + "loss": 1.5742, + "step": 6998 + }, + { + "epoch": 0.62, + "learning_rate": 0.000658506394374942, + "loss": 1.7188, + "step": 6999 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006582355901584168, + "loss": 1.6445, + "step": 7000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006579648143168056, + "loss": 1.5039, + "step": 7001 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006576940668725891, + "loss": 1.627, + "step": 7002 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006574233478482467, + "loss": 1.5859, + "step": 7003 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006571526572662546, + "loss": 1.748, + "step": 7004 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006568819951490866, + "loss": 1.5566, + "step": 7005 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006566113615192142, + "loss": 1.5742, + "step": 7006 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006563407563991071, + "loss": 1.7012, + "step": 7007 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006560701798112323, + "loss": 1.4922, + "step": 7008 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006557996317780541, + "loss": 1.6289, + "step": 7009 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006555291123220346, + "loss": 1.5957, + "step": 7010 + }, + { + "epoch": 0.62, + "learning_rate": 0.000655258621465634, + "loss": 1.5957, + "step": 7011 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006549881592313094, + "loss": 1.5508, + "step": 7012 + }, + { + "epoch": 0.62, + "learning_rate": 0.000654717725641516, + "loss": 1.6914, + "step": 7013 + }, + { + "epoch": 0.62, + "learning_rate": 0.000654447320718706, + "loss": 1.5684, + "step": 7014 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006541769444853305, + "loss": 1.6426, + "step": 7015 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006539065969638374, + "loss": 1.6035, + "step": 7016 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006536362781766711, + "loss": 1.7051, + "step": 7017 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006533659881462763, + "loss": 1.7109, + "step": 7018 + }, + { + "epoch": 0.62, + "learning_rate": 0.000653095726895093, + "loss": 1.6602, + "step": 7019 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006528254944455597, + "loss": 1.6191, + "step": 7020 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006525552908201119, + "loss": 1.5879, + "step": 7021 + }, + { + "epoch": 0.62, + "learning_rate": 0.000652285116041184, + "loss": 1.5605, + "step": 7022 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006520149701312068, + "loss": 1.6523, + "step": 7023 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006517448531126091, + "loss": 1.6289, + "step": 7024 + }, + { + "epoch": 0.62, + "learning_rate": 0.0006514747650078173, + "loss": 1.6152, + "step": 7025 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006512047058392554, + "loss": 1.6562, + "step": 7026 + }, + { + "epoch": 0.63, + "learning_rate": 0.000650934675629345, + "loss": 1.6172, + "step": 7027 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006506646744005056, + "loss": 1.7891, + "step": 7028 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006503947021751535, + "loss": 1.5273, + "step": 7029 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006501247589757035, + "loss": 1.4961, + "step": 7030 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006498548448245673, + "loss": 1.582, + "step": 7031 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006495849597441547, + "loss": 1.6445, + "step": 7032 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006493151037568723, + "loss": 1.6562, + "step": 7033 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006490452768851256, + "loss": 1.5273, + "step": 7034 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006487754791513165, + "loss": 1.5938, + "step": 7035 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006485057105778449, + "loss": 1.6562, + "step": 7036 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006482359711871081, + "loss": 1.5996, + "step": 7037 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006479662610015016, + "loss": 1.5039, + "step": 7038 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006476965800434179, + "loss": 1.5586, + "step": 7039 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006474269283352472, + "loss": 1.4531, + "step": 7040 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006471573058993766, + "loss": 1.5762, + "step": 7041 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006468877127581925, + "loss": 1.5449, + "step": 7042 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006466181489340774, + "loss": 1.5254, + "step": 7043 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006463486144494115, + "loss": 1.5957, + "step": 7044 + }, + { + "epoch": 0.63, + "learning_rate": 0.000646079109326573, + "loss": 1.6973, + "step": 7045 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006458096335879376, + "loss": 1.709, + "step": 7046 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006455401872558786, + "loss": 1.6562, + "step": 7047 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006452707703527665, + "loss": 1.623, + "step": 7048 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006450013829009694, + "loss": 1.5977, + "step": 7049 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006447320249228536, + "loss": 1.543, + "step": 7050 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006444626964407822, + "loss": 1.584, + "step": 7051 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006441933974771162, + "loss": 1.5254, + "step": 7052 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006439241280542135, + "loss": 1.6582, + "step": 7053 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006436548881944313, + "loss": 1.5781, + "step": 7054 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006433856779201225, + "loss": 1.5879, + "step": 7055 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006431164972536383, + "loss": 1.5645, + "step": 7056 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006428473462173268, + "loss": 1.5918, + "step": 7057 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006425782248335355, + "loss": 1.6816, + "step": 7058 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006423091331246071, + "loss": 1.5566, + "step": 7059 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006420400711128833, + "loss": 1.4609, + "step": 7060 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006417710388207026, + "loss": 1.5859, + "step": 7061 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006415020362704016, + "loss": 1.5898, + "step": 7062 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006412330634843143, + "loss": 1.5801, + "step": 7063 + }, + { + "epoch": 0.63, + "learning_rate": 0.000640964120484772, + "loss": 1.6543, + "step": 7064 + }, + { + "epoch": 0.63, + "learning_rate": 0.000640695207294103, + "loss": 1.5391, + "step": 7065 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006404263239346347, + "loss": 1.5547, + "step": 7066 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006401574704286907, + "loss": 1.5371, + "step": 7067 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006398886467985927, + "loss": 1.6133, + "step": 7068 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006396198530666592, + "loss": 1.5371, + "step": 7069 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006393510892552073, + "loss": 1.6152, + "step": 7070 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006390823553865508, + "loss": 1.627, + "step": 7071 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006388136514830013, + "loss": 1.5996, + "step": 7072 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006385449775668675, + "loss": 1.5996, + "step": 7073 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006382763336604567, + "loss": 1.4512, + "step": 7074 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006380077197860728, + "loss": 1.543, + "step": 7075 + }, + { + "epoch": 0.63, + "learning_rate": 0.000637739135966017, + "loss": 1.6816, + "step": 7076 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006374705822225885, + "loss": 1.5938, + "step": 7077 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006372020585780845, + "loss": 1.6191, + "step": 7078 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006369335650547985, + "loss": 1.5645, + "step": 7079 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006366651016750225, + "loss": 1.6191, + "step": 7080 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006363966684610451, + "loss": 1.7051, + "step": 7081 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006361282654351535, + "loss": 1.5176, + "step": 7082 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006358598926196317, + "loss": 1.6113, + "step": 7083 + }, + { + "epoch": 0.63, + "learning_rate": 0.000635591550036761, + "loss": 1.582, + "step": 7084 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006353232377088203, + "loss": 1.6387, + "step": 7085 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006350549556580868, + "loss": 1.5469, + "step": 7086 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006347867039068343, + "loss": 1.6094, + "step": 7087 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006345184824773344, + "loss": 1.5996, + "step": 7088 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006342502913918558, + "loss": 1.7148, + "step": 7089 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006339821306726652, + "loss": 1.5215, + "step": 7090 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006337140003420268, + "loss": 1.4453, + "step": 7091 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006334459004222017, + "loss": 1.6348, + "step": 7092 + }, + { + "epoch": 0.63, + "learning_rate": 0.000633177830935449, + "loss": 1.6426, + "step": 7093 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006329097919040252, + "loss": 1.4434, + "step": 7094 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006326417833501842, + "loss": 1.6406, + "step": 7095 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006323738052961772, + "loss": 1.4688, + "step": 7096 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006321058577642526, + "loss": 1.6211, + "step": 7097 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006318379407766576, + "loss": 1.7246, + "step": 7098 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006315700543556356, + "loss": 1.6074, + "step": 7099 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006313021985234273, + "loss": 1.6113, + "step": 7100 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006310343733022721, + "loss": 1.5938, + "step": 7101 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006307665787144057, + "loss": 1.5859, + "step": 7102 + }, + { + "epoch": 0.63, + "learning_rate": 0.000630498814782062, + "loss": 1.6738, + "step": 7103 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006302310815274715, + "loss": 1.5859, + "step": 7104 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006299633789728632, + "loss": 1.5898, + "step": 7105 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006296957071404627, + "loss": 1.6113, + "step": 7106 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006294280660524939, + "loss": 1.498, + "step": 7107 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006291604557311771, + "loss": 1.6133, + "step": 7108 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006288928761987309, + "loss": 1.6465, + "step": 7109 + }, + { + "epoch": 0.63, + "learning_rate": 0.000628625327477371, + "loss": 1.6328, + "step": 7110 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006283578095893104, + "loss": 1.4941, + "step": 7111 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006280903225567594, + "loss": 1.6367, + "step": 7112 + }, + { + "epoch": 0.63, + "learning_rate": 0.000627822866401927, + "loss": 1.5605, + "step": 7113 + }, + { + "epoch": 0.63, + "learning_rate": 0.000627555441147018, + "loss": 1.4785, + "step": 7114 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006272880468142356, + "loss": 1.625, + "step": 7115 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006270206834257796, + "loss": 1.6367, + "step": 7116 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006267533510038486, + "loss": 1.625, + "step": 7117 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006264860495706374, + "loss": 1.6504, + "step": 7118 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006262187791483387, + "loss": 1.7148, + "step": 7119 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006259515397591421, + "loss": 1.6426, + "step": 7120 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006256843314252359, + "loss": 1.6836, + "step": 7121 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006254171541688047, + "loss": 1.6055, + "step": 7122 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006251500080120309, + "loss": 1.5215, + "step": 7123 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006248828929770937, + "loss": 1.6211, + "step": 7124 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006246158090861709, + "loss": 1.7031, + "step": 7125 + }, + { + "epoch": 0.63, + "learning_rate": 0.000624348756361437, + "loss": 1.543, + "step": 7126 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006240817348250638, + "loss": 1.5586, + "step": 7127 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006238147444992206, + "loss": 1.7559, + "step": 7128 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006235477854060745, + "loss": 1.5938, + "step": 7129 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006232808575677897, + "loss": 1.6797, + "step": 7130 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006230139610065278, + "loss": 1.582, + "step": 7131 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006227470957444471, + "loss": 1.6504, + "step": 7132 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006224802618037054, + "loss": 1.5234, + "step": 7133 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006222134592064555, + "loss": 1.4453, + "step": 7134 + }, + { + "epoch": 0.63, + "learning_rate": 0.000621946687974849, + "loss": 1.4336, + "step": 7135 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006216799481310343, + "loss": 1.6113, + "step": 7136 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006214132396971577, + "loss": 1.627, + "step": 7137 + }, + { + "epoch": 0.63, + "learning_rate": 0.0006211465626953625, + "loss": 1.6094, + "step": 7138 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006208799171477896, + "loss": 1.7324, + "step": 7139 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006206133030765766, + "loss": 1.5312, + "step": 7140 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006203467205038599, + "loss": 1.5254, + "step": 7141 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006200801694517722, + "loss": 1.5508, + "step": 7142 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006198136499424438, + "loss": 1.4844, + "step": 7143 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006195471619980017, + "loss": 1.502, + "step": 7144 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006192807056405724, + "loss": 1.7988, + "step": 7145 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006190142808922774, + "loss": 1.5137, + "step": 7146 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006187478877752369, + "loss": 1.6035, + "step": 7147 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006184815263115678, + "loss": 1.4707, + "step": 7148 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006182151965233851, + "loss": 1.6641, + "step": 7149 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006179488984328008, + "loss": 1.5547, + "step": 7150 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006176826320619238, + "loss": 1.5605, + "step": 7151 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006174163974328607, + "loss": 1.5918, + "step": 7152 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006171501945677163, + "loss": 1.6055, + "step": 7153 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006168840234885918, + "loss": 1.5508, + "step": 7154 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006166178842175858, + "loss": 1.6426, + "step": 7155 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006163517767767938, + "loss": 1.5801, + "step": 7156 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006160857011883108, + "loss": 1.5723, + "step": 7157 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006158196574742267, + "loss": 1.5664, + "step": 7158 + }, + { + "epoch": 0.64, + "learning_rate": 0.00061555364565663, + "loss": 1.627, + "step": 7159 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006152876657576057, + "loss": 1.6797, + "step": 7160 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006150217177992377, + "loss": 1.5527, + "step": 7161 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006147558018036054, + "loss": 1.6504, + "step": 7162 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006144899177927872, + "loss": 1.623, + "step": 7163 + }, + { + "epoch": 0.64, + "learning_rate": 0.000614224065788857, + "loss": 1.5938, + "step": 7164 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006139582458138883, + "loss": 1.623, + "step": 7165 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006136924578899501, + "loss": 1.6309, + "step": 7166 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006134267020391095, + "loss": 1.6172, + "step": 7167 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006131609782834305, + "loss": 1.5703, + "step": 7168 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006128952866449752, + "loss": 1.5137, + "step": 7169 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006126296271458026, + "loss": 1.5605, + "step": 7170 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006123639998079686, + "loss": 1.5527, + "step": 7171 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006120984046535272, + "loss": 1.5918, + "step": 7172 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006118328417045294, + "loss": 1.4629, + "step": 7173 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006115673109830233, + "loss": 1.4102, + "step": 7174 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006113018125110546, + "loss": 1.5566, + "step": 7175 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006110363463106659, + "loss": 1.623, + "step": 7176 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006107709124038984, + "loss": 1.5508, + "step": 7177 + }, + { + "epoch": 0.64, + "learning_rate": 0.000610505510812789, + "loss": 1.541, + "step": 7178 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006102401415593728, + "loss": 1.7383, + "step": 7179 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006099748046656814, + "loss": 1.6133, + "step": 7180 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006097095001537455, + "loss": 1.6777, + "step": 7181 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006094442280455913, + "loss": 1.5996, + "step": 7182 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006091789883632431, + "loss": 1.6426, + "step": 7183 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006089137811287215, + "loss": 1.498, + "step": 7184 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006086486063640468, + "loss": 1.5742, + "step": 7185 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006083834640912343, + "loss": 1.5664, + "step": 7186 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006081183543322971, + "loss": 1.7207, + "step": 7187 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006078532771092464, + "loss": 1.6836, + "step": 7188 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006075882324440897, + "loss": 1.5547, + "step": 7189 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006073232203588329, + "loss": 1.6074, + "step": 7190 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006070582408754777, + "loss": 1.5117, + "step": 7191 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006067932940160247, + "loss": 1.6641, + "step": 7192 + }, + { + "epoch": 0.64, + "learning_rate": 0.000606528379802471, + "loss": 1.6738, + "step": 7193 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006062634982568107, + "loss": 1.5957, + "step": 7194 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006059986494010354, + "loss": 1.7168, + "step": 7195 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006057338332571347, + "loss": 1.5488, + "step": 7196 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006054690498470947, + "loss": 1.5488, + "step": 7197 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006052042991928989, + "loss": 1.5469, + "step": 7198 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006049395813165277, + "loss": 1.543, + "step": 7199 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006046748962399601, + "loss": 1.5254, + "step": 7200 + }, + { + "epoch": 0.64, + "learning_rate": 0.000604410243985171, + "loss": 1.6484, + "step": 7201 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006041456245741335, + "loss": 1.5605, + "step": 7202 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006038810380288167, + "loss": 1.6699, + "step": 7203 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006036164843711887, + "loss": 1.6367, + "step": 7204 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006033519636232139, + "loss": 1.4824, + "step": 7205 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006030874758068539, + "loss": 1.6699, + "step": 7206 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006028230209440675, + "loss": 1.5195, + "step": 7207 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006025585990568114, + "loss": 1.4277, + "step": 7208 + }, + { + "epoch": 0.64, + "learning_rate": 0.000602294210167039, + "loss": 1.5625, + "step": 7209 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006020298542967013, + "loss": 1.5527, + "step": 7210 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006017655314677457, + "loss": 1.6152, + "step": 7211 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006015012417021186, + "loss": 1.4902, + "step": 7212 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006012369850217619, + "loss": 1.623, + "step": 7213 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006009727614486157, + "loss": 1.5, + "step": 7214 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006007085710046167, + "loss": 1.5723, + "step": 7215 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006004444137116998, + "loss": 1.543, + "step": 7216 + }, + { + "epoch": 0.64, + "learning_rate": 0.0006001802895917965, + "loss": 1.6191, + "step": 7217 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005999161986668355, + "loss": 1.582, + "step": 7218 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005996521409587426, + "loss": 1.5684, + "step": 7219 + }, + { + "epoch": 0.64, + "learning_rate": 0.000599388116489442, + "loss": 1.5039, + "step": 7220 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005991241252808534, + "loss": 1.5352, + "step": 7221 + }, + { + "epoch": 0.64, + "learning_rate": 0.000598860167354895, + "loss": 1.5254, + "step": 7222 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005985962427334815, + "loss": 1.5859, + "step": 7223 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005983323514385259, + "loss": 1.5312, + "step": 7224 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005980684934919372, + "loss": 1.6191, + "step": 7225 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005978046689156223, + "loss": 1.6602, + "step": 7226 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005975408777314849, + "loss": 1.623, + "step": 7227 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005972771199614264, + "loss": 1.5859, + "step": 7228 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005970133956273453, + "loss": 1.6348, + "step": 7229 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005967497047511375, + "loss": 1.6797, + "step": 7230 + }, + { + "epoch": 0.64, + "learning_rate": 0.000596486047354695, + "loss": 1.5293, + "step": 7231 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005962224234599087, + "loss": 1.4941, + "step": 7232 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005959588330886658, + "loss": 1.5684, + "step": 7233 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005956952762628507, + "loss": 1.4727, + "step": 7234 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005954317530043449, + "loss": 1.6309, + "step": 7235 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005951682633350278, + "loss": 1.7148, + "step": 7236 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005949048072767755, + "loss": 1.625, + "step": 7237 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005946413848514614, + "loss": 1.5, + "step": 7238 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005943779960809553, + "loss": 1.5703, + "step": 7239 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005941146409871263, + "loss": 1.5078, + "step": 7240 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005938513195918386, + "loss": 1.6133, + "step": 7241 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005935880319169547, + "loss": 1.5723, + "step": 7242 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005933247779843333, + "loss": 1.5293, + "step": 7243 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005930615578158319, + "loss": 1.543, + "step": 7244 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005927983714333041, + "loss": 1.4727, + "step": 7245 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005925352188586007, + "loss": 1.5664, + "step": 7246 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005922721001135697, + "loss": 1.5723, + "step": 7247 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005920090152200569, + "loss": 1.5195, + "step": 7248 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005917459641999048, + "loss": 1.6035, + "step": 7249 + }, + { + "epoch": 0.64, + "learning_rate": 0.0005914829470749526, + "loss": 1.6289, + "step": 7250 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005912199638670376, + "loss": 1.6289, + "step": 7251 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005909570145979945, + "loss": 1.5605, + "step": 7252 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005906940992896539, + "loss": 1.668, + "step": 7253 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005904312179638444, + "loss": 1.75, + "step": 7254 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005901683706423915, + "loss": 1.5469, + "step": 7255 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005899055573471185, + "loss": 1.6152, + "step": 7256 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005896427780998452, + "loss": 1.6133, + "step": 7257 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005893800329223888, + "loss": 1.6309, + "step": 7258 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005891173218365633, + "loss": 1.6465, + "step": 7259 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005888546448641809, + "loss": 1.7188, + "step": 7260 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005885920020270501, + "loss": 1.6113, + "step": 7261 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005883293933469765, + "loss": 1.5723, + "step": 7262 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005880668188457627, + "loss": 1.6836, + "step": 7263 + }, + { + "epoch": 0.65, + "learning_rate": 0.00058780427854521, + "loss": 1.7109, + "step": 7264 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005875417724671152, + "loss": 1.4805, + "step": 7265 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005872793006332728, + "loss": 1.5137, + "step": 7266 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005870168630654742, + "loss": 1.5332, + "step": 7267 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005867544597855088, + "loss": 1.6895, + "step": 7268 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005864920908151621, + "loss": 1.4395, + "step": 7269 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005862297561762171, + "loss": 1.5742, + "step": 7270 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005859674558904547, + "loss": 1.5527, + "step": 7271 + }, + { + "epoch": 0.65, + "learning_rate": 0.000585705189979652, + "loss": 1.5586, + "step": 7272 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005854429584655835, + "loss": 1.5449, + "step": 7273 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005851807613700207, + "loss": 1.5742, + "step": 7274 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005849185987147329, + "loss": 1.625, + "step": 7275 + }, + { + "epoch": 0.65, + "learning_rate": 0.000584656470521486, + "loss": 1.582, + "step": 7276 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005843943768120431, + "loss": 1.5645, + "step": 7277 + }, + { + "epoch": 0.65, + "learning_rate": 0.000584132317608164, + "loss": 1.5234, + "step": 7278 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005838702929316068, + "loss": 1.7051, + "step": 7279 + }, + { + "epoch": 0.65, + "learning_rate": 0.000583608302804126, + "loss": 1.6426, + "step": 7280 + }, + { + "epoch": 0.65, + "learning_rate": 0.000583346347247473, + "loss": 1.5254, + "step": 7281 + }, + { + "epoch": 0.65, + "learning_rate": 0.000583084426283396, + "loss": 1.6094, + "step": 7282 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005828225399336422, + "loss": 1.6582, + "step": 7283 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005825606882199539, + "loss": 1.5742, + "step": 7284 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005822988711640715, + "loss": 1.5781, + "step": 7285 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005820370887877317, + "loss": 1.5332, + "step": 7286 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005817753411126701, + "loss": 1.6543, + "step": 7287 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005815136281606172, + "loss": 1.6406, + "step": 7288 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005812519499533023, + "loss": 1.7051, + "step": 7289 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005809903065124505, + "loss": 1.5137, + "step": 7290 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005807286978597854, + "loss": 1.5938, + "step": 7291 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005804671240170268, + "loss": 1.4609, + "step": 7292 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005802055850058915, + "loss": 1.6309, + "step": 7293 + }, + { + "epoch": 0.65, + "learning_rate": 0.000579944080848094, + "loss": 1.7012, + "step": 7294 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005796826115653458, + "loss": 1.5742, + "step": 7295 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005794211771793543, + "loss": 1.6309, + "step": 7296 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005791597777118265, + "loss": 1.4434, + "step": 7297 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005788984131844641, + "loss": 1.6855, + "step": 7298 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005786370836189672, + "loss": 1.541, + "step": 7299 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005783757890370325, + "loss": 1.6367, + "step": 7300 + }, + { + "epoch": 0.65, + "learning_rate": 0.000578114529460354, + "loss": 1.8008, + "step": 7301 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005778533049106223, + "loss": 1.6602, + "step": 7302 + }, + { + "epoch": 0.65, + "learning_rate": 0.000577592115409526, + "loss": 1.6523, + "step": 7303 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005773309609787504, + "loss": 1.5234, + "step": 7304 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005770698416399774, + "loss": 1.4453, + "step": 7305 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005768087574148862, + "loss": 1.6367, + "step": 7306 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005765477083251539, + "loss": 1.5703, + "step": 7307 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005762866943924538, + "loss": 1.5039, + "step": 7308 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005760257156384563, + "loss": 1.5527, + "step": 7309 + }, + { + "epoch": 0.65, + "learning_rate": 0.000575764772084829, + "loss": 1.4766, + "step": 7310 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005755038637532372, + "loss": 1.5645, + "step": 7311 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005752429906653427, + "loss": 1.5996, + "step": 7312 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005749821528428041, + "loss": 1.543, + "step": 7313 + }, + { + "epoch": 0.65, + "learning_rate": 0.000574721350307277, + "loss": 1.5312, + "step": 7314 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005744605830804155, + "loss": 1.6133, + "step": 7315 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005741998511838693, + "loss": 1.666, + "step": 7316 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005739391546392854, + "loss": 1.4902, + "step": 7317 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005736784934683079, + "loss": 1.5664, + "step": 7318 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005734178676925787, + "loss": 1.4258, + "step": 7319 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005731572773337361, + "loss": 1.498, + "step": 7320 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005728967224134154, + "loss": 1.584, + "step": 7321 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005726362029532486, + "loss": 1.4785, + "step": 7322 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005723757189748662, + "loss": 1.5488, + "step": 7323 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005721152704998944, + "loss": 1.582, + "step": 7324 + }, + { + "epoch": 0.65, + "learning_rate": 0.000571854857549957, + "loss": 1.5469, + "step": 7325 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005715944801466742, + "loss": 1.6895, + "step": 7326 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005713341383116646, + "loss": 1.5781, + "step": 7327 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005710738320665425, + "loss": 1.6367, + "step": 7328 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005708135614329199, + "loss": 1.5723, + "step": 7329 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005705533264324052, + "loss": 1.6113, + "step": 7330 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005702931270866054, + "loss": 1.6602, + "step": 7331 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005700329634171228, + "loss": 1.5527, + "step": 7332 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005697728354455578, + "loss": 1.6191, + "step": 7333 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005695127431935069, + "loss": 1.4648, + "step": 7334 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005692526866825645, + "loss": 1.4512, + "step": 7335 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005689926659343218, + "loss": 1.4863, + "step": 7336 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005687326809703672, + "loss": 1.7461, + "step": 7337 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005684727318122856, + "loss": 1.5859, + "step": 7338 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005682128184816592, + "loss": 1.6816, + "step": 7339 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005679529410000672, + "loss": 1.6152, + "step": 7340 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005676930993890861, + "loss": 1.6992, + "step": 7341 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005674332936702886, + "loss": 1.5488, + "step": 7342 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005671735238652458, + "loss": 1.5879, + "step": 7343 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005669137899955248, + "loss": 1.4531, + "step": 7344 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005666540920826898, + "loss": 1.6582, + "step": 7345 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005663944301483018, + "loss": 1.5449, + "step": 7346 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005661348042139199, + "loss": 1.5078, + "step": 7347 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005658752143010993, + "loss": 1.6602, + "step": 7348 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005656156604313922, + "loss": 1.6387, + "step": 7349 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005653561426263476, + "loss": 1.623, + "step": 7350 + }, + { + "epoch": 0.65, + "learning_rate": 0.000565096660907513, + "loss": 1.5742, + "step": 7351 + }, + { + "epoch": 0.65, + "learning_rate": 0.000564837215296431, + "loss": 1.7109, + "step": 7352 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005645778058146421, + "loss": 1.5938, + "step": 7353 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005643184324836836, + "loss": 1.502, + "step": 7354 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005640590953250904, + "loss": 1.5586, + "step": 7355 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005637997943603937, + "loss": 1.6309, + "step": 7356 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005635405296111213, + "loss": 1.4121, + "step": 7357 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005632813010987998, + "loss": 1.5449, + "step": 7358 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005630221088449508, + "loss": 1.5332, + "step": 7359 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005627629528710936, + "loss": 1.5859, + "step": 7360 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005625038331987447, + "loss": 1.6719, + "step": 7361 + }, + { + "epoch": 0.65, + "learning_rate": 0.0005622447498494176, + "loss": 1.4688, + "step": 7362 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005619857028446228, + "loss": 1.5527, + "step": 7363 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005617266922058673, + "loss": 1.623, + "step": 7364 + }, + { + "epoch": 0.66, + "learning_rate": 0.000561467717954655, + "loss": 1.6602, + "step": 7365 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005612087801124882, + "loss": 1.6914, + "step": 7366 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005609498787008646, + "loss": 1.5723, + "step": 7367 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005606910137412794, + "loss": 1.5527, + "step": 7368 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005604321852552244, + "loss": 1.5684, + "step": 7369 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005601733932641897, + "loss": 1.6172, + "step": 7370 + }, + { + "epoch": 0.66, + "learning_rate": 0.000559914637789661, + "loss": 1.5742, + "step": 7371 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005596559188531214, + "loss": 1.5977, + "step": 7372 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005593972364760509, + "loss": 1.6191, + "step": 7373 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005591385906799267, + "loss": 1.4785, + "step": 7374 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005588799814862223, + "loss": 1.5137, + "step": 7375 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005586214089164094, + "loss": 1.6445, + "step": 7376 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005583628729919559, + "loss": 1.6152, + "step": 7377 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005581043737343262, + "loss": 1.6758, + "step": 7378 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005578459111649825, + "loss": 1.6035, + "step": 7379 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005575874853053832, + "loss": 1.6328, + "step": 7380 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005573290961769842, + "loss": 1.582, + "step": 7381 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005570707438012387, + "loss": 1.582, + "step": 7382 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005568124281995959, + "loss": 1.5703, + "step": 7383 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005565541493935024, + "loss": 1.6465, + "step": 7384 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005562959074044013, + "loss": 1.6855, + "step": 7385 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005560377022537342, + "loss": 1.4629, + "step": 7386 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005557795339629378, + "loss": 1.5879, + "step": 7387 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005555214025534465, + "loss": 1.5801, + "step": 7388 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005552633080466913, + "loss": 1.5215, + "step": 7389 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005550052504641013, + "loss": 1.5117, + "step": 7390 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005547472298271011, + "loss": 1.4922, + "step": 7391 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005544892461571129, + "loss": 1.7559, + "step": 7392 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005542312994755554, + "loss": 1.6035, + "step": 7393 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005539733898038453, + "loss": 1.5859, + "step": 7394 + }, + { + "epoch": 0.66, + "learning_rate": 0.000553715517163395, + "loss": 1.4941, + "step": 7395 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005534576815756143, + "loss": 1.5859, + "step": 7396 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005531998830619098, + "loss": 1.6816, + "step": 7397 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005529421216436859, + "loss": 1.6211, + "step": 7398 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005526843973423425, + "loss": 1.5527, + "step": 7399 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005524267101792773, + "loss": 1.5898, + "step": 7400 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005521690601758844, + "loss": 1.6699, + "step": 7401 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005519114473535559, + "loss": 1.5176, + "step": 7402 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005516538717336795, + "loss": 1.6055, + "step": 7403 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005513963333376406, + "loss": 1.5859, + "step": 7404 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005511388321868205, + "loss": 1.5137, + "step": 7405 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005508813683025993, + "loss": 1.6621, + "step": 7406 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005506239417063524, + "loss": 1.6777, + "step": 7407 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005503665524194526, + "loss": 1.6758, + "step": 7408 + }, + { + "epoch": 0.66, + "learning_rate": 0.000550109200463269, + "loss": 1.543, + "step": 7409 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005498518858591693, + "loss": 1.4629, + "step": 7410 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005495946086285164, + "loss": 1.4805, + "step": 7411 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005493373687926707, + "loss": 1.668, + "step": 7412 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005490801663729896, + "loss": 1.75, + "step": 7413 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005488230013908266, + "loss": 1.6719, + "step": 7414 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005485658738675338, + "loss": 1.584, + "step": 7415 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005483087838244587, + "loss": 1.6309, + "step": 7416 + }, + { + "epoch": 0.66, + "learning_rate": 0.000548051731282946, + "loss": 1.5723, + "step": 7417 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005477947162643378, + "loss": 1.541, + "step": 7418 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005475377387899723, + "loss": 1.5293, + "step": 7419 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005472807988811852, + "loss": 1.4824, + "step": 7420 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005470238965593086, + "loss": 1.6113, + "step": 7421 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005467670318456722, + "loss": 1.4941, + "step": 7422 + }, + { + "epoch": 0.66, + "learning_rate": 0.000546510204761602, + "loss": 1.5527, + "step": 7423 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005462534153284211, + "loss": 1.5879, + "step": 7424 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005459966635674487, + "loss": 1.4941, + "step": 7425 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005457399495000027, + "loss": 1.5801, + "step": 7426 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005454832731473959, + "loss": 1.6074, + "step": 7427 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005452266345309391, + "loss": 1.7285, + "step": 7428 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005449700336719392, + "loss": 1.7051, + "step": 7429 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005447134705917012, + "loss": 1.6777, + "step": 7430 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005444569453115258, + "loss": 1.5645, + "step": 7431 + }, + { + "epoch": 0.66, + "learning_rate": 0.000544200457852711, + "loss": 1.4668, + "step": 7432 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005439440082365512, + "loss": 1.5488, + "step": 7433 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005436875964843389, + "loss": 1.666, + "step": 7434 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005434312226173621, + "loss": 1.5605, + "step": 7435 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005431748866569062, + "loss": 1.7051, + "step": 7436 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005429185886242532, + "loss": 1.5547, + "step": 7437 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005426623285406829, + "loss": 1.6543, + "step": 7438 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005424061064274708, + "loss": 1.5801, + "step": 7439 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005421499223058892, + "loss": 1.5762, + "step": 7440 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005418937761972088, + "loss": 1.5723, + "step": 7441 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005416376681226956, + "loss": 1.498, + "step": 7442 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005413815981036125, + "loss": 1.5977, + "step": 7443 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005411255661612197, + "loss": 1.4941, + "step": 7444 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005408695723167751, + "loss": 1.6465, + "step": 7445 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005406136165915316, + "loss": 1.6289, + "step": 7446 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005403576990067405, + "loss": 1.5508, + "step": 7447 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005401018195836483, + "loss": 1.541, + "step": 7448 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005398459783435006, + "loss": 1.5703, + "step": 7449 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005395901753075379, + "loss": 1.6113, + "step": 7450 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005393344104969982, + "loss": 1.5488, + "step": 7451 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005390786839331164, + "loss": 1.5156, + "step": 7452 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005388229956371242, + "loss": 1.5762, + "step": 7453 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005385673456302496, + "loss": 1.4805, + "step": 7454 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005383117339337185, + "loss": 1.7168, + "step": 7455 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005380561605687528, + "loss": 1.6074, + "step": 7456 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005378006255565714, + "loss": 1.6562, + "step": 7457 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005375451289183901, + "loss": 1.5664, + "step": 7458 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005372896706754215, + "loss": 1.6152, + "step": 7459 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005370342508488744, + "loss": 1.5469, + "step": 7460 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005367788694599555, + "loss": 1.6719, + "step": 7461 + }, + { + "epoch": 0.66, + "learning_rate": 0.000536523526529868, + "loss": 1.6992, + "step": 7462 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005362682220798114, + "loss": 1.4824, + "step": 7463 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005360129561309819, + "loss": 1.666, + "step": 7464 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005357577287045736, + "loss": 1.5996, + "step": 7465 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005355025398217763, + "loss": 1.5332, + "step": 7466 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005352473895037773, + "loss": 1.5195, + "step": 7467 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005349922777717596, + "loss": 1.7422, + "step": 7468 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005347372046469048, + "loss": 1.5527, + "step": 7469 + }, + { + "epoch": 0.66, + "learning_rate": 0.00053448217015039, + "loss": 1.5859, + "step": 7470 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005342271743033891, + "loss": 1.6465, + "step": 7471 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005339722171270729, + "loss": 1.5352, + "step": 7472 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005337172986426098, + "loss": 1.5781, + "step": 7473 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005334624188711642, + "loss": 1.6055, + "step": 7474 + }, + { + "epoch": 0.66, + "learning_rate": 0.0005332075778338972, + "loss": 1.666, + "step": 7475 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005329527755519666, + "loss": 1.5762, + "step": 7476 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005326980120465281, + "loss": 1.6309, + "step": 7477 + }, + { + "epoch": 0.67, + "learning_rate": 0.000532443287338733, + "loss": 1.5723, + "step": 7478 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005321886014497296, + "loss": 1.5469, + "step": 7479 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005319339544006629, + "loss": 1.4863, + "step": 7480 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005316793462126759, + "loss": 1.5176, + "step": 7481 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005314247769069067, + "loss": 1.5215, + "step": 7482 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005311702465044908, + "loss": 1.7051, + "step": 7483 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005309157550265604, + "loss": 1.6055, + "step": 7484 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005306613024942452, + "loss": 1.5312, + "step": 7485 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005304068889286709, + "loss": 1.7148, + "step": 7486 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005301525143509598, + "loss": 1.6816, + "step": 7487 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005298981787822311, + "loss": 1.5527, + "step": 7488 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005296438822436018, + "loss": 1.5254, + "step": 7489 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005293896247561842, + "loss": 1.6133, + "step": 7490 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005291354063410882, + "loss": 1.582, + "step": 7491 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005288812270194201, + "loss": 1.6797, + "step": 7492 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005286270868122827, + "loss": 1.7324, + "step": 7493 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005283729857407766, + "loss": 1.5332, + "step": 7494 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005281189238259985, + "loss": 1.6523, + "step": 7495 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005278649010890412, + "loss": 1.5527, + "step": 7496 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005276109175509955, + "loss": 1.5879, + "step": 7497 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005273569732329481, + "loss": 1.582, + "step": 7498 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005271030681559825, + "loss": 1.6465, + "step": 7499 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005268492023411789, + "loss": 1.457, + "step": 7500 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005265953758096151, + "loss": 1.6543, + "step": 7501 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005263415885823648, + "loss": 1.6172, + "step": 7502 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005260878406804986, + "loss": 1.4785, + "step": 7503 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005258341321250832, + "loss": 1.498, + "step": 7504 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005255804629371839, + "loss": 1.4922, + "step": 7505 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005253268331378607, + "loss": 1.6621, + "step": 7506 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005250732427481714, + "loss": 1.5566, + "step": 7507 + }, + { + "epoch": 0.67, + "learning_rate": 0.00052481969178917, + "loss": 1.5156, + "step": 7508 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005245661802819081, + "loss": 1.6621, + "step": 7509 + }, + { + "epoch": 0.67, + "learning_rate": 0.000524312708247433, + "loss": 1.5449, + "step": 7510 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005240592757067893, + "loss": 1.625, + "step": 7511 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005238058826810178, + "loss": 1.6211, + "step": 7512 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005235525291911572, + "loss": 1.6016, + "step": 7513 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005232992152582414, + "loss": 1.5352, + "step": 7514 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005230459409033022, + "loss": 1.5996, + "step": 7515 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005227927061473669, + "loss": 1.6562, + "step": 7516 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005225395110114615, + "loss": 1.6543, + "step": 7517 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005222863555166064, + "loss": 1.5312, + "step": 7518 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005220332396838203, + "loss": 1.7363, + "step": 7519 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005217801635341175, + "loss": 1.5156, + "step": 7520 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005215271270885104, + "loss": 1.4766, + "step": 7521 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005212741303680069, + "loss": 1.6074, + "step": 7522 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005210211733936116, + "loss": 1.6367, + "step": 7523 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005207682561863272, + "loss": 1.5469, + "step": 7524 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005205153787671513, + "loss": 1.5332, + "step": 7525 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005202625411570793, + "loss": 1.5586, + "step": 7526 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005200097433771025, + "loss": 1.6973, + "step": 7527 + }, + { + "epoch": 0.67, + "learning_rate": 0.00051975698544821, + "loss": 1.5332, + "step": 7528 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005195042673913868, + "loss": 1.4785, + "step": 7529 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005192515892276147, + "loss": 1.6309, + "step": 7530 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005189989509778723, + "loss": 1.6309, + "step": 7531 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005187463526631343, + "loss": 1.5254, + "step": 7532 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005184937943043734, + "loss": 1.666, + "step": 7533 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005182412759225581, + "loss": 1.543, + "step": 7534 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005179887975386533, + "loss": 1.5449, + "step": 7535 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005177363591736213, + "loss": 1.6445, + "step": 7536 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005174839608484205, + "loss": 1.5762, + "step": 7537 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005172316025840063, + "loss": 1.5723, + "step": 7538 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005169792844013302, + "loss": 1.5781, + "step": 7539 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005167270063213417, + "loss": 1.584, + "step": 7540 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005164747683649858, + "loss": 1.6309, + "step": 7541 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005162225705532046, + "loss": 1.5195, + "step": 7542 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005159704129069362, + "loss": 1.6309, + "step": 7543 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005157182954471167, + "loss": 1.5137, + "step": 7544 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005154662181946778, + "loss": 1.6074, + "step": 7545 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005152141811705482, + "loss": 1.5156, + "step": 7546 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005149621843956526, + "loss": 1.5195, + "step": 7547 + }, + { + "epoch": 0.67, + "learning_rate": 0.000514710227890914, + "loss": 1.5391, + "step": 7548 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005144583116772505, + "loss": 1.5977, + "step": 7549 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005142064357755774, + "loss": 1.6641, + "step": 7550 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005139546002068064, + "loss": 1.5156, + "step": 7551 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005137028049918467, + "loss": 1.7617, + "step": 7552 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005134510501516032, + "loss": 1.6582, + "step": 7553 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005131993357069778, + "loss": 1.5488, + "step": 7554 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005129476616788687, + "loss": 1.5215, + "step": 7555 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005126960280881719, + "loss": 1.709, + "step": 7556 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005124444349557786, + "loss": 1.6055, + "step": 7557 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005121928823025773, + "loss": 1.5488, + "step": 7558 + }, + { + "epoch": 0.67, + "learning_rate": 0.000511941370149453, + "loss": 1.6406, + "step": 7559 + }, + { + "epoch": 0.67, + "learning_rate": 0.000511689898517288, + "loss": 1.5801, + "step": 7560 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005114384674269603, + "loss": 1.5957, + "step": 7561 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005111870768993448, + "loss": 1.7246, + "step": 7562 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005109357269553131, + "loss": 1.6602, + "step": 7563 + }, + { + "epoch": 0.67, + "learning_rate": 0.000510684417615734, + "loss": 1.6035, + "step": 7564 + }, + { + "epoch": 0.67, + "learning_rate": 0.000510433148901472, + "loss": 1.4609, + "step": 7565 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005101819208333887, + "loss": 1.5547, + "step": 7566 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005099307334323418, + "loss": 1.6777, + "step": 7567 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005096795867191868, + "loss": 1.5801, + "step": 7568 + }, + { + "epoch": 0.67, + "learning_rate": 0.000509428480714775, + "loss": 1.5684, + "step": 7569 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005091774154399542, + "loss": 1.4492, + "step": 7570 + }, + { + "epoch": 0.67, + "learning_rate": 0.000508926390915569, + "loss": 1.6562, + "step": 7571 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005086754071624604, + "loss": 1.5879, + "step": 7572 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005084244642014669, + "loss": 1.5312, + "step": 7573 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005081735620534228, + "loss": 1.5781, + "step": 7574 + }, + { + "epoch": 0.67, + "learning_rate": 0.000507922700739159, + "loss": 1.6133, + "step": 7575 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005076718802795033, + "loss": 1.6465, + "step": 7576 + }, + { + "epoch": 0.67, + "learning_rate": 0.00050742110069528, + "loss": 1.4727, + "step": 7577 + }, + { + "epoch": 0.67, + "learning_rate": 0.00050717036200731, + "loss": 1.6367, + "step": 7578 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005069196642364104, + "loss": 1.5566, + "step": 7579 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005066690074033962, + "loss": 1.6934, + "step": 7580 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005064183915290776, + "loss": 1.5078, + "step": 7581 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005061678166342621, + "loss": 1.5371, + "step": 7582 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005059172827397531, + "loss": 1.5195, + "step": 7583 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005056667898663521, + "loss": 1.5957, + "step": 7584 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005054163380348555, + "loss": 1.543, + "step": 7585 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005051659272660574, + "loss": 1.5293, + "step": 7586 + }, + { + "epoch": 0.67, + "learning_rate": 0.0005049155575807473, + "loss": 1.5957, + "step": 7587 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005046652289997131, + "loss": 1.5996, + "step": 7588 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005044149415437378, + "loss": 1.5742, + "step": 7589 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005041646952336016, + "loss": 1.6465, + "step": 7590 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005039144900900806, + "loss": 1.5566, + "step": 7591 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005036643261339489, + "loss": 1.5586, + "step": 7592 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005034142033859758, + "loss": 1.6641, + "step": 7593 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005031641218669279, + "loss": 1.6855, + "step": 7594 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005029140815975675, + "loss": 1.4609, + "step": 7595 + }, + { + "epoch": 0.68, + "learning_rate": 0.000502664082598655, + "loss": 1.582, + "step": 7596 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005024141248909464, + "loss": 1.4336, + "step": 7597 + }, + { + "epoch": 0.68, + "learning_rate": 0.000502164208495194, + "loss": 1.5742, + "step": 7598 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005019143334321469, + "loss": 1.6016, + "step": 7599 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005016644997225516, + "loss": 1.6074, + "step": 7600 + }, + { + "epoch": 0.68, + "learning_rate": 0.00050141470738715, + "loss": 1.5977, + "step": 7601 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005011649564466813, + "loss": 1.7188, + "step": 7602 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005009152469218805, + "loss": 1.5645, + "step": 7603 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005006655788334806, + "loss": 1.6641, + "step": 7604 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005004159522022095, + "loss": 1.5234, + "step": 7605 + }, + { + "epoch": 0.68, + "learning_rate": 0.0005001663670487926, + "loss": 1.625, + "step": 7606 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004999168233939514, + "loss": 1.5938, + "step": 7607 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004996673212584047, + "loss": 1.5781, + "step": 7608 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004994178606628673, + "loss": 1.623, + "step": 7609 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004991684416280503, + "loss": 1.5625, + "step": 7610 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004989190641746615, + "loss": 1.6582, + "step": 7611 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004986697283234059, + "loss": 1.6133, + "step": 7612 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004984204340949845, + "loss": 1.6582, + "step": 7613 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004981711815100948, + "loss": 1.625, + "step": 7614 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004979219705894308, + "loss": 1.6094, + "step": 7615 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004976728013536832, + "loss": 1.6113, + "step": 7616 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004974236738235394, + "loss": 1.709, + "step": 7617 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004971745880196826, + "loss": 1.4629, + "step": 7618 + }, + { + "epoch": 0.68, + "learning_rate": 0.000496925543962794, + "loss": 1.5215, + "step": 7619 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004966765416735499, + "loss": 1.5293, + "step": 7620 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004964275811726237, + "loss": 1.584, + "step": 7621 + }, + { + "epoch": 0.68, + "learning_rate": 0.000496178662480685, + "loss": 1.6875, + "step": 7622 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004959297856184008, + "loss": 1.5938, + "step": 7623 + }, + { + "epoch": 0.68, + "learning_rate": 0.000495680950606434, + "loss": 1.5898, + "step": 7624 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004954321574654436, + "loss": 1.5332, + "step": 7625 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004951834062160857, + "loss": 1.5215, + "step": 7626 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004949346968790133, + "loss": 1.6895, + "step": 7627 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004946860294748751, + "loss": 1.627, + "step": 7628 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004944374040243166, + "loss": 1.4883, + "step": 7629 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004941888205479795, + "loss": 1.582, + "step": 7630 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004939402790665033, + "loss": 1.6172, + "step": 7631 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004936917796005226, + "loss": 1.6406, + "step": 7632 + }, + { + "epoch": 0.68, + "learning_rate": 0.000493443322170669, + "loss": 1.6113, + "step": 7633 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004931949067975703, + "loss": 1.5586, + "step": 7634 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004929465335018519, + "loss": 1.582, + "step": 7635 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004926982023041346, + "loss": 1.5625, + "step": 7636 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004924499132250357, + "loss": 1.6738, + "step": 7637 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004922016662851695, + "loss": 1.666, + "step": 7638 + }, + { + "epoch": 0.68, + "learning_rate": 0.000491953461505147, + "loss": 1.666, + "step": 7639 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004917052989055751, + "loss": 1.4102, + "step": 7640 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004914571785070575, + "loss": 1.5059, + "step": 7641 + }, + { + "epoch": 0.68, + "learning_rate": 0.000491209100330194, + "loss": 1.5664, + "step": 7642 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004909610643955818, + "loss": 1.6562, + "step": 7643 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004907130707238137, + "loss": 1.5703, + "step": 7644 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004904651193354795, + "loss": 1.6875, + "step": 7645 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004902172102511646, + "loss": 1.5859, + "step": 7646 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004899693434914528, + "loss": 1.5566, + "step": 7647 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004897215190769224, + "loss": 1.5762, + "step": 7648 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004894737370281493, + "loss": 1.6387, + "step": 7649 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004892259973657051, + "loss": 1.627, + "step": 7650 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004889783001101585, + "loss": 1.4785, + "step": 7651 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004887306452820749, + "loss": 1.5156, + "step": 7652 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004884830329020155, + "loss": 1.6641, + "step": 7653 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004882354629905382, + "loss": 1.6484, + "step": 7654 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004879879355681975, + "loss": 1.7188, + "step": 7655 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048774045065554453, + "loss": 1.5996, + "step": 7656 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048749300827312625, + "loss": 1.7227, + "step": 7657 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048724560844148635, + "loss": 1.6191, + "step": 7658 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004869982511811659, + "loss": 1.6113, + "step": 7659 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004867509365127013, + "loss": 1.5977, + "step": 7660 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048650366445662584, + "loss": 1.4902, + "step": 7661 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048625643503346874, + "loss": 1.6953, + "step": 7662 + }, + { + "epoch": 0.68, + "learning_rate": 0.000486009248263757, + "loss": 1.541, + "step": 7663 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004857621041680128, + "loss": 1.6309, + "step": 7664 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004855150027667553, + "loss": 1.6172, + "step": 7665 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004852679440804997, + "loss": 1.541, + "step": 7666 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004850209281297585, + "loss": 1.6621, + "step": 7667 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004847739549350401, + "loss": 1.748, + "step": 7668 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048452702451684917, + "loss": 1.6465, + "step": 7669 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048428013689568675, + "loss": 1.625, + "step": 7670 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048403329209205137, + "loss": 1.5996, + "step": 7671 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048378649012643693, + "loss": 1.6348, + "step": 7672 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004835397310193341, + "loss": 1.4219, + "step": 7673 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004832930147912296, + "loss": 1.6484, + "step": 7674 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048304634146260764, + "loss": 1.4805, + "step": 7675 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048279971105394805, + "loss": 1.6562, + "step": 7676 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048255312358572714, + "loss": 1.6562, + "step": 7677 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004823065790784173, + "loss": 1.5488, + "step": 7678 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004820600775524888, + "loss": 1.6699, + "step": 7679 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004818136190284069, + "loss": 1.582, + "step": 7680 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048156720352663377, + "loss": 1.6621, + "step": 7681 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048132083106762756, + "loss": 1.6367, + "step": 7682 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004810745016718442, + "loss": 1.5762, + "step": 7683 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048082821535973464, + "loss": 1.5, + "step": 7684 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004805819721517467, + "loss": 1.6055, + "step": 7685 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048033577206832445, + "loss": 1.625, + "step": 7686 + }, + { + "epoch": 0.68, + "learning_rate": 0.00048008961512990925, + "loss": 1.5703, + "step": 7687 + }, + { + "epoch": 0.68, + "learning_rate": 0.00047984350135693777, + "loss": 1.5566, + "step": 7688 + }, + { + "epoch": 0.68, + "learning_rate": 0.00047959743076984374, + "loss": 1.6055, + "step": 7689 + }, + { + "epoch": 0.68, + "learning_rate": 0.00047935140338905657, + "loss": 1.502, + "step": 7690 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004791054192350035, + "loss": 1.6699, + "step": 7691 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004788594783281068, + "loss": 1.623, + "step": 7692 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004786135806887858, + "loss": 1.5078, + "step": 7693 + }, + { + "epoch": 0.68, + "learning_rate": 0.00047836772633745594, + "loss": 1.6562, + "step": 7694 + }, + { + "epoch": 0.68, + "learning_rate": 0.00047812191529452943, + "loss": 1.5195, + "step": 7695 + }, + { + "epoch": 0.68, + "learning_rate": 0.00047787614758041454, + "loss": 1.5254, + "step": 7696 + }, + { + "epoch": 0.68, + "learning_rate": 0.00047763042321551574, + "loss": 1.4902, + "step": 7697 + }, + { + "epoch": 0.68, + "learning_rate": 0.000477384742220235, + "loss": 1.668, + "step": 7698 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004771391046149696, + "loss": 1.5645, + "step": 7699 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004768935104201133, + "loss": 1.623, + "step": 7700 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047664795965605646, + "loss": 1.5605, + "step": 7701 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047640245234318636, + "loss": 1.5977, + "step": 7702 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047615698850188607, + "loss": 1.5117, + "step": 7703 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047591156815253487, + "loss": 1.4648, + "step": 7704 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004756661913155087, + "loss": 1.4004, + "step": 7705 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004754208580111804, + "loss": 1.5547, + "step": 7706 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004751755682599184, + "loss": 1.6289, + "step": 7707 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047493032208208787, + "loss": 1.4941, + "step": 7708 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047468511949804994, + "loss": 1.4941, + "step": 7709 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004744399605281633, + "loss": 1.5605, + "step": 7710 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047419484519278164, + "loss": 1.5625, + "step": 7711 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047394977351225586, + "loss": 1.6543, + "step": 7712 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004737047455069324, + "loss": 1.5977, + "step": 7713 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004734597611971556, + "loss": 1.5469, + "step": 7714 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004732148206032647, + "loss": 1.5605, + "step": 7715 + }, + { + "epoch": 0.69, + "learning_rate": 0.000472969923745596, + "loss": 1.7695, + "step": 7716 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004727250706444813, + "loss": 1.6738, + "step": 7717 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004724802613202507, + "loss": 1.5605, + "step": 7718 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004722354957932287, + "loss": 1.5059, + "step": 7719 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047199077408373693, + "loss": 1.6367, + "step": 7720 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004717460962120931, + "loss": 1.6445, + "step": 7721 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004715014621986123, + "loss": 1.498, + "step": 7722 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004712568720636048, + "loss": 1.7188, + "step": 7723 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047101232582737754, + "loss": 1.4961, + "step": 7724 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004707678235102337, + "loss": 1.6133, + "step": 7725 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004705233651324737, + "loss": 1.5371, + "step": 7726 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004702789507143933, + "loss": 1.6465, + "step": 7727 + }, + { + "epoch": 0.69, + "learning_rate": 0.00047003458027628485, + "loss": 1.5312, + "step": 7728 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046979025383843734, + "loss": 1.5723, + "step": 7729 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004695459714211354, + "loss": 1.6172, + "step": 7730 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004693017330446613, + "loss": 1.5996, + "step": 7731 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046905753872929256, + "loss": 1.6133, + "step": 7732 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004688133884953032, + "loss": 1.5078, + "step": 7733 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004685692823629639, + "loss": 1.5234, + "step": 7734 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046832522035254156, + "loss": 1.6719, + "step": 7735 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004680812024842992, + "loss": 1.6543, + "step": 7736 + }, + { + "epoch": 0.69, + "learning_rate": 0.000467837228778496, + "loss": 1.6035, + "step": 7737 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046759329925538873, + "loss": 1.5742, + "step": 7738 + }, + { + "epoch": 0.69, + "learning_rate": 0.000467349413935229, + "loss": 1.582, + "step": 7739 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004671055728382656, + "loss": 1.7637, + "step": 7740 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046686177598474266, + "loss": 1.6836, + "step": 7741 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046661802339490247, + "loss": 1.6113, + "step": 7742 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046637431508898185, + "loss": 1.4824, + "step": 7743 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004661306510872149, + "loss": 1.4922, + "step": 7744 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004658870314098311, + "loss": 1.6289, + "step": 7745 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004656434560770579, + "loss": 1.5938, + "step": 7746 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046539992510911764, + "loss": 1.6973, + "step": 7747 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004651564385262295, + "loss": 1.6211, + "step": 7748 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004649129963486084, + "loss": 1.4766, + "step": 7749 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046466959859646685, + "loss": 1.5859, + "step": 7750 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004644262452900125, + "loss": 1.5918, + "step": 7751 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004641829364494499, + "loss": 1.668, + "step": 7752 + }, + { + "epoch": 0.69, + "learning_rate": 0.000463939672094979, + "loss": 1.5605, + "step": 7753 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004636964522467978, + "loss": 1.5664, + "step": 7754 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046345327692509907, + "loss": 1.4766, + "step": 7755 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004632101461500725, + "loss": 1.6973, + "step": 7756 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004629670599419035, + "loss": 1.5996, + "step": 7757 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046272401832077493, + "loss": 1.5332, + "step": 7758 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046248102130686496, + "loss": 1.6758, + "step": 7759 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046223806892034834, + "loss": 1.5469, + "step": 7760 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046199516118139596, + "loss": 1.6387, + "step": 7761 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004617522981101756, + "loss": 1.6191, + "step": 7762 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004615094797268508, + "loss": 1.5508, + "step": 7763 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004612667060515814, + "loss": 1.4082, + "step": 7764 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046102397710452324, + "loss": 1.6094, + "step": 7765 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004607812929058296, + "loss": 1.5801, + "step": 7766 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046053865347564895, + "loss": 1.6953, + "step": 7767 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004602960588341263, + "loss": 1.5957, + "step": 7768 + }, + { + "epoch": 0.69, + "learning_rate": 0.00046005350900140275, + "loss": 1.5938, + "step": 7769 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045981100399761655, + "loss": 1.6309, + "step": 7770 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045956854384290136, + "loss": 1.7129, + "step": 7771 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004593261285573873, + "loss": 1.6367, + "step": 7772 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045908375816120097, + "loss": 1.6289, + "step": 7773 + }, + { + "epoch": 0.69, + "learning_rate": 0.000458841432674465, + "loss": 1.5293, + "step": 7774 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045859915211729843, + "loss": 1.6172, + "step": 7775 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045835691650981616, + "loss": 1.6289, + "step": 7776 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045811472587213066, + "loss": 1.375, + "step": 7777 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004578725802243491, + "loss": 1.6035, + "step": 7778 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004576304795865758, + "loss": 1.707, + "step": 7779 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045738842397891066, + "loss": 1.6055, + "step": 7780 + }, + { + "epoch": 0.69, + "learning_rate": 0.000457146413421451, + "loss": 1.6836, + "step": 7781 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004569044479342893, + "loss": 1.6055, + "step": 7782 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004566625275375149, + "loss": 1.5898, + "step": 7783 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045642065225121254, + "loss": 1.541, + "step": 7784 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004561788220954648, + "loss": 1.4727, + "step": 7785 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004559370370903491, + "loss": 1.5391, + "step": 7786 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004556952972559395, + "loss": 1.5898, + "step": 7787 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004554536026123063, + "loss": 1.6113, + "step": 7788 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045521195317951667, + "loss": 1.6035, + "step": 7789 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004549703489776331, + "loss": 1.4688, + "step": 7790 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045472879002671495, + "loss": 1.5547, + "step": 7791 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045448727634681696, + "loss": 1.5859, + "step": 7792 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045424580795799173, + "loss": 1.5352, + "step": 7793 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004540043848802866, + "loss": 1.4414, + "step": 7794 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004537630071337456, + "loss": 1.5449, + "step": 7795 + }, + { + "epoch": 0.69, + "learning_rate": 0.000453521674738409, + "loss": 1.5293, + "step": 7796 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004532803877143138, + "loss": 1.6562, + "step": 7797 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004530391460814925, + "loss": 1.5684, + "step": 7798 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045279794985997427, + "loss": 1.6582, + "step": 7799 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045255679906978377, + "loss": 1.502, + "step": 7800 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004523156937309434, + "loss": 1.5859, + "step": 7801 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045207463386347046, + "loss": 1.627, + "step": 7802 + }, + { + "epoch": 0.69, + "learning_rate": 0.000451833619487379, + "loss": 1.6426, + "step": 7803 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004515926506226786, + "loss": 1.5605, + "step": 7804 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004513517272893766, + "loss": 1.5176, + "step": 7805 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045111084950747506, + "loss": 1.5195, + "step": 7806 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045087001729697296, + "loss": 1.6484, + "step": 7807 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045062923067786475, + "loss": 1.5098, + "step": 7808 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004503884896701428, + "loss": 1.4746, + "step": 7809 + }, + { + "epoch": 0.69, + "learning_rate": 0.00045014779429379385, + "loss": 1.6641, + "step": 7810 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004499071445688017, + "loss": 1.5996, + "step": 7811 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004496665405151462, + "loss": 1.5801, + "step": 7812 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004494259821528035, + "loss": 1.5684, + "step": 7813 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044918546950174597, + "loss": 1.5234, + "step": 7814 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044894500258194206, + "loss": 1.668, + "step": 7815 + }, + { + "epoch": 0.7, + "learning_rate": 0.000448704581413356, + "loss": 1.5918, + "step": 7816 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004484642060159496, + "loss": 1.6387, + "step": 7817 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044822387640967956, + "loss": 1.5977, + "step": 7818 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004479835926144992, + "loss": 1.4512, + "step": 7819 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044774335465035754, + "loss": 1.5762, + "step": 7820 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044750316253720123, + "loss": 1.7012, + "step": 7821 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044726301629497166, + "loss": 1.5293, + "step": 7822 + }, + { + "epoch": 0.7, + "learning_rate": 0.000447022915943607, + "loss": 1.5332, + "step": 7823 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044678286150304106, + "loss": 1.5234, + "step": 7824 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044654285299320495, + "loss": 1.418, + "step": 7825 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044630289043402527, + "loss": 1.541, + "step": 7826 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004460629738454246, + "loss": 1.5801, + "step": 7827 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004458231032473218, + "loss": 1.6562, + "step": 7828 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004455832786596325, + "loss": 1.5879, + "step": 7829 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004453435001022681, + "loss": 1.6016, + "step": 7830 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004451037675951358, + "loss": 1.5781, + "step": 7831 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044486408115813924, + "loss": 1.6211, + "step": 7832 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004446244408111789, + "loss": 1.5332, + "step": 7833 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044438484657415055, + "loss": 1.627, + "step": 7834 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044414529846694654, + "loss": 1.4668, + "step": 7835 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004439057965094547, + "loss": 1.5332, + "step": 7836 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044366634072156067, + "loss": 1.5586, + "step": 7837 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004434269311231446, + "loss": 1.6406, + "step": 7838 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044318756773408343, + "loss": 1.5859, + "step": 7839 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044294825057425, + "loss": 1.584, + "step": 7840 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004427089796635143, + "loss": 1.5352, + "step": 7841 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004424697550217414, + "loss": 1.5059, + "step": 7842 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044223057666879274, + "loss": 1.7168, + "step": 7843 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044199144462452565, + "loss": 1.5488, + "step": 7844 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044175235890879505, + "loss": 1.6758, + "step": 7845 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044151331954145025, + "loss": 1.627, + "step": 7846 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004412743265423377, + "loss": 1.6582, + "step": 7847 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044103537993129914, + "loss": 1.4668, + "step": 7848 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044079647972817404, + "loss": 1.5781, + "step": 7849 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004405576259527966, + "loss": 1.5664, + "step": 7850 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044031881862499747, + "loss": 1.5508, + "step": 7851 + }, + { + "epoch": 0.7, + "learning_rate": 0.00044008005776460357, + "loss": 1.5156, + "step": 7852 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043984134339143823, + "loss": 1.5273, + "step": 7853 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004396026755253204, + "loss": 1.5781, + "step": 7854 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043936405418606506, + "loss": 1.6777, + "step": 7855 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043912547939348466, + "loss": 1.6289, + "step": 7856 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004388869511673863, + "loss": 1.6035, + "step": 7857 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043864846952757365, + "loss": 1.6738, + "step": 7858 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043841003449384677, + "loss": 1.6875, + "step": 7859 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004381716460860011, + "loss": 1.6621, + "step": 7860 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004379333043238297, + "loss": 1.6777, + "step": 7861 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043769500922712047, + "loss": 1.6055, + "step": 7862 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043745676081565735, + "loss": 1.627, + "step": 7863 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004372185591092216, + "loss": 1.6406, + "step": 7864 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043698040412758953, + "loss": 1.541, + "step": 7865 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004367422958905339, + "loss": 1.4688, + "step": 7866 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043650423441782325, + "loss": 1.748, + "step": 7867 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004362662197292233, + "loss": 1.498, + "step": 7868 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004360282518444948, + "loss": 1.5977, + "step": 7869 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004357903307833949, + "loss": 1.707, + "step": 7870 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043555245656567667, + "loss": 1.5938, + "step": 7871 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004353146292110903, + "loss": 1.6328, + "step": 7872 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043507684873938103, + "loss": 1.6152, + "step": 7873 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004348391151702904, + "loss": 1.5469, + "step": 7874 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004346014285235559, + "loss": 1.5547, + "step": 7875 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043436378881891205, + "loss": 1.6523, + "step": 7876 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004341261960760886, + "loss": 1.6719, + "step": 7877 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004338886503148115, + "loss": 1.4668, + "step": 7878 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043365115155480274, + "loss": 1.5605, + "step": 7879 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004334136998157813, + "loss": 1.5938, + "step": 7880 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043317629511746103, + "loss": 1.6348, + "step": 7881 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004329389374795525, + "loss": 1.5137, + "step": 7882 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043270162692176197, + "loss": 1.541, + "step": 7883 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043246436346379267, + "loss": 1.7207, + "step": 7884 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004322271471253432, + "loss": 1.6523, + "step": 7885 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004319899779261082, + "loss": 1.5352, + "step": 7886 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004317528558857783, + "loss": 1.7148, + "step": 7887 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004315157810240412, + "loss": 1.584, + "step": 7888 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043127875336057976, + "loss": 1.5, + "step": 7889 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043104177291507294, + "loss": 1.6719, + "step": 7890 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004308048397071961, + "loss": 1.4727, + "step": 7891 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043056795375662064, + "loss": 1.5996, + "step": 7892 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004303311150830138, + "loss": 1.5742, + "step": 7893 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004300943237060392, + "loss": 1.498, + "step": 7894 + }, + { + "epoch": 0.7, + "learning_rate": 0.000429857579645356, + "loss": 1.5449, + "step": 7895 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042962088292062043, + "loss": 1.6387, + "step": 7896 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004293842335514839, + "loss": 1.5156, + "step": 7897 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004291476315575943, + "loss": 1.5039, + "step": 7898 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004289110769585949, + "loss": 1.5938, + "step": 7899 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042867456977412635, + "loss": 1.5938, + "step": 7900 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042843811002382437, + "loss": 1.6172, + "step": 7901 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042820169772732076, + "loss": 1.5469, + "step": 7902 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042796533290424355, + "loss": 1.6406, + "step": 7903 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004277290155742173, + "loss": 1.5703, + "step": 7904 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042749274575686205, + "loss": 1.5156, + "step": 7905 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004272565234717939, + "loss": 1.6035, + "step": 7906 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042702034873862485, + "loss": 1.5, + "step": 7907 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042678422157696396, + "loss": 1.5234, + "step": 7908 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004265481420064153, + "loss": 1.4961, + "step": 7909 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004263121100465793, + "loss": 1.5117, + "step": 7910 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004260761257170522, + "loss": 1.7441, + "step": 7911 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004258401890374272, + "loss": 1.3809, + "step": 7912 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042560430002729245, + "loss": 1.6055, + "step": 7913 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042536845870623264, + "loss": 1.5879, + "step": 7914 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004251326650938281, + "loss": 1.6738, + "step": 7915 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004248969192096562, + "loss": 1.5566, + "step": 7916 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004246612210732895, + "loss": 1.6797, + "step": 7917 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042442557070429657, + "loss": 1.6895, + "step": 7918 + }, + { + "epoch": 0.7, + "learning_rate": 0.000424189968122242, + "loss": 1.627, + "step": 7919 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004239544133466874, + "loss": 1.5996, + "step": 7920 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042371890639718914, + "loss": 1.582, + "step": 7921 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004234834472933002, + "loss": 1.6973, + "step": 7922 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004232480360545692, + "loss": 1.6152, + "step": 7923 + }, + { + "epoch": 0.7, + "learning_rate": 0.00042301267270054176, + "loss": 1.6484, + "step": 7924 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004227773572507586, + "loss": 1.6074, + "step": 7925 + }, + { + "epoch": 0.71, + "learning_rate": 0.00042254208972475674, + "loss": 1.584, + "step": 7926 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004223068701420687, + "loss": 1.6621, + "step": 7927 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004220716985222244, + "loss": 1.5918, + "step": 7928 + }, + { + "epoch": 0.71, + "learning_rate": 0.00042183657488474834, + "loss": 1.5703, + "step": 7929 + }, + { + "epoch": 0.71, + "learning_rate": 0.00042160149924916205, + "loss": 1.5938, + "step": 7930 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004213664716349821, + "loss": 1.4707, + "step": 7931 + }, + { + "epoch": 0.71, + "learning_rate": 0.000421131492061722, + "loss": 1.6504, + "step": 7932 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004208965605488907, + "loss": 1.7402, + "step": 7933 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004206616771159929, + "loss": 1.6543, + "step": 7934 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004204268417825308, + "loss": 1.6289, + "step": 7935 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004201920545680007, + "loss": 1.5977, + "step": 7936 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004199573154918962, + "loss": 1.541, + "step": 7937 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041972262457370626, + "loss": 1.5273, + "step": 7938 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004194879818329156, + "loss": 1.5742, + "step": 7939 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041925338728900633, + "loss": 1.6641, + "step": 7940 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041901884096145505, + "loss": 1.4863, + "step": 7941 + }, + { + "epoch": 0.71, + "learning_rate": 0.000418784342869735, + "loss": 1.6289, + "step": 7942 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041854989303331493, + "loss": 1.6523, + "step": 7943 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041831549147166084, + "loss": 1.5059, + "step": 7944 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004180811382042333, + "loss": 1.502, + "step": 7945 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004178468332504892, + "loss": 1.4688, + "step": 7946 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004176125766298824, + "loss": 1.5762, + "step": 7947 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004173783683618615, + "loss": 1.666, + "step": 7948 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004171442084658718, + "loss": 1.4805, + "step": 7949 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041691009696135387, + "loss": 1.6367, + "step": 7950 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041667603386774546, + "loss": 1.6309, + "step": 7951 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004164420192044794, + "loss": 1.5996, + "step": 7952 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041620805299098454, + "loss": 1.584, + "step": 7953 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004159741352466856, + "loss": 1.5801, + "step": 7954 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041574026599100423, + "loss": 1.6016, + "step": 7955 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004155064452433571, + "loss": 1.7031, + "step": 7956 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041527267302315707, + "loss": 1.5566, + "step": 7957 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004150389493498126, + "loss": 1.5801, + "step": 7958 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004148052742427293, + "loss": 1.6855, + "step": 7959 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004145716477213077, + "loss": 1.5176, + "step": 7960 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004143380698049445, + "loss": 1.623, + "step": 7961 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041410454051303227, + "loss": 1.5469, + "step": 7962 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004138710598649602, + "loss": 1.5703, + "step": 7963 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004136376278801127, + "loss": 1.6758, + "step": 7964 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004134042445778705, + "loss": 1.625, + "step": 7965 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041317090997760973, + "loss": 1.4668, + "step": 7966 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004129376240987037, + "loss": 1.5195, + "step": 7967 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004127043869605206, + "loss": 1.5645, + "step": 7968 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004124711985824249, + "loss": 1.627, + "step": 7969 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041223805898377684, + "loss": 1.5879, + "step": 7970 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004120049681839331, + "loss": 1.6641, + "step": 7971 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041177192620224566, + "loss": 1.4492, + "step": 7972 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004115389330580629, + "loss": 1.4902, + "step": 7973 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041130598877072875, + "loss": 1.5918, + "step": 7974 + }, + { + "epoch": 0.71, + "learning_rate": 0.000411073093359584, + "loss": 1.6758, + "step": 7975 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041084024684396437, + "loss": 1.6055, + "step": 7976 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041060744924320195, + "loss": 1.6465, + "step": 7977 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004103747005766243, + "loss": 1.5059, + "step": 7978 + }, + { + "epoch": 0.71, + "learning_rate": 0.00041014200086355603, + "loss": 1.498, + "step": 7979 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040990935012331665, + "loss": 1.5469, + "step": 7980 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040967674837522197, + "loss": 1.459, + "step": 7981 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040944419563858326, + "loss": 1.4902, + "step": 7982 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004092116919327089, + "loss": 1.5293, + "step": 7983 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040897923727690224, + "loss": 1.5117, + "step": 7984 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040874683169046257, + "loss": 1.6406, + "step": 7985 + }, + { + "epoch": 0.71, + "learning_rate": 0.000408514475192685, + "loss": 1.6523, + "step": 7986 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040828216780286166, + "loss": 1.7402, + "step": 7987 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004080499095402794, + "loss": 1.5996, + "step": 7988 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004078177004242216, + "loss": 1.6094, + "step": 7989 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040758554047396666, + "loss": 1.5859, + "step": 7990 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040735342970879063, + "loss": 1.582, + "step": 7991 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004071213681479641, + "loss": 1.4766, + "step": 7992 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004068893558107536, + "loss": 1.6621, + "step": 7993 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004066573927164219, + "loss": 1.582, + "step": 7994 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004064254788842282, + "loss": 1.584, + "step": 7995 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040619361433342685, + "loss": 1.4922, + "step": 7996 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040596179908326824, + "loss": 1.5918, + "step": 7997 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004057300331529985, + "loss": 1.6074, + "step": 7998 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040549831656186067, + "loss": 1.6172, + "step": 7999 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004052666493290925, + "loss": 1.5469, + "step": 8000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040503503147392816, + "loss": 1.4004, + "step": 8001 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040480346301559745, + "loss": 1.543, + "step": 8002 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040457194397332697, + "loss": 1.6992, + "step": 8003 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040434047436633803, + "loss": 1.7422, + "step": 8004 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040410905421384835, + "loss": 1.5215, + "step": 8005 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004038776835350714, + "loss": 1.6387, + "step": 8006 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040364636234921716, + "loss": 1.4805, + "step": 8007 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004034150906754907, + "loss": 1.582, + "step": 8008 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004031838685330934, + "loss": 1.5859, + "step": 8009 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004029526959412224, + "loss": 1.5859, + "step": 8010 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004027215729190706, + "loss": 1.5762, + "step": 8011 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004024904994858273, + "loss": 1.584, + "step": 8012 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040225947566067656, + "loss": 1.5723, + "step": 8013 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004020285014628, + "loss": 1.6348, + "step": 8014 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004017975769113738, + "loss": 1.6055, + "step": 8015 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040156670202557044, + "loss": 1.5938, + "step": 8016 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040133587682455817, + "loss": 1.6504, + "step": 8017 + }, + { + "epoch": 0.71, + "learning_rate": 0.000401105101327501, + "loss": 1.5742, + "step": 8018 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040087437555355967, + "loss": 1.6191, + "step": 8019 + }, + { + "epoch": 0.71, + "learning_rate": 0.00040064369952188975, + "loss": 1.5723, + "step": 8020 + }, + { + "epoch": 0.71, + "learning_rate": 0.000400413073251643, + "loss": 1.6504, + "step": 8021 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004001824967619668, + "loss": 1.7207, + "step": 8022 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003999519700720056, + "loss": 1.6523, + "step": 8023 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003997214932008981, + "loss": 1.6582, + "step": 8024 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003994910661677801, + "loss": 1.5352, + "step": 8025 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003992606889917819, + "loss": 1.5684, + "step": 8026 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039903036169203145, + "loss": 1.6094, + "step": 8027 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039880008428765134, + "loss": 1.7012, + "step": 8028 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039856985679776017, + "loss": 1.4941, + "step": 8029 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003983396792414722, + "loss": 1.5645, + "step": 8030 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003981095516378986, + "loss": 1.6035, + "step": 8031 + }, + { + "epoch": 0.71, + "learning_rate": 0.00039787947400614533, + "loss": 1.5996, + "step": 8032 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003976494463653142, + "loss": 1.5449, + "step": 8033 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003974194687345037, + "loss": 1.5664, + "step": 8034 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003971895411328077, + "loss": 1.4492, + "step": 8035 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003969596635793156, + "loss": 1.6406, + "step": 8036 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003967298360931125, + "loss": 1.5664, + "step": 8037 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039650005869328075, + "loss": 1.5586, + "step": 8038 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039627033139889715, + "loss": 1.5195, + "step": 8039 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039604065422903456, + "loss": 1.5059, + "step": 8040 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039581102720276183, + "loss": 1.6562, + "step": 8041 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039558145033914417, + "loss": 1.6289, + "step": 8042 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039535192365724184, + "loss": 1.6543, + "step": 8043 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003951224471761112, + "loss": 1.6406, + "step": 8044 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003948930209148043, + "loss": 1.5762, + "step": 8045 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003946636448923697, + "loss": 1.543, + "step": 8046 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003944343191278511, + "loss": 1.6016, + "step": 8047 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039420504364028817, + "loss": 1.5918, + "step": 8048 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039397581844871654, + "loss": 1.668, + "step": 8049 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003937466435721674, + "loss": 1.5, + "step": 8050 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039351751902966806, + "loss": 1.6504, + "step": 8051 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003932884448402411, + "loss": 1.6016, + "step": 8052 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039305942102290607, + "loss": 1.6504, + "step": 8053 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039283044759667743, + "loss": 1.4824, + "step": 8054 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003926015245805654, + "loss": 1.5801, + "step": 8055 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003923726519935765, + "loss": 1.5566, + "step": 8056 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003921438298547123, + "loss": 1.6113, + "step": 8057 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003919150581829716, + "loss": 1.5156, + "step": 8058 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003916863369973475, + "loss": 1.5352, + "step": 8059 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003914576663168298, + "loss": 1.4785, + "step": 8060 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039122904616040324, + "loss": 1.5449, + "step": 8061 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003910004765470497, + "loss": 1.6094, + "step": 8062 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039077195749574604, + "loss": 1.4277, + "step": 8063 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003905434890254647, + "loss": 1.5254, + "step": 8064 + }, + { + "epoch": 0.72, + "learning_rate": 0.00039031507115517395, + "loss": 1.5918, + "step": 8065 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003900867039038388, + "loss": 1.5293, + "step": 8066 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003898583872904192, + "loss": 1.6035, + "step": 8067 + }, + { + "epoch": 0.72, + "learning_rate": 0.000389630121333871, + "loss": 1.6094, + "step": 8068 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038940190605314554, + "loss": 1.5781, + "step": 8069 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038917374146719096, + "loss": 1.5859, + "step": 8070 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003889456275949504, + "loss": 1.5645, + "step": 8071 + }, + { + "epoch": 0.72, + "learning_rate": 0.000388717564455363, + "loss": 1.5898, + "step": 8072 + }, + { + "epoch": 0.72, + "learning_rate": 0.000388489552067363, + "loss": 1.5664, + "step": 8073 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038826159044988217, + "loss": 1.5508, + "step": 8074 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038803367962184635, + "loss": 1.5098, + "step": 8075 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038780581960217787, + "loss": 1.6113, + "step": 8076 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038757801040979436, + "loss": 1.6289, + "step": 8077 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003873502520636104, + "loss": 1.6289, + "step": 8078 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038712254458253515, + "loss": 1.8047, + "step": 8079 + }, + { + "epoch": 0.72, + "learning_rate": 0.000386894887985474, + "loss": 1.5352, + "step": 8080 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038666728229132784, + "loss": 1.5469, + "step": 8081 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038643972751899414, + "loss": 1.4902, + "step": 8082 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003862122236873653, + "loss": 1.5195, + "step": 8083 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003859847708153298, + "loss": 1.5859, + "step": 8084 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038575736892177136, + "loss": 1.5156, + "step": 8085 + }, + { + "epoch": 0.72, + "learning_rate": 0.000385530018025571, + "loss": 1.5137, + "step": 8086 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003853027181456037, + "loss": 1.5898, + "step": 8087 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038507546930074136, + "loss": 1.6055, + "step": 8088 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038484827150985103, + "loss": 1.4727, + "step": 8089 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003846211247917958, + "loss": 1.6504, + "step": 8090 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038439402916543453, + "loss": 1.5508, + "step": 8091 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003841669846496214, + "loss": 1.5547, + "step": 8092 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038393999126320745, + "loss": 1.4297, + "step": 8093 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038371304902503847, + "loss": 1.543, + "step": 8094 + }, + { + "epoch": 0.72, + "learning_rate": 0.000383486157953956, + "loss": 1.5898, + "step": 8095 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003832593180687981, + "loss": 1.623, + "step": 8096 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038303252938839727, + "loss": 1.4766, + "step": 8097 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038280579193158363, + "loss": 1.5586, + "step": 8098 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038257910571718145, + "loss": 1.4785, + "step": 8099 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038235247076401146, + "loss": 1.6777, + "step": 8100 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003821258870908896, + "loss": 1.5762, + "step": 8101 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003818993547166285, + "loss": 1.6543, + "step": 8102 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038167287366003565, + "loss": 1.5234, + "step": 8103 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003814464439399147, + "loss": 1.5215, + "step": 8104 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003812200655750645, + "loss": 1.582, + "step": 8105 + }, + { + "epoch": 0.72, + "learning_rate": 0.00038099373858428064, + "loss": 1.4062, + "step": 8106 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003807674629863538, + "loss": 1.666, + "step": 8107 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003805412388000703, + "loss": 1.625, + "step": 8108 + }, + { + "epoch": 0.72, + "learning_rate": 0.000380315066044212, + "loss": 1.5508, + "step": 8109 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003800889447375575, + "loss": 1.5742, + "step": 8110 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037986287489888014, + "loss": 1.5469, + "step": 8111 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037963685654694945, + "loss": 1.6191, + "step": 8112 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037941088970053005, + "loss": 1.5508, + "step": 8113 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003791849743783835, + "loss": 1.5957, + "step": 8114 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037895911059926613, + "loss": 1.541, + "step": 8115 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003787332983819298, + "loss": 1.5625, + "step": 8116 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037850753774512326, + "loss": 1.6992, + "step": 8117 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037828182870758986, + "loss": 1.5586, + "step": 8118 + }, + { + "epoch": 0.72, + "learning_rate": 0.000378056171288069, + "loss": 1.5449, + "step": 8119 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003778305655052955, + "loss": 1.6855, + "step": 8120 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037760501137800096, + "loss": 1.6035, + "step": 8121 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037737950892491166, + "loss": 1.5801, + "step": 8122 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003771540581647498, + "loss": 1.5195, + "step": 8123 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037692865911623306, + "loss": 1.4941, + "step": 8124 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037670331179807595, + "loss": 1.6074, + "step": 8125 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037647801622898736, + "loss": 1.623, + "step": 8126 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037625277242767243, + "loss": 1.6348, + "step": 8127 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037602758041283213, + "loss": 1.4102, + "step": 8128 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037580244020316277, + "loss": 1.6445, + "step": 8129 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037557735181735675, + "loss": 1.6035, + "step": 8130 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003753523152741015, + "loss": 1.5566, + "step": 8131 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003751273305920814, + "loss": 1.5078, + "step": 8132 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037490239778997537, + "loss": 1.5234, + "step": 8133 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037467751688645846, + "loss": 1.5625, + "step": 8134 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037445268790020126, + "loss": 1.6113, + "step": 8135 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037422791084986985, + "loss": 1.5215, + "step": 8136 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037400318575412706, + "loss": 1.6426, + "step": 8137 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003737785126316301, + "loss": 1.5488, + "step": 8138 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003735538915010327, + "loss": 1.4766, + "step": 8139 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003733293223809834, + "loss": 1.5898, + "step": 8140 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003731048052901277, + "loss": 1.7754, + "step": 8141 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003728803402471058, + "loss": 1.5723, + "step": 8142 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037265592727055385, + "loss": 1.4043, + "step": 8143 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037243156637910345, + "loss": 1.498, + "step": 8144 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037220725759138263, + "loss": 1.4375, + "step": 8145 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037198300092601436, + "loss": 1.5039, + "step": 8146 + }, + { + "epoch": 0.72, + "learning_rate": 0.00037175879640161737, + "loss": 1.5254, + "step": 8147 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003715346440368059, + "loss": 1.5762, + "step": 8148 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003713105438501909, + "loss": 1.6133, + "step": 8149 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003710864958603779, + "loss": 1.625, + "step": 8150 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003708625000859683, + "loss": 1.6504, + "step": 8151 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003706385565455591, + "loss": 1.5664, + "step": 8152 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003704146652577437, + "loss": 1.5254, + "step": 8153 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003701908262411104, + "loss": 1.6484, + "step": 8154 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003699670395142435, + "loss": 1.6973, + "step": 8155 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003697433050957222, + "loss": 1.5684, + "step": 8156 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003695196230041229, + "loss": 1.7812, + "step": 8157 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003692959932580163, + "loss": 1.6602, + "step": 8158 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003690724158759694, + "loss": 1.709, + "step": 8159 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036884889087654405, + "loss": 1.627, + "step": 8160 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036862541827829923, + "loss": 1.5703, + "step": 8161 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036840199809978834, + "loss": 1.5977, + "step": 8162 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036817863035956065, + "loss": 1.6016, + "step": 8163 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036795531507616107, + "loss": 1.5664, + "step": 8164 + }, + { + "epoch": 0.73, + "learning_rate": 0.000367732052268131, + "loss": 1.5957, + "step": 8165 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036750884195400625, + "loss": 1.541, + "step": 8166 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003672856841523189, + "loss": 1.6758, + "step": 8167 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036706257888159655, + "loss": 1.4629, + "step": 8168 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036683952616036256, + "loss": 1.666, + "step": 8169 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003666165260071357, + "loss": 1.7168, + "step": 8170 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036639357844043033, + "loss": 1.5312, + "step": 8171 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003661706834787571, + "loss": 1.5645, + "step": 8172 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003659478411406216, + "loss": 1.4688, + "step": 8173 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003657250514445254, + "loss": 1.6191, + "step": 8174 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003655023144089652, + "loss": 1.752, + "step": 8175 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003652796300524337, + "loss": 1.5234, + "step": 8176 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036505699839341975, + "loss": 1.5195, + "step": 8177 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036483441945040694, + "loss": 1.5508, + "step": 8178 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003646118932418748, + "loss": 1.6797, + "step": 8179 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036438941978629834, + "loss": 1.5801, + "step": 8180 + }, + { + "epoch": 0.73, + "learning_rate": 0.000364166999102149, + "loss": 1.5156, + "step": 8181 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003639446312078927, + "loss": 1.541, + "step": 8182 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003637223161219917, + "loss": 1.5586, + "step": 8183 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003635000538629032, + "loss": 1.625, + "step": 8184 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003632778444490811, + "loss": 1.4434, + "step": 8185 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036305568789897415, + "loss": 1.543, + "step": 8186 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036283358423102664, + "loss": 1.5684, + "step": 8187 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003626115334636785, + "loss": 1.6113, + "step": 8188 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036238953561536593, + "loss": 1.6152, + "step": 8189 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003621675907045201, + "loss": 1.5391, + "step": 8190 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036194569874956783, + "loss": 1.5605, + "step": 8191 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003617238597689314, + "loss": 1.6133, + "step": 8192 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003615020737810296, + "loss": 1.5312, + "step": 8193 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036128034080427576, + "loss": 1.4629, + "step": 8194 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036105866085707917, + "loss": 1.7285, + "step": 8195 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003608370339578446, + "loss": 1.4902, + "step": 8196 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036061546012497306, + "loss": 1.4375, + "step": 8197 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003603939393768605, + "loss": 1.5879, + "step": 8198 + }, + { + "epoch": 0.73, + "learning_rate": 0.00036017247173189814, + "loss": 1.7324, + "step": 8199 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035995105720847397, + "loss": 1.5996, + "step": 8200 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003597296958249707, + "loss": 1.543, + "step": 8201 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035950838759976666, + "loss": 1.5488, + "step": 8202 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003592871325512357, + "loss": 1.623, + "step": 8203 + }, + { + "epoch": 0.73, + "learning_rate": 0.000359065930697748, + "loss": 1.5625, + "step": 8204 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035884478205766855, + "loss": 1.4863, + "step": 8205 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035862368664935806, + "loss": 1.6875, + "step": 8206 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003584026444911731, + "loss": 1.6016, + "step": 8207 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035818165560146554, + "loss": 1.5996, + "step": 8208 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035796071999858294, + "loss": 1.541, + "step": 8209 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035773983770086813, + "loss": 1.6406, + "step": 8210 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035751900872666044, + "loss": 1.5781, + "step": 8211 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003572982330942939, + "loss": 1.7305, + "step": 8212 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003570775108220982, + "loss": 1.541, + "step": 8213 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035685684192839886, + "loss": 1.5801, + "step": 8214 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003566362264315165, + "loss": 1.5039, + "step": 8215 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035641566434976846, + "loss": 1.6484, + "step": 8216 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035619515570146633, + "loss": 1.5332, + "step": 8217 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035597470050491786, + "loss": 1.6641, + "step": 8218 + }, + { + "epoch": 0.73, + "learning_rate": 0.000355754298778426, + "loss": 1.6523, + "step": 8219 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035553395054029025, + "loss": 1.5723, + "step": 8220 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003553136558088046, + "loss": 1.5156, + "step": 8221 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035509341460225884, + "loss": 1.6406, + "step": 8222 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003548732269389384, + "loss": 1.5996, + "step": 8223 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003546530928371248, + "loss": 1.6934, + "step": 8224 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003544330123150942, + "loss": 1.6562, + "step": 8225 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003542129853911189, + "loss": 1.5215, + "step": 8226 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035399301208346626, + "loss": 1.5273, + "step": 8227 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003537730924104, + "loss": 1.6738, + "step": 8228 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035355322639017873, + "loss": 1.5879, + "step": 8229 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003533334140410568, + "loss": 1.6211, + "step": 8230 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003531136553812836, + "loss": 1.709, + "step": 8231 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035289395042910533, + "loss": 1.5078, + "step": 8232 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003526742992027625, + "loss": 1.623, + "step": 8233 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035245470172049176, + "loss": 1.6602, + "step": 8234 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003522351580005247, + "loss": 1.543, + "step": 8235 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035201566806108964, + "loss": 1.498, + "step": 8236 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035179623192040923, + "loss": 1.6914, + "step": 8237 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003515768495967022, + "loss": 1.666, + "step": 8238 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003513575211081823, + "loss": 1.6152, + "step": 8239 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035113824647306003, + "loss": 1.5098, + "step": 8240 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003509190257095402, + "loss": 1.6367, + "step": 8241 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003506998588358235, + "loss": 1.627, + "step": 8242 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035048074587010593, + "loss": 1.4863, + "step": 8243 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035026168683058, + "loss": 1.6562, + "step": 8244 + }, + { + "epoch": 0.73, + "learning_rate": 0.00035004268173543276, + "loss": 1.498, + "step": 8245 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003498237306028469, + "loss": 1.5762, + "step": 8246 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003496048334510009, + "loss": 1.5957, + "step": 8247 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003493859902980686, + "loss": 1.5898, + "step": 8248 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003491672011622195, + "loss": 1.6934, + "step": 8249 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034894846606161803, + "loss": 1.4766, + "step": 8250 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003487297850144254, + "loss": 1.5898, + "step": 8251 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003485111580387972, + "loss": 1.5234, + "step": 8252 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034829258515288485, + "loss": 1.6387, + "step": 8253 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003480740663748353, + "loss": 1.5469, + "step": 8254 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034785560172279073, + "loss": 1.6133, + "step": 8255 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034763719121488993, + "loss": 1.5723, + "step": 8256 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034741883486926573, + "loss": 1.4434, + "step": 8257 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003472005327040474, + "loss": 1.457, + "step": 8258 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034698228473735893, + "loss": 1.5586, + "step": 8259 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003467640909873211, + "loss": 1.6719, + "step": 8260 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003465459514720489, + "loss": 1.5469, + "step": 8261 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003463278662096535, + "loss": 1.6055, + "step": 8262 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003461098352182409, + "loss": 1.4355, + "step": 8263 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003458918585159139, + "loss": 1.6562, + "step": 8264 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034567393612076945, + "loss": 1.5742, + "step": 8265 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003454560680509006, + "loss": 1.7188, + "step": 8266 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034523825432439546, + "loss": 1.5293, + "step": 8267 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003450204949593386, + "loss": 1.5918, + "step": 8268 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003448027899738092, + "loss": 1.5312, + "step": 8269 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003445851393858821, + "loss": 1.6035, + "step": 8270 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003443675432136274, + "loss": 1.5703, + "step": 8271 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003441500014751115, + "loss": 1.6406, + "step": 8272 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034393251418839556, + "loss": 1.5508, + "step": 8273 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003437150813715364, + "loss": 1.4941, + "step": 8274 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034349770304258596, + "loss": 1.5664, + "step": 8275 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034328037921959275, + "loss": 1.6113, + "step": 8276 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034306310992059975, + "loss": 1.5332, + "step": 8277 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003428458951636455, + "loss": 1.6465, + "step": 8278 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003426287349667639, + "loss": 1.5508, + "step": 8279 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034241162934798555, + "loss": 1.5977, + "step": 8280 + }, + { + "epoch": 0.74, + "learning_rate": 0.000342194578325335, + "loss": 1.6777, + "step": 8281 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003419775819168328, + "loss": 1.7012, + "step": 8282 + }, + { + "epoch": 0.74, + "learning_rate": 0.000341760640140495, + "loss": 1.5625, + "step": 8283 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003415437530143337, + "loss": 1.5098, + "step": 8284 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003413269205563554, + "loss": 1.6074, + "step": 8285 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034111014278456276, + "loss": 1.5977, + "step": 8286 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003408934197169536, + "loss": 1.6484, + "step": 8287 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003406767513715214, + "loss": 1.459, + "step": 8288 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003404601377662546, + "loss": 1.5918, + "step": 8289 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034024357891913816, + "loss": 1.582, + "step": 8290 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034002707484815146, + "loss": 1.6387, + "step": 8291 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003398106255712699, + "loss": 1.5977, + "step": 8292 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003395942311064638, + "loss": 1.5488, + "step": 8293 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033937789147169917, + "loss": 1.5059, + "step": 8294 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003391616066849381, + "loss": 1.6289, + "step": 8295 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003389453767641375, + "loss": 1.6875, + "step": 8296 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003387292017272495, + "loss": 1.5273, + "step": 8297 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033851308159222183, + "loss": 1.6484, + "step": 8298 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003382970163769985, + "loss": 1.6211, + "step": 8299 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033808100609951776, + "loss": 1.5566, + "step": 8300 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033786505077771403, + "loss": 1.5801, + "step": 8301 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003376491504295165, + "loss": 1.5117, + "step": 8302 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033743330507285087, + "loss": 1.5605, + "step": 8303 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033721751472563745, + "loss": 1.5508, + "step": 8304 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033700177940579215, + "loss": 1.4609, + "step": 8305 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033678609913122596, + "loss": 1.4805, + "step": 8306 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003365704739198463, + "loss": 1.5996, + "step": 8307 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033635490378955527, + "loss": 1.625, + "step": 8308 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033613938875825037, + "loss": 1.6035, + "step": 8309 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003359239288438244, + "loss": 1.7031, + "step": 8310 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003357085240641665, + "loss": 1.6016, + "step": 8311 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003354931744371603, + "loss": 1.5977, + "step": 8312 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033527787998068525, + "loss": 1.6074, + "step": 8313 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003350626407126156, + "loss": 1.4922, + "step": 8314 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003348474566508224, + "loss": 1.6523, + "step": 8315 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003346323278131709, + "loss": 1.5645, + "step": 8316 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003344172542175219, + "loss": 1.6348, + "step": 8317 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003342022358817317, + "loss": 1.5625, + "step": 8318 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033398727282365283, + "loss": 1.5781, + "step": 8319 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003337723650611322, + "loss": 1.4961, + "step": 8320 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003335575126120124, + "loss": 1.6035, + "step": 8321 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003333427154941312, + "loss": 1.6465, + "step": 8322 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033312797372532277, + "loss": 1.5723, + "step": 8323 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033291328732341566, + "loss": 1.543, + "step": 8324 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003326986563062342, + "loss": 1.6953, + "step": 8325 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033248408069159807, + "loss": 1.5098, + "step": 8326 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003322695604973223, + "loss": 1.6973, + "step": 8327 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003320550957412171, + "loss": 1.7168, + "step": 8328 + }, + { + "epoch": 0.74, + "learning_rate": 0.000331840686441089, + "loss": 1.6895, + "step": 8329 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033162633261473893, + "loss": 1.6602, + "step": 8330 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003314120342799637, + "loss": 1.623, + "step": 8331 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003311977914545552, + "loss": 1.5996, + "step": 8332 + }, + { + "epoch": 0.74, + "learning_rate": 0.000330983604156301, + "loss": 1.6699, + "step": 8333 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003307694724029836, + "loss": 1.6934, + "step": 8334 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033055539621238187, + "loss": 1.5605, + "step": 8335 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003303413756022692, + "loss": 1.6035, + "step": 8336 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003301274105904145, + "loss": 1.668, + "step": 8337 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032991350119458186, + "loss": 1.5312, + "step": 8338 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032969964743253177, + "loss": 1.6035, + "step": 8339 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003294858493220191, + "loss": 1.541, + "step": 8340 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032927210688079423, + "loss": 1.6523, + "step": 8341 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003290584201266028, + "loss": 1.668, + "step": 8342 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032884478907718685, + "loss": 1.4883, + "step": 8343 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003286312137502827, + "loss": 1.5391, + "step": 8344 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003284176941636224, + "loss": 1.4922, + "step": 8345 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032820423033493295, + "loss": 1.623, + "step": 8346 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032799082228193786, + "loss": 1.5527, + "step": 8347 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032777747002235493, + "loss": 1.5508, + "step": 8348 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003275641735738978, + "loss": 1.4766, + "step": 8349 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003273509329542748, + "loss": 1.582, + "step": 8350 + }, + { + "epoch": 0.74, + "learning_rate": 0.000327137748181191, + "loss": 1.502, + "step": 8351 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003269246192723457, + "loss": 1.6309, + "step": 8352 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032671154624543385, + "loss": 1.627, + "step": 8353 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003264985291181455, + "loss": 1.6074, + "step": 8354 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003262855679081669, + "loss": 1.6035, + "step": 8355 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003260726626331788, + "loss": 1.5234, + "step": 8356 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003258598133108577, + "loss": 1.6172, + "step": 8357 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032564701995887503, + "loss": 1.5898, + "step": 8358 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003254342825948985, + "loss": 1.6484, + "step": 8359 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003252216012365903, + "loss": 1.5742, + "step": 8360 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032500897590160817, + "loss": 1.6191, + "step": 8361 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032479640660760514, + "loss": 1.5625, + "step": 8362 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003245838933722302, + "loss": 1.6504, + "step": 8363 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032437143621312693, + "loss": 1.7148, + "step": 8364 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032415903514793456, + "loss": 1.5703, + "step": 8365 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032394669019428755, + "loss": 1.6191, + "step": 8366 + }, + { + "epoch": 0.74, + "learning_rate": 0.000323734401369816, + "loss": 1.6465, + "step": 8367 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003235221686921448, + "loss": 1.6387, + "step": 8368 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003233099921788949, + "loss": 1.5898, + "step": 8369 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032309787184768216, + "loss": 1.5938, + "step": 8370 + }, + { + "epoch": 0.74, + "learning_rate": 0.00032288580771611765, + "loss": 1.5918, + "step": 8371 + }, + { + "epoch": 0.74, + "learning_rate": 0.000322673799801808, + "loss": 1.5039, + "step": 8372 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003224618481223548, + "loss": 1.4512, + "step": 8373 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003222499526953561, + "loss": 1.7031, + "step": 8374 + }, + { + "epoch": 0.75, + "learning_rate": 0.00032203811353840385, + "loss": 1.5977, + "step": 8375 + }, + { + "epoch": 0.75, + "learning_rate": 0.00032182633066908607, + "loss": 1.748, + "step": 8376 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003216146041049858, + "loss": 1.7109, + "step": 8377 + }, + { + "epoch": 0.75, + "learning_rate": 0.000321402933863682, + "loss": 1.5723, + "step": 8378 + }, + { + "epoch": 0.75, + "learning_rate": 0.00032119131996274843, + "loss": 1.6914, + "step": 8379 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003209797624197542, + "loss": 1.5625, + "step": 8380 + }, + { + "epoch": 0.75, + "learning_rate": 0.00032076826125226345, + "loss": 1.5625, + "step": 8381 + }, + { + "epoch": 0.75, + "learning_rate": 0.00032055681647783673, + "loss": 1.5957, + "step": 8382 + }, + { + "epoch": 0.75, + "learning_rate": 0.000320345428114029, + "loss": 1.6504, + "step": 8383 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003201340961783904, + "loss": 1.5625, + "step": 8384 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031992282068846667, + "loss": 1.5898, + "step": 8385 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031971160166179946, + "loss": 1.5957, + "step": 8386 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031950043911592495, + "loss": 1.6387, + "step": 8387 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003192893330683747, + "loss": 1.7051, + "step": 8388 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031907828353667557, + "loss": 1.6016, + "step": 8389 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003188672905383504, + "loss": 1.6445, + "step": 8390 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031865635409091664, + "loss": 1.543, + "step": 8391 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003184454742118872, + "loss": 1.502, + "step": 8392 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031823465091877003, + "loss": 1.5977, + "step": 8393 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003180238842290694, + "loss": 1.543, + "step": 8394 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031781317416028354, + "loss": 1.5488, + "step": 8395 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003176025207299069, + "loss": 1.6523, + "step": 8396 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003173919239554286, + "loss": 1.5605, + "step": 8397 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031718138385433394, + "loss": 1.6602, + "step": 8398 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003169709004441027, + "loss": 1.6973, + "step": 8399 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003167604737422102, + "loss": 1.4844, + "step": 8400 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003165501037661267, + "loss": 1.4668, + "step": 8401 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031633979053331883, + "loss": 1.5234, + "step": 8402 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031612953406124744, + "loss": 1.4785, + "step": 8403 + }, + { + "epoch": 0.75, + "learning_rate": 0.000315919334367369, + "loss": 1.4922, + "step": 8404 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031570919146913537, + "loss": 1.6367, + "step": 8405 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031549910538399353, + "loss": 1.6328, + "step": 8406 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031528907612938563, + "loss": 1.4375, + "step": 8407 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031507910372274985, + "loss": 1.4707, + "step": 8408 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031486918818151877, + "loss": 1.6055, + "step": 8409 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003146593295231206, + "loss": 1.6543, + "step": 8410 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003144495277649789, + "loss": 1.623, + "step": 8411 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003142397829245123, + "loss": 1.5781, + "step": 8412 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031403009501913446, + "loss": 1.459, + "step": 8413 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003138204640662555, + "loss": 1.4707, + "step": 8414 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031361089008327946, + "loss": 1.709, + "step": 8415 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003134013730876063, + "loss": 1.6562, + "step": 8416 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031319191309663074, + "loss": 1.6152, + "step": 8417 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003129825101277439, + "loss": 1.582, + "step": 8418 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003127731641983309, + "loss": 1.6523, + "step": 8419 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031256387532577277, + "loss": 1.6289, + "step": 8420 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031235464352744545, + "loss": 1.5215, + "step": 8421 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003121454688207207, + "loss": 1.5371, + "step": 8422 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003119363512229653, + "loss": 1.6465, + "step": 8423 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003117272907515407, + "loss": 1.5391, + "step": 8424 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031151828742380415, + "loss": 1.6543, + "step": 8425 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031130934125710863, + "loss": 1.541, + "step": 8426 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003111004522688016, + "loss": 1.5449, + "step": 8427 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003108916204762259, + "loss": 1.4492, + "step": 8428 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003106828458967194, + "loss": 1.6855, + "step": 8429 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003104741285476164, + "loss": 1.6367, + "step": 8430 + }, + { + "epoch": 0.75, + "learning_rate": 0.00031026546844624506, + "loss": 1.5762, + "step": 8431 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003100568656099295, + "loss": 1.6309, + "step": 8432 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003098483200559885, + "loss": 1.582, + "step": 8433 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030963983180173725, + "loss": 1.4648, + "step": 8434 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003094314008644851, + "loss": 1.6016, + "step": 8435 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030922302726153696, + "loss": 1.6211, + "step": 8436 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030901471101019265, + "loss": 1.5879, + "step": 8437 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003088064521277482, + "loss": 1.623, + "step": 8438 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030859825063149406, + "loss": 1.6699, + "step": 8439 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003083901065387159, + "loss": 1.5508, + "step": 8440 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003081820198666948, + "loss": 1.5371, + "step": 8441 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003079739906327076, + "loss": 1.6562, + "step": 8442 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030776601885402544, + "loss": 1.6641, + "step": 8443 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030755810454791534, + "loss": 1.4902, + "step": 8444 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003073502477316392, + "loss": 1.6699, + "step": 8445 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030714244842245444, + "loss": 1.5801, + "step": 8446 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030693470663761303, + "loss": 1.6152, + "step": 8447 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030672702239436346, + "loss": 1.6055, + "step": 8448 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030651939570994837, + "loss": 1.625, + "step": 8449 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003063118266016057, + "loss": 1.5234, + "step": 8450 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003061043150865692, + "loss": 1.5781, + "step": 8451 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030589686118206685, + "loss": 1.5352, + "step": 8452 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003056894649053231, + "loss": 1.502, + "step": 8453 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030548212627355686, + "loss": 1.5469, + "step": 8454 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003052748453039822, + "loss": 1.541, + "step": 8455 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003050676220138084, + "loss": 1.5195, + "step": 8456 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030486045642024064, + "loss": 1.5859, + "step": 8457 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030465334854047854, + "loss": 1.5371, + "step": 8458 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030444629839171725, + "loss": 1.627, + "step": 8459 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030423930599114647, + "loss": 1.7227, + "step": 8460 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030403237135595264, + "loss": 1.6406, + "step": 8461 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003038254945033161, + "loss": 1.5977, + "step": 8462 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003036186754504127, + "loss": 1.5566, + "step": 8463 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030341191421441306, + "loss": 1.4863, + "step": 8464 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003032052108124843, + "loss": 1.5371, + "step": 8465 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003029985652617877, + "loss": 1.5332, + "step": 8466 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030279197757947983, + "loss": 1.5625, + "step": 8467 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030258544778271224, + "loss": 1.5957, + "step": 8468 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003023789758886327, + "loss": 1.6309, + "step": 8469 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030217256191438315, + "loss": 1.5371, + "step": 8470 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003019662058771011, + "loss": 1.6641, + "step": 8471 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030175990779391894, + "loss": 1.668, + "step": 8472 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030155366768196514, + "loss": 1.5059, + "step": 8473 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003013474855583622, + "loss": 1.5059, + "step": 8474 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030114136144022876, + "loss": 1.5898, + "step": 8475 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030093529534467743, + "loss": 1.502, + "step": 8476 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030072928728881786, + "loss": 1.6523, + "step": 8477 + }, + { + "epoch": 0.75, + "learning_rate": 0.00030052333728975333, + "loss": 1.5078, + "step": 8478 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003003174453645827, + "loss": 1.4902, + "step": 8479 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003001116115304, + "loss": 1.541, + "step": 8480 + }, + { + "epoch": 0.75, + "learning_rate": 0.00029990583580429496, + "loss": 1.6152, + "step": 8481 + }, + { + "epoch": 0.75, + "learning_rate": 0.00029970011820335186, + "loss": 1.5898, + "step": 8482 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002994944587446504, + "loss": 1.6738, + "step": 8483 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002992888574452652, + "loss": 1.5742, + "step": 8484 + }, + { + "epoch": 0.75, + "learning_rate": 0.00029908331432226663, + "loss": 1.6348, + "step": 8485 + }, + { + "epoch": 0.75, + "learning_rate": 0.00029887782939271913, + "loss": 1.6055, + "step": 8486 + }, + { + "epoch": 0.76, + "learning_rate": 0.000298672402673684, + "loss": 1.4766, + "step": 8487 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002984670341822162, + "loss": 1.625, + "step": 8488 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029826172393536654, + "loss": 1.6426, + "step": 8489 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002980564719501808, + "loss": 1.5898, + "step": 8490 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029785127824370007, + "loss": 1.6191, + "step": 8491 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002976461428329601, + "loss": 1.709, + "step": 8492 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002974410657349929, + "loss": 1.6133, + "step": 8493 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002972360469668245, + "loss": 1.6387, + "step": 8494 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029703108654547674, + "loss": 1.623, + "step": 8495 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029682618448796595, + "loss": 1.5156, + "step": 8496 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029662134081130475, + "loss": 1.541, + "step": 8497 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002964165555324999, + "loss": 1.6016, + "step": 8498 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002962118286685538, + "loss": 1.5742, + "step": 8499 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029600716023646335, + "loss": 1.4492, + "step": 8500 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002958025502532218, + "loss": 1.4707, + "step": 8501 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002955979987358165, + "loss": 1.4902, + "step": 8502 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002953935057012305, + "loss": 1.5703, + "step": 8503 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029518907116644113, + "loss": 1.5254, + "step": 8504 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002949846951484224, + "loss": 1.5508, + "step": 8505 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002947803776641421, + "loss": 1.752, + "step": 8506 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002945761187305639, + "loss": 1.4785, + "step": 8507 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002943719183646457, + "loss": 1.4941, + "step": 8508 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029416777658334214, + "loss": 1.5547, + "step": 8509 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002939636934036015, + "loss": 1.4238, + "step": 8510 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029375966884236793, + "loss": 1.6309, + "step": 8511 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029355570291657995, + "loss": 1.4805, + "step": 8512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029335179564317273, + "loss": 1.6074, + "step": 8513 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002931479470390751, + "loss": 1.6523, + "step": 8514 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002929441571212116, + "loss": 1.4121, + "step": 8515 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002927404259065014, + "loss": 1.6992, + "step": 8516 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029253675341186017, + "loss": 1.6895, + "step": 8517 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029233313965419715, + "loss": 1.582, + "step": 8518 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002921295846504176, + "loss": 1.4805, + "step": 8519 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002919260884174211, + "loss": 1.4434, + "step": 8520 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002917226509721035, + "loss": 1.8359, + "step": 8521 + }, + { + "epoch": 0.76, + "learning_rate": 0.000291519272331355, + "loss": 1.5352, + "step": 8522 + }, + { + "epoch": 0.76, + "learning_rate": 0.000291315952512061, + "loss": 1.5762, + "step": 8523 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029111269153110197, + "loss": 1.582, + "step": 8524 + }, + { + "epoch": 0.76, + "learning_rate": 0.00029090948940535376, + "loss": 1.4766, + "step": 8525 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002907063461516869, + "loss": 1.6387, + "step": 8526 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002905032617869677, + "loss": 1.4648, + "step": 8527 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002903002363280571, + "loss": 1.6426, + "step": 8528 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002900972697918112, + "loss": 1.5703, + "step": 8529 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028989436219508123, + "loss": 1.6523, + "step": 8530 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002896915135547135, + "loss": 1.5312, + "step": 8531 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002894887238875492, + "loss": 1.4707, + "step": 8532 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002892859932104256, + "loss": 1.6562, + "step": 8533 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002890833215401739, + "loss": 1.6211, + "step": 8534 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028888070889362093, + "loss": 1.5059, + "step": 8535 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028867815528758835, + "loss": 1.6387, + "step": 8536 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002884756607388936, + "loss": 1.4434, + "step": 8537 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028827322526434865, + "loss": 1.5566, + "step": 8538 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002880708488807601, + "loss": 1.4414, + "step": 8539 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028786853160493085, + "loss": 1.541, + "step": 8540 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028766627345365816, + "loss": 1.541, + "step": 8541 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028746407444373424, + "loss": 1.5586, + "step": 8542 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002872619345919464, + "loss": 1.5312, + "step": 8543 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028705985391507796, + "loss": 1.5176, + "step": 8544 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028685783242990605, + "loss": 1.5332, + "step": 8545 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002866558701532038, + "loss": 1.584, + "step": 8546 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002864539671017385, + "loss": 1.6953, + "step": 8547 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028625212329227393, + "loss": 1.584, + "step": 8548 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002860503387415676, + "loss": 1.5918, + "step": 8549 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002858486134663728, + "loss": 1.4258, + "step": 8550 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002856469474834373, + "loss": 1.6191, + "step": 8551 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028544534080950513, + "loss": 1.5215, + "step": 8552 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028524379346131425, + "loss": 1.6699, + "step": 8553 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028504230545559804, + "loss": 1.584, + "step": 8554 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002848408768090848, + "loss": 1.6035, + "step": 8555 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028463950753849853, + "loss": 1.5547, + "step": 8556 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028443819766055776, + "loss": 1.5586, + "step": 8557 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002842369471919761, + "loss": 1.5898, + "step": 8558 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002840357561494621, + "loss": 1.5996, + "step": 8559 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002838346245497201, + "loss": 1.5898, + "step": 8560 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002836335524094488, + "loss": 1.5566, + "step": 8561 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028343253974534214, + "loss": 1.5469, + "step": 8562 + }, + { + "epoch": 0.76, + "learning_rate": 0.000283231586574089, + "loss": 1.4746, + "step": 8563 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002830306929123737, + "loss": 1.625, + "step": 8564 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028282985877687493, + "loss": 1.4844, + "step": 8565 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002826290841842676, + "loss": 1.5703, + "step": 8566 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002824283691512206, + "loss": 1.6367, + "step": 8567 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028222771369439836, + "loss": 1.5898, + "step": 8568 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002820271178304602, + "loss": 1.6113, + "step": 8569 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028182658157606047, + "loss": 1.4609, + "step": 8570 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028162610494784847, + "loss": 1.5117, + "step": 8571 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028142568796246913, + "loss": 1.5762, + "step": 8572 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002812253306365621, + "loss": 1.5469, + "step": 8573 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002810250329867616, + "loss": 1.5156, + "step": 8574 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002808247950296972, + "loss": 1.5137, + "step": 8575 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002806246167819941, + "loss": 1.5781, + "step": 8576 + }, + { + "epoch": 0.76, + "learning_rate": 0.00028042449826027195, + "loss": 1.5625, + "step": 8577 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002802244394811454, + "loss": 1.5449, + "step": 8578 + }, + { + "epoch": 0.76, + "learning_rate": 0.000280024440461224, + "loss": 1.6289, + "step": 8579 + }, + { + "epoch": 0.76, + "learning_rate": 0.00027982450121711325, + "loss": 1.5801, + "step": 8580 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002796246217654127, + "loss": 1.6152, + "step": 8581 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002794248021227174, + "loss": 1.6191, + "step": 8582 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002792250423056168, + "loss": 1.5957, + "step": 8583 + }, + { + "epoch": 0.76, + "learning_rate": 0.00027902534233069685, + "loss": 1.6211, + "step": 8584 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002788257022145372, + "loss": 1.5117, + "step": 8585 + }, + { + "epoch": 0.76, + "learning_rate": 0.00027862612197371273, + "loss": 1.6328, + "step": 8586 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002784266016247935, + "loss": 1.5684, + "step": 8587 + }, + { + "epoch": 0.76, + "learning_rate": 0.000278227141184345, + "loss": 1.6836, + "step": 8588 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002780277406689272, + "loss": 1.5703, + "step": 8589 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002778284000950954, + "loss": 1.6641, + "step": 8590 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002776291194793993, + "loss": 1.5215, + "step": 8591 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002774298988383849, + "loss": 1.3984, + "step": 8592 + }, + { + "epoch": 0.76, + "learning_rate": 0.000277230738188592, + "loss": 1.5684, + "step": 8593 + }, + { + "epoch": 0.76, + "learning_rate": 0.00027703163754655603, + "loss": 1.6035, + "step": 8594 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002768325969288068, + "loss": 1.6113, + "step": 8595 + }, + { + "epoch": 0.76, + "learning_rate": 0.00027663361635187035, + "loss": 1.5371, + "step": 8596 + }, + { + "epoch": 0.76, + "learning_rate": 0.00027643469583226663, + "loss": 1.5703, + "step": 8597 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002762358353865111, + "loss": 1.4902, + "step": 8598 + }, + { + "epoch": 0.76, + "learning_rate": 0.00027603703503111356, + "loss": 1.627, + "step": 8599 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027583829478258017, + "loss": 1.5605, + "step": 8600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027563961465741096, + "loss": 1.6094, + "step": 8601 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027544099467210115, + "loss": 1.6133, + "step": 8602 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002752424348431414, + "loss": 1.7188, + "step": 8603 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002750439351870164, + "loss": 1.5781, + "step": 8604 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002748454957202076, + "loss": 1.5137, + "step": 8605 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027464711645918983, + "loss": 1.5957, + "step": 8606 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002744487974204334, + "loss": 1.5625, + "step": 8607 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027425053862040395, + "loss": 1.5801, + "step": 8608 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002740523400755617, + "loss": 1.5781, + "step": 8609 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002738542018023621, + "loss": 1.5996, + "step": 8610 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002736561238172552, + "loss": 1.582, + "step": 8611 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002734581061366871, + "loss": 1.6055, + "step": 8612 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027326014877709757, + "loss": 1.6133, + "step": 8613 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002730622517549224, + "loss": 1.4668, + "step": 8614 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027286441508659134, + "loss": 1.4375, + "step": 8615 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002726666387885305, + "loss": 1.5586, + "step": 8616 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002724689228771597, + "loss": 1.5293, + "step": 8617 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027227126736889453, + "loss": 1.5898, + "step": 8618 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002720736722801448, + "loss": 1.5195, + "step": 8619 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027187613762731644, + "loss": 1.6523, + "step": 8620 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027167866342680944, + "loss": 1.6465, + "step": 8621 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027148124969501874, + "loss": 1.7539, + "step": 8622 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002712838964483353, + "loss": 1.5801, + "step": 8623 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027108660370314387, + "loss": 1.5938, + "step": 8624 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002708893714758246, + "loss": 1.666, + "step": 8625 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002706921997827525, + "loss": 1.6582, + "step": 8626 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027049508864029827, + "loss": 1.5605, + "step": 8627 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027029803806482655, + "loss": 1.625, + "step": 8628 + }, + { + "epoch": 0.77, + "learning_rate": 0.00027010104807269765, + "loss": 1.5723, + "step": 8629 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002699041186802662, + "loss": 1.4531, + "step": 8630 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026970724990388296, + "loss": 1.6172, + "step": 8631 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002695104417598924, + "loss": 1.5762, + "step": 8632 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026931369426463447, + "loss": 1.5234, + "step": 8633 + }, + { + "epoch": 0.77, + "learning_rate": 0.000269117007434444, + "loss": 1.6543, + "step": 8634 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026892038128565146, + "loss": 1.5391, + "step": 8635 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026872381583458116, + "loss": 1.5234, + "step": 8636 + }, + { + "epoch": 0.77, + "learning_rate": 0.000268527311097553, + "loss": 1.627, + "step": 8637 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002683308670908816, + "loss": 1.6895, + "step": 8638 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002681344838308771, + "loss": 1.543, + "step": 8639 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026793816133384397, + "loss": 1.6152, + "step": 8640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026774189961608177, + "loss": 1.5469, + "step": 8641 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026754569869388514, + "loss": 1.584, + "step": 8642 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002673495585835436, + "loss": 1.6309, + "step": 8643 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026715347930134125, + "loss": 1.5703, + "step": 8644 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002669574608635583, + "loss": 1.6113, + "step": 8645 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002667615032864688, + "loss": 1.5312, + "step": 8646 + }, + { + "epoch": 0.77, + "learning_rate": 0.000266565606586342, + "loss": 1.5547, + "step": 8647 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026636977077944214, + "loss": 1.5684, + "step": 8648 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002661739958820286, + "loss": 1.5371, + "step": 8649 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026597828191035524, + "loss": 1.6113, + "step": 8650 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002657826288806715, + "loss": 1.6289, + "step": 8651 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002655870368092215, + "loss": 1.4531, + "step": 8652 + }, + { + "epoch": 0.77, + "learning_rate": 0.000265391505712244, + "loss": 1.6113, + "step": 8653 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002651960356059726, + "loss": 1.6309, + "step": 8654 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002650006265066369, + "loss": 1.6035, + "step": 8655 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026480527843046045, + "loss": 1.4766, + "step": 8656 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026460999139366173, + "loss": 1.4785, + "step": 8657 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026441476541245437, + "loss": 1.6582, + "step": 8658 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002642196005030473, + "loss": 1.5957, + "step": 8659 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026402449668164397, + "loss": 1.6406, + "step": 8660 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002638294539644427, + "loss": 1.668, + "step": 8661 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002636344723676366, + "loss": 1.5977, + "step": 8662 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026343955190741465, + "loss": 1.6855, + "step": 8663 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026324469259995963, + "loss": 1.5703, + "step": 8664 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002630498944614498, + "loss": 1.5625, + "step": 8665 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002628551575080579, + "loss": 1.6191, + "step": 8666 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026266048175595247, + "loss": 1.502, + "step": 8667 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026246586722129627, + "loss": 1.5547, + "step": 8668 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026227131392024707, + "loss": 1.6621, + "step": 8669 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002620768218689571, + "loss": 1.4668, + "step": 8670 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026188239108357493, + "loss": 1.5195, + "step": 8671 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026168802158024274, + "loss": 1.6094, + "step": 8672 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002614937133750979, + "loss": 1.707, + "step": 8673 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002612994664842726, + "loss": 1.5391, + "step": 8674 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026110528092389474, + "loss": 1.6133, + "step": 8675 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026091115671008616, + "loss": 1.4629, + "step": 8676 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026071709385896405, + "loss": 1.6348, + "step": 8677 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002605230923866402, + "loss": 1.6289, + "step": 8678 + }, + { + "epoch": 0.77, + "learning_rate": 0.00026032915230922194, + "loss": 1.6094, + "step": 8679 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002601352736428111, + "loss": 1.7031, + "step": 8680 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002599414564035041, + "loss": 1.6465, + "step": 8681 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025974770060739274, + "loss": 1.6465, + "step": 8682 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002595540062705632, + "loss": 1.4551, + "step": 8683 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025936037340909757, + "loss": 1.6582, + "step": 8684 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025916680203907184, + "loss": 1.498, + "step": 8685 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025897329217655716, + "loss": 1.5469, + "step": 8686 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002587798438376198, + "loss": 1.5723, + "step": 8687 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002585864570383206, + "loss": 1.4688, + "step": 8688 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002583931317947156, + "loss": 1.7734, + "step": 8689 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002581998681228552, + "loss": 1.582, + "step": 8690 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002580066660387856, + "loss": 1.6328, + "step": 8691 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025781352555854733, + "loss": 1.3945, + "step": 8692 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025762044669817543, + "loss": 1.5918, + "step": 8693 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002574274294737003, + "loss": 1.4746, + "step": 8694 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002572344739011476, + "loss": 1.6621, + "step": 8695 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002570415799965371, + "loss": 1.5898, + "step": 8696 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025684874777588375, + "loss": 1.5371, + "step": 8697 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002566559772551972, + "loss": 1.5371, + "step": 8698 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002564632684504827, + "loss": 1.6738, + "step": 8699 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002562706213777397, + "loss": 1.5352, + "step": 8700 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025607803605296256, + "loss": 1.6602, + "step": 8701 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002558855124921402, + "loss": 1.4824, + "step": 8702 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002556930507112577, + "loss": 1.7012, + "step": 8703 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002555006507262937, + "loss": 1.5039, + "step": 8704 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025530831255322217, + "loss": 1.5527, + "step": 8705 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025511603620801185, + "loss": 1.582, + "step": 8706 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025492382170662673, + "loss": 1.5957, + "step": 8707 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002547316690650252, + "loss": 1.5996, + "step": 8708 + }, + { + "epoch": 0.77, + "learning_rate": 0.00025453957829916054, + "loss": 1.5254, + "step": 8709 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002543475494249814, + "loss": 1.6895, + "step": 8710 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002541555824584308, + "loss": 1.6289, + "step": 8711 + }, + { + "epoch": 0.78, + "learning_rate": 0.00025396367741544677, + "loss": 1.5039, + "step": 8712 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002537718343119617, + "loss": 1.5332, + "step": 8713 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002535800531639041, + "loss": 1.5723, + "step": 8714 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002533883339871962, + "loss": 1.5488, + "step": 8715 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002531966767977554, + "loss": 1.5703, + "step": 8716 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002530050816114938, + "loss": 1.668, + "step": 8717 + }, + { + "epoch": 0.78, + "learning_rate": 0.00025281354844431905, + "loss": 1.6289, + "step": 8718 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002526220773121328, + "loss": 1.4922, + "step": 8719 + }, + { + "epoch": 0.78, + "learning_rate": 0.00025243066823083204, + "loss": 1.5449, + "step": 8720 + }, + { + "epoch": 0.78, + "learning_rate": 0.00025223932121630846, + "loss": 1.5547, + "step": 8721 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002520480362844485, + "loss": 1.5293, + "step": 8722 + }, + { + "epoch": 0.78, + "learning_rate": 0.00025185681345113334, + "loss": 1.6289, + "step": 8723 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002516656527322396, + "loss": 1.4355, + "step": 8724 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002514745541436384, + "loss": 1.5176, + "step": 8725 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002512835177011954, + "loss": 1.5625, + "step": 8726 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002510925434207715, + "loss": 1.709, + "step": 8727 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002509016313182222, + "loss": 1.6016, + "step": 8728 + }, + { + "epoch": 0.78, + "learning_rate": 0.00025071078140939776, + "loss": 1.6934, + "step": 8729 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002505199937101438, + "loss": 1.6133, + "step": 8730 + }, + { + "epoch": 0.78, + "learning_rate": 0.00025032926823630045, + "loss": 1.5801, + "step": 8731 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002501386050037023, + "loss": 1.6133, + "step": 8732 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002499480040281792, + "loss": 1.7051, + "step": 8733 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002497574653255561, + "loss": 1.6328, + "step": 8734 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024956698891165217, + "loss": 1.5625, + "step": 8735 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002493765748022817, + "loss": 1.6621, + "step": 8736 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024918622301325357, + "loss": 1.5391, + "step": 8737 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002489959335603721, + "loss": 1.627, + "step": 8738 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002488057064594359, + "loss": 1.6328, + "step": 8739 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024861554172623835, + "loss": 1.6504, + "step": 8740 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002484254393765678, + "loss": 1.5586, + "step": 8741 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002482353994262079, + "loss": 1.582, + "step": 8742 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024804542189093626, + "loss": 1.6484, + "step": 8743 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002478555067865259, + "loss": 1.6055, + "step": 8744 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002476656541287441, + "loss": 1.6875, + "step": 8745 + }, + { + "epoch": 0.78, + "learning_rate": 0.000247475863933354, + "loss": 1.4453, + "step": 8746 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002472861362161125, + "loss": 1.5391, + "step": 8747 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002470964709927719, + "loss": 1.4863, + "step": 8748 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002469068682790785, + "loss": 1.7129, + "step": 8749 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024671732809077485, + "loss": 1.5723, + "step": 8750 + }, + { + "epoch": 0.78, + "learning_rate": 0.000246527850443597, + "loss": 1.584, + "step": 8751 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002463384353532766, + "loss": 1.7812, + "step": 8752 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002461490828355392, + "loss": 1.4805, + "step": 8753 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024595979290610635, + "loss": 1.5957, + "step": 8754 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002457705655806938, + "loss": 1.4668, + "step": 8755 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002455814008750118, + "loss": 1.5684, + "step": 8756 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024539229880476553, + "loss": 1.5176, + "step": 8757 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002452032593856557, + "loss": 1.5742, + "step": 8758 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002450142826333771, + "loss": 1.5566, + "step": 8759 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024482536856361924, + "loss": 1.5996, + "step": 8760 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002446365171920669, + "loss": 1.5938, + "step": 8761 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002444477285343991, + "loss": 1.6328, + "step": 8762 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024425900260629043, + "loss": 1.5762, + "step": 8763 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002440703394234096, + "loss": 1.5039, + "step": 8764 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002438817390014204, + "loss": 1.5566, + "step": 8765 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024369320135598116, + "loss": 1.4961, + "step": 8766 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024350472650274535, + "loss": 1.5059, + "step": 8767 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024331631445736102, + "loss": 1.5078, + "step": 8768 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024312796523547064, + "loss": 1.6133, + "step": 8769 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002429396788527125, + "loss": 1.5449, + "step": 8770 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024275145532471888, + "loss": 1.543, + "step": 8771 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002425632946671168, + "loss": 1.5781, + "step": 8772 + }, + { + "epoch": 0.78, + "learning_rate": 0.000242375196895528, + "loss": 1.6074, + "step": 8773 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002421871620255699, + "loss": 1.6777, + "step": 8774 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024199919007285387, + "loss": 1.6289, + "step": 8775 + }, + { + "epoch": 0.78, + "learning_rate": 0.000241811281052986, + "loss": 1.4922, + "step": 8776 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024162343498156726, + "loss": 1.6816, + "step": 8777 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024143565187419391, + "loss": 1.7676, + "step": 8778 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024124793174645664, + "loss": 1.5723, + "step": 8779 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024106027461394064, + "loss": 1.6504, + "step": 8780 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024087268049222588, + "loss": 1.6309, + "step": 8781 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002406851493968879, + "loss": 1.5781, + "step": 8782 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024049768134349613, + "loss": 1.668, + "step": 8783 + }, + { + "epoch": 0.78, + "learning_rate": 0.00024031027634761505, + "loss": 1.6074, + "step": 8784 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002401229344248036, + "loss": 1.5957, + "step": 8785 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023993565559061647, + "loss": 1.5449, + "step": 8786 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023974843986060212, + "loss": 1.5898, + "step": 8787 + }, + { + "epoch": 0.78, + "learning_rate": 0.000239561287250304, + "loss": 1.6465, + "step": 8788 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023937419777526027, + "loss": 1.541, + "step": 8789 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023918717145100443, + "loss": 1.5059, + "step": 8790 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023900020829306412, + "loss": 1.7344, + "step": 8791 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023881330831696158, + "loss": 1.6836, + "step": 8792 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002386264715382148, + "loss": 1.5176, + "step": 8793 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023843969797233546, + "loss": 1.668, + "step": 8794 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023825298763483039, + "loss": 1.5273, + "step": 8795 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023806634054120092, + "loss": 1.5293, + "step": 8796 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023787975670694396, + "loss": 1.5898, + "step": 8797 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023769323614755033, + "loss": 1.5801, + "step": 8798 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023750677887850592, + "loss": 1.666, + "step": 8799 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023732038491529106, + "loss": 1.6309, + "step": 8800 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023713405427338132, + "loss": 1.6289, + "step": 8801 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023694778696824637, + "loss": 1.5957, + "step": 8802 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023676158301535167, + "loss": 1.7207, + "step": 8803 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023657544243015628, + "loss": 1.6289, + "step": 8804 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023638936522811461, + "loss": 1.541, + "step": 8805 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023620335142467575, + "loss": 1.5195, + "step": 8806 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023601740103528336, + "loss": 1.6133, + "step": 8807 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023583151407537562, + "loss": 1.6836, + "step": 8808 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002356456905603863, + "loss": 1.5742, + "step": 8809 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023545993050574332, + "loss": 1.4922, + "step": 8810 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023527423392686908, + "loss": 1.6367, + "step": 8811 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002350886008391807, + "loss": 1.5684, + "step": 8812 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002349030312580912, + "loss": 1.5898, + "step": 8813 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002347175251990068, + "loss": 1.457, + "step": 8814 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002345320826773294, + "loss": 1.6426, + "step": 8815 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002343467037084549, + "loss": 1.5879, + "step": 8816 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002341613883077749, + "loss": 1.5391, + "step": 8817 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023397613649067496, + "loss": 1.7246, + "step": 8818 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023379094827253544, + "loss": 1.6309, + "step": 8819 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002336058236687314, + "loss": 1.5703, + "step": 8820 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002334207626946333, + "loss": 1.584, + "step": 8821 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023323576536560554, + "loss": 1.584, + "step": 8822 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023305083169700735, + "loss": 1.6406, + "step": 8823 + }, + { + "epoch": 0.78, + "learning_rate": 0.00023286596170419261, + "loss": 1.6191, + "step": 8824 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023268115540251078, + "loss": 1.4434, + "step": 8825 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023249641280730484, + "loss": 1.668, + "step": 8826 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023231173393391326, + "loss": 1.5527, + "step": 8827 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023212711879766845, + "loss": 1.5039, + "step": 8828 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023194256741389875, + "loss": 1.5332, + "step": 8829 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023175807979792617, + "loss": 1.6543, + "step": 8830 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023157365596506785, + "loss": 1.5605, + "step": 8831 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023138929593063506, + "loss": 1.5703, + "step": 8832 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023120499970993492, + "loss": 1.5703, + "step": 8833 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023102076731826838, + "loss": 1.5703, + "step": 8834 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002308365987709311, + "loss": 1.5918, + "step": 8835 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023065249408321352, + "loss": 1.623, + "step": 8836 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023046845327040144, + "loss": 1.5195, + "step": 8837 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002302844763477745, + "loss": 1.5352, + "step": 8838 + }, + { + "epoch": 0.79, + "learning_rate": 0.00023010056333060724, + "loss": 1.457, + "step": 8839 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022991671423416916, + "loss": 1.4805, + "step": 8840 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002297329290737239, + "loss": 1.5645, + "step": 8841 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022954920786453083, + "loss": 1.582, + "step": 8842 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022936555062184294, + "loss": 1.5078, + "step": 8843 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022918195736090853, + "loss": 1.5684, + "step": 8844 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022899842809697012, + "loss": 1.5938, + "step": 8845 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002288149628452655, + "loss": 1.5195, + "step": 8846 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002286315616210266, + "loss": 1.5918, + "step": 8847 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002284482244394801, + "loss": 1.5703, + "step": 8848 + }, + { + "epoch": 0.79, + "learning_rate": 0.000228264951315848, + "loss": 1.6289, + "step": 8849 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002280817422653464, + "loss": 1.7305, + "step": 8850 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022789859730318595, + "loss": 1.6973, + "step": 8851 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002277155164445721, + "loss": 1.6289, + "step": 8852 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002275324997047057, + "loss": 1.4551, + "step": 8853 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002273495470987812, + "loss": 1.4707, + "step": 8854 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002271666586419884, + "loss": 1.5039, + "step": 8855 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022698383434951118, + "loss": 1.5723, + "step": 8856 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022680107423652906, + "loss": 1.5195, + "step": 8857 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022661837831821542, + "loss": 1.4668, + "step": 8858 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022643574660973843, + "loss": 1.5254, + "step": 8859 + }, + { + "epoch": 0.79, + "learning_rate": 0.000226253179126261, + "loss": 1.6133, + "step": 8860 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022607067588294107, + "loss": 1.5918, + "step": 8861 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022588823689493077, + "loss": 1.6133, + "step": 8862 + }, + { + "epoch": 0.79, + "learning_rate": 0.000225705862177377, + "loss": 1.6738, + "step": 8863 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022552355174542129, + "loss": 1.4941, + "step": 8864 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022534130561420018, + "loss": 1.5625, + "step": 8865 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022515912379884452, + "loss": 1.6309, + "step": 8866 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022497700631447993, + "loss": 1.5918, + "step": 8867 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022479495317622624, + "loss": 1.627, + "step": 8868 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002246129643991991, + "loss": 1.5449, + "step": 8869 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022443103999850777, + "loss": 1.625, + "step": 8870 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022424917998925642, + "loss": 1.5195, + "step": 8871 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022406738438654372, + "loss": 1.6172, + "step": 8872 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022388565320546373, + "loss": 1.5879, + "step": 8873 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022370398646110435, + "loss": 1.5742, + "step": 8874 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002235223841685482, + "loss": 1.5762, + "step": 8875 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022334084634287332, + "loss": 1.666, + "step": 8876 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022315937299915146, + "loss": 1.5996, + "step": 8877 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022297796415244952, + "loss": 1.582, + "step": 8878 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022279661981782894, + "loss": 1.5254, + "step": 8879 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022261534001034568, + "loss": 1.5098, + "step": 8880 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002224341247450503, + "loss": 1.6055, + "step": 8881 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022225297403698864, + "loss": 1.627, + "step": 8882 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022207188790120046, + "loss": 1.582, + "step": 8883 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022189086635272025, + "loss": 1.5645, + "step": 8884 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022170990940657752, + "loss": 1.5391, + "step": 8885 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022152901707779606, + "loss": 1.5742, + "step": 8886 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022134818938139412, + "loss": 1.5469, + "step": 8887 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022116742633238552, + "loss": 1.4863, + "step": 8888 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002209867279457777, + "loss": 1.6328, + "step": 8889 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022080609423657304, + "loss": 1.6699, + "step": 8890 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022062552521976854, + "loss": 1.5469, + "step": 8891 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022044502091035634, + "loss": 1.4727, + "step": 8892 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002202645813233225, + "loss": 1.6621, + "step": 8893 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022008420647364791, + "loss": 1.6523, + "step": 8894 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021990389637630803, + "loss": 1.7422, + "step": 8895 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021972365104627346, + "loss": 1.7246, + "step": 8896 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021954347049850885, + "loss": 1.5586, + "step": 8897 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021936335474797365, + "loss": 1.5898, + "step": 8898 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002191833038096217, + "loss": 1.6445, + "step": 8899 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002190033176984021, + "loss": 1.5176, + "step": 8900 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021882339642925787, + "loss": 1.6621, + "step": 8901 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021864354001712704, + "loss": 1.6055, + "step": 8902 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021846374847694195, + "loss": 1.4609, + "step": 8903 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021828402182363016, + "loss": 1.623, + "step": 8904 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002181043600721131, + "loss": 1.5098, + "step": 8905 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021792476323730738, + "loss": 1.625, + "step": 8906 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002177452313341235, + "loss": 1.6387, + "step": 8907 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021756576437746767, + "loss": 1.6191, + "step": 8908 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021738636238223985, + "loss": 1.5605, + "step": 8909 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002172070253633349, + "loss": 1.6016, + "step": 8910 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021702775333564184, + "loss": 1.7168, + "step": 8911 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002168485463140454, + "loss": 1.6211, + "step": 8912 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021666940431342362, + "loss": 1.6465, + "step": 8913 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021649032734865004, + "loss": 1.4902, + "step": 8914 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021631131543459214, + "loss": 1.6016, + "step": 8915 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021613236858611284, + "loss": 1.5781, + "step": 8916 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002159534868180688, + "loss": 1.5918, + "step": 8917 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002157746701453118, + "loss": 1.6074, + "step": 8918 + }, + { + "epoch": 0.79, + "learning_rate": 0.000215595918582688, + "loss": 1.5723, + "step": 8919 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021541723214503795, + "loss": 1.502, + "step": 8920 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021523861084719764, + "loss": 1.4844, + "step": 8921 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021506005470399659, + "loss": 1.6504, + "step": 8922 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021488156373025957, + "loss": 1.4805, + "step": 8923 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021470313794080576, + "loss": 1.6074, + "step": 8924 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021452477735044883, + "loss": 1.5254, + "step": 8925 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002143464819739971, + "loss": 1.5059, + "step": 8926 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021416825182625344, + "loss": 1.5664, + "step": 8927 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002139900869220156, + "loss": 1.5195, + "step": 8928 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021381198727607565, + "loss": 1.6387, + "step": 8929 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002136339529032202, + "loss": 1.5527, + "step": 8930 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002134559838182303, + "loss": 1.5723, + "step": 8931 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021327808003588223, + "loss": 1.5801, + "step": 8932 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021310024157094622, + "loss": 1.502, + "step": 8933 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021292246843818718, + "loss": 1.6367, + "step": 8934 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021274476065236448, + "loss": 1.5566, + "step": 8935 + }, + { + "epoch": 0.79, + "learning_rate": 0.00021256711822823283, + "loss": 1.5859, + "step": 8936 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021238954118054065, + "loss": 1.4844, + "step": 8937 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021221202952403128, + "loss": 1.6895, + "step": 8938 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021203458327344226, + "loss": 1.4941, + "step": 8939 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002118572024435066, + "loss": 1.6582, + "step": 8940 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002116798870489509, + "loss": 1.6094, + "step": 8941 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021150263710449692, + "loss": 1.5488, + "step": 8942 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021132545262486047, + "loss": 1.498, + "step": 8943 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021114833362475283, + "loss": 1.582, + "step": 8944 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002109712801188789, + "loss": 1.5898, + "step": 8945 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021079429212193847, + "loss": 1.5176, + "step": 8946 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021061736964862577, + "loss": 1.4746, + "step": 8947 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021044051271363028, + "loss": 1.6094, + "step": 8948 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002102637213316353, + "loss": 1.4941, + "step": 8949 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021008699551731868, + "loss": 1.6328, + "step": 8950 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020991033528535298, + "loss": 1.6895, + "step": 8951 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020973374065040584, + "loss": 1.5684, + "step": 8952 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002095572116271387, + "loss": 1.6113, + "step": 8953 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002093807482302078, + "loss": 1.5762, + "step": 8954 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002092043504742639, + "loss": 1.6094, + "step": 8955 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002090280183739527, + "loss": 1.6973, + "step": 8956 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020885175194391392, + "loss": 1.6621, + "step": 8957 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002086755511987821, + "loss": 1.5723, + "step": 8958 + }, + { + "epoch": 0.8, + "learning_rate": 0.000208499416153186, + "loss": 1.4922, + "step": 8959 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020832334682174968, + "loss": 1.6016, + "step": 8960 + }, + { + "epoch": 0.8, + "learning_rate": 0.000208147343219091, + "loss": 1.4648, + "step": 8961 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020797140535982274, + "loss": 1.6328, + "step": 8962 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020779553325855183, + "loss": 1.5195, + "step": 8963 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020761972692988028, + "loss": 1.6055, + "step": 8964 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020744398638840422, + "loss": 1.5977, + "step": 8965 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002072683116487143, + "loss": 1.7227, + "step": 8966 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020709270272539638, + "loss": 1.5332, + "step": 8967 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020691715963302992, + "loss": 1.5332, + "step": 8968 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020674168238618963, + "loss": 1.4355, + "step": 8969 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020656627099944404, + "loss": 1.5859, + "step": 8970 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020639092548735717, + "loss": 1.6406, + "step": 8971 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020621564586448672, + "loss": 1.5508, + "step": 8972 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020604043214538537, + "loss": 1.6094, + "step": 8973 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020586528434459996, + "loss": 1.5, + "step": 8974 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020569020247667246, + "loss": 1.5039, + "step": 8975 + }, + { + "epoch": 0.8, + "learning_rate": 0.000205515186556139, + "loss": 1.6328, + "step": 8976 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020534023659752988, + "loss": 1.4238, + "step": 8977 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020516535261537027, + "loss": 1.4941, + "step": 8978 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020499053462418038, + "loss": 1.5391, + "step": 8979 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002048157826384741, + "loss": 1.584, + "step": 8980 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020464109667276033, + "loss": 1.6152, + "step": 8981 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020446647674154174, + "loss": 1.5879, + "step": 8982 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020429192285931696, + "loss": 1.5117, + "step": 8983 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002041174350405779, + "loss": 1.6172, + "step": 8984 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020394301329981136, + "loss": 1.6523, + "step": 8985 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020376865765149842, + "loss": 1.5449, + "step": 8986 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020359436811011544, + "loss": 1.5078, + "step": 8987 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002034201446901325, + "loss": 1.5801, + "step": 8988 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020324598740601452, + "loss": 1.5977, + "step": 8989 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020307189627222057, + "loss": 1.5781, + "step": 8990 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020289787130320502, + "loss": 1.6113, + "step": 8991 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020272391251341604, + "loss": 1.5215, + "step": 8992 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020255001991729648, + "loss": 1.5566, + "step": 8993 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002023761935292835, + "loss": 1.5, + "step": 8994 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002022024333638094, + "loss": 1.6875, + "step": 8995 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020202873943530054, + "loss": 1.4414, + "step": 8996 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020185511175817772, + "loss": 1.6289, + "step": 8997 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020168155034685633, + "loss": 1.623, + "step": 8998 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020150805521574588, + "loss": 1.582, + "step": 8999 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020133462637925148, + "loss": 1.5352, + "step": 9000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002011612638517718, + "loss": 1.6348, + "step": 9001 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002009879676477, + "loss": 1.7578, + "step": 9002 + }, + { + "epoch": 0.8, + "learning_rate": 0.000200814737781424, + "loss": 1.5098, + "step": 9003 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020064157426732642, + "loss": 1.5488, + "step": 9004 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020046847711978376, + "loss": 1.6465, + "step": 9005 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020029544635316742, + "loss": 1.6016, + "step": 9006 + }, + { + "epoch": 0.8, + "learning_rate": 0.00020012248198184368, + "loss": 1.5234, + "step": 9007 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001999495840201725, + "loss": 1.6465, + "step": 9008 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001997767524825087, + "loss": 1.6562, + "step": 9009 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001996039873832015, + "loss": 1.5879, + "step": 9010 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019943128873659499, + "loss": 1.6035, + "step": 9011 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019925865655702734, + "loss": 1.5215, + "step": 9012 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001990860908588311, + "loss": 1.5508, + "step": 9013 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001989135916563335, + "loss": 1.5293, + "step": 9014 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001987411589638566, + "loss": 1.582, + "step": 9015 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019856879279571638, + "loss": 1.4766, + "step": 9016 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019839649316622342, + "loss": 1.5859, + "step": 9017 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019822426008968275, + "loss": 1.5352, + "step": 9018 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019805209358039443, + "loss": 1.5566, + "step": 9019 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001978799936526522, + "loss": 1.584, + "step": 9020 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019770796032074467, + "loss": 1.709, + "step": 9021 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001975359935989547, + "loss": 1.5254, + "step": 9022 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019736409350156026, + "loss": 1.5156, + "step": 9023 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019719226004283298, + "loss": 1.5391, + "step": 9024 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001970204932370393, + "loss": 1.6309, + "step": 9025 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019684879309843996, + "loss": 1.627, + "step": 9026 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019667715964129086, + "loss": 1.5879, + "step": 9027 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001965055928798414, + "loss": 1.5684, + "step": 9028 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019633409282833602, + "loss": 1.582, + "step": 9029 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001961626595010132, + "loss": 1.5273, + "step": 9030 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001959912929121066, + "loss": 1.5547, + "step": 9031 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019581999307584374, + "loss": 1.5039, + "step": 9032 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001956487600064466, + "loss": 1.625, + "step": 9033 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019547759371813168, + "loss": 1.6035, + "step": 9034 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019530649422511037, + "loss": 1.584, + "step": 9035 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019513546154158813, + "loss": 1.6328, + "step": 9036 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019496449568176468, + "loss": 1.5137, + "step": 9037 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019479359665983432, + "loss": 1.5352, + "step": 9038 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019462276448998627, + "loss": 1.6348, + "step": 9039 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019445199918640378, + "loss": 1.625, + "step": 9040 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019428130076326434, + "loss": 1.6348, + "step": 9041 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001941106692347404, + "loss": 1.5957, + "step": 9042 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019394010461499844, + "loss": 1.6152, + "step": 9043 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019376960691819966, + "loss": 1.5508, + "step": 9044 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001935991761584993, + "loss": 1.6602, + "step": 9045 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019342881235004784, + "loss": 1.5879, + "step": 9046 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019325851550698936, + "loss": 1.5508, + "step": 9047 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019308828564346294, + "loss": 1.4629, + "step": 9048 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019291812277360133, + "loss": 1.5898, + "step": 9049 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019274802691153292, + "loss": 1.5898, + "step": 9050 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019257799807137965, + "loss": 1.5586, + "step": 9051 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001924080362672581, + "loss": 1.4375, + "step": 9052 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001922381415132791, + "loss": 1.4512, + "step": 9053 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001920683138235486, + "loss": 1.5977, + "step": 9054 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001918985532121662, + "loss": 1.707, + "step": 9055 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019172885969322628, + "loss": 1.6621, + "step": 9056 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019155923328081738, + "loss": 1.498, + "step": 9057 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019138967398902318, + "loss": 1.5918, + "step": 9058 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019122018183192102, + "loss": 1.582, + "step": 9059 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019105075682358298, + "loss": 1.6016, + "step": 9060 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019088139897807522, + "loss": 1.7129, + "step": 9061 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019071210830945927, + "loss": 1.5957, + "step": 9062 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019054288483179007, + "loss": 1.7246, + "step": 9063 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001903737285591174, + "loss": 1.6719, + "step": 9064 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019020463950548527, + "loss": 1.5, + "step": 9065 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001900356176849326, + "loss": 1.416, + "step": 9066 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018986666311149224, + "loss": 1.6152, + "step": 9067 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018969777579919155, + "loss": 1.4238, + "step": 9068 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001895289557620522, + "loss": 1.541, + "step": 9069 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018936020301409085, + "loss": 1.5371, + "step": 9070 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018919151756931795, + "loss": 1.666, + "step": 9071 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018902289944173868, + "loss": 1.5781, + "step": 9072 + }, + { + "epoch": 0.81, + "learning_rate": 0.000188854348645352, + "loss": 1.4961, + "step": 9073 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018868586519415242, + "loss": 1.6875, + "step": 9074 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001885174491021281, + "loss": 1.4688, + "step": 9075 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001883491003832617, + "loss": 1.5879, + "step": 9076 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018818081905153027, + "loss": 1.5918, + "step": 9077 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018801260512090511, + "loss": 1.582, + "step": 9078 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018784445860535272, + "loss": 1.6797, + "step": 9079 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001876763795188331, + "loss": 1.5117, + "step": 9080 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001875083678753009, + "loss": 1.6953, + "step": 9081 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001873404236887054, + "loss": 1.5332, + "step": 9082 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018717254697299003, + "loss": 1.4199, + "step": 9083 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001870047377420928, + "loss": 1.4824, + "step": 9084 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018683699600994576, + "loss": 1.5527, + "step": 9085 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018666932179047603, + "loss": 1.6777, + "step": 9086 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001865017150976046, + "loss": 1.6602, + "step": 9087 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018633417594524692, + "loss": 1.5469, + "step": 9088 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018616670434731263, + "loss": 1.5391, + "step": 9089 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001859993003177066, + "loss": 1.5371, + "step": 9090 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018583196387032718, + "loss": 1.584, + "step": 9091 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018566469501906746, + "loss": 1.4824, + "step": 9092 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018549749377781466, + "loss": 1.4785, + "step": 9093 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018533036016045112, + "loss": 1.5742, + "step": 9094 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018516329418085276, + "loss": 1.5742, + "step": 9095 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018499629585289035, + "loss": 1.5371, + "step": 9096 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018482936519042859, + "loss": 1.5547, + "step": 9097 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018466250220732717, + "loss": 1.709, + "step": 9098 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018449570691743988, + "loss": 1.6562, + "step": 9099 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018432897933461467, + "loss": 1.5488, + "step": 9100 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001841623194726939, + "loss": 1.6465, + "step": 9101 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018399572734551496, + "loss": 1.5078, + "step": 9102 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018382920296690876, + "loss": 1.4883, + "step": 9103 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018366274635070112, + "loss": 1.6504, + "step": 9104 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001834963575107117, + "loss": 1.7285, + "step": 9105 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018333003646075552, + "loss": 1.4941, + "step": 9106 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001831637832146409, + "loss": 1.5859, + "step": 9107 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001829975977861712, + "loss": 1.5508, + "step": 9108 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001828314801891434, + "loss": 1.5879, + "step": 9109 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001826654304373503, + "loss": 1.6172, + "step": 9110 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018249944854457746, + "loss": 1.5605, + "step": 9111 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018233353452460567, + "loss": 1.6641, + "step": 9112 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018216768839120968, + "loss": 1.543, + "step": 9113 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018200191015815936, + "loss": 1.4902, + "step": 9114 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018183619983921806, + "loss": 1.5723, + "step": 9115 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018167055744814387, + "loss": 1.5273, + "step": 9116 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018150498299868901, + "loss": 1.6816, + "step": 9117 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001813394765046007, + "loss": 1.6562, + "step": 9118 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018117403797961984, + "loss": 1.5625, + "step": 9119 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018100866743748202, + "loss": 1.5566, + "step": 9120 + }, + { + "epoch": 0.81, + "learning_rate": 0.000180843364891917, + "loss": 1.5742, + "step": 9121 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018067813035664893, + "loss": 1.6094, + "step": 9122 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001805129638453965, + "loss": 1.5781, + "step": 9123 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001803478653718723, + "loss": 1.5195, + "step": 9124 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018018283494978415, + "loss": 1.6074, + "step": 9125 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001800178725928333, + "loss": 1.6504, + "step": 9126 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017985297831471593, + "loss": 1.7441, + "step": 9127 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017968815212912205, + "loss": 1.3672, + "step": 9128 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001795233940497364, + "loss": 1.6094, + "step": 9129 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017935870409023814, + "loss": 1.502, + "step": 9130 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017919408226430068, + "loss": 1.6094, + "step": 9131 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001790295285855912, + "loss": 1.541, + "step": 9132 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017886504306777241, + "loss": 1.5938, + "step": 9133 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001787006257245003, + "loss": 1.6406, + "step": 9134 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017853627656942573, + "loss": 1.6035, + "step": 9135 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017837199561619344, + "loss": 1.5859, + "step": 9136 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001782077828784433, + "loss": 1.5703, + "step": 9137 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017804363836980874, + "loss": 1.5957, + "step": 9138 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017787956210391797, + "loss": 1.6172, + "step": 9139 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017771555409439298, + "loss": 1.6484, + "step": 9140 + }, + { + "epoch": 0.81, + "learning_rate": 0.000177551614354851, + "loss": 1.6445, + "step": 9141 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017738774289890303, + "loss": 1.7305, + "step": 9142 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017722393974015427, + "loss": 1.5742, + "step": 9143 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001770602048922042, + "loss": 1.582, + "step": 9144 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001768965383686475, + "loss": 1.582, + "step": 9145 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017673294018307218, + "loss": 1.5898, + "step": 9146 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017656941034906106, + "loss": 1.5215, + "step": 9147 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017640594888019068, + "loss": 1.5293, + "step": 9148 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017624255579003323, + "loss": 1.5879, + "step": 9149 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017607923109215384, + "loss": 1.5664, + "step": 9150 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017591597480011269, + "loss": 1.5625, + "step": 9151 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017575278692746367, + "loss": 1.5566, + "step": 9152 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017558966748775607, + "loss": 1.5234, + "step": 9153 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017542661649453263, + "loss": 1.5918, + "step": 9154 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001752636339613305, + "loss": 1.5859, + "step": 9155 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001751007199016813, + "loss": 1.4961, + "step": 9156 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017493787432911067, + "loss": 1.5254, + "step": 9157 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017477509725713936, + "loss": 1.6523, + "step": 9158 + }, + { + "epoch": 0.81, + "learning_rate": 0.00017461238869928165, + "loss": 1.6406, + "step": 9159 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001744497486690464, + "loss": 1.5449, + "step": 9160 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001742871771799367, + "loss": 1.5859, + "step": 9161 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017412467424545008, + "loss": 1.5352, + "step": 9162 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017396223987907822, + "loss": 1.4883, + "step": 9163 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001737998740943071, + "loss": 1.5586, + "step": 9164 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017363757690461745, + "loss": 1.5234, + "step": 9165 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017347534832348377, + "loss": 1.5312, + "step": 9166 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017331318836437515, + "loss": 1.625, + "step": 9167 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001731510970407544, + "loss": 1.6465, + "step": 9168 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001729890743660798, + "loss": 1.623, + "step": 9169 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017282712035380287, + "loss": 1.5762, + "step": 9170 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017266523501736997, + "loss": 1.6152, + "step": 9171 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001725034183702212, + "loss": 1.6465, + "step": 9172 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017234167042579196, + "loss": 1.5566, + "step": 9173 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017217999119751104, + "loss": 1.5762, + "step": 9174 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017201838069880182, + "loss": 1.668, + "step": 9175 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001718568389430817, + "loss": 1.625, + "step": 9176 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017169536594376311, + "loss": 1.5352, + "step": 9177 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017153396171425218, + "loss": 1.5918, + "step": 9178 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017137262626794935, + "loss": 1.5137, + "step": 9179 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001712113596182493, + "loss": 1.6074, + "step": 9180 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001710501617785416, + "loss": 1.4551, + "step": 9181 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017088903276220947, + "loss": 1.5918, + "step": 9182 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001707279725826306, + "loss": 1.4805, + "step": 9183 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001705669812531766, + "loss": 1.5254, + "step": 9184 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001704060587872144, + "loss": 1.6074, + "step": 9185 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017024520519810416, + "loss": 1.5117, + "step": 9186 + }, + { + "epoch": 0.82, + "learning_rate": 0.00017008442049920092, + "loss": 1.4004, + "step": 9187 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016992370470385332, + "loss": 1.6152, + "step": 9188 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016976305782540535, + "loss": 1.5684, + "step": 9189 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016960247987719447, + "loss": 1.6328, + "step": 9190 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016944197087255254, + "loss": 1.6016, + "step": 9191 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016928153082480557, + "loss": 1.6426, + "step": 9192 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001691211597472747, + "loss": 1.459, + "step": 9193 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016896085765327417, + "loss": 1.5977, + "step": 9194 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016880062455611322, + "loss": 1.5645, + "step": 9195 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016864046046909497, + "loss": 1.6602, + "step": 9196 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016848036540551735, + "loss": 1.5957, + "step": 9197 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016832033937867196, + "loss": 1.666, + "step": 9198 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016816038240184518, + "loss": 1.6035, + "step": 9199 + }, + { + "epoch": 0.82, + "learning_rate": 0.000168000494488317, + "loss": 1.498, + "step": 9200 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001678406756513624, + "loss": 1.5234, + "step": 9201 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001676809259042502, + "loss": 1.4883, + "step": 9202 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016752124526024336, + "loss": 1.4902, + "step": 9203 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016736163373259983, + "loss": 1.4062, + "step": 9204 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016720209133457099, + "loss": 1.5742, + "step": 9205 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001670426180794029, + "loss": 1.5312, + "step": 9206 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016688321398033578, + "loss": 1.6523, + "step": 9207 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016672387905060382, + "loss": 1.5508, + "step": 9208 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016656461330343632, + "loss": 1.498, + "step": 9209 + }, + { + "epoch": 0.82, + "learning_rate": 0.000166405416752056, + "loss": 1.582, + "step": 9210 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001662462894096801, + "loss": 1.5645, + "step": 9211 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016608723128951996, + "loss": 1.6133, + "step": 9212 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016592824240478177, + "loss": 1.5488, + "step": 9213 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001657693227686654, + "loss": 1.625, + "step": 9214 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016561047239436466, + "loss": 1.5352, + "step": 9215 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016545169129506877, + "loss": 1.4473, + "step": 9216 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016529297948396016, + "loss": 1.5254, + "step": 9217 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016513433697421586, + "loss": 1.5215, + "step": 9218 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001649757637790069, + "loss": 1.5195, + "step": 9219 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016481725991149932, + "loss": 1.6738, + "step": 9220 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016465882538485243, + "loss": 1.4727, + "step": 9221 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016450046021222042, + "loss": 1.6387, + "step": 9222 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001643421644067512, + "loss": 1.709, + "step": 9223 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001641839379815878, + "loss": 1.5449, + "step": 9224 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016402578094986653, + "loss": 1.668, + "step": 9225 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016386769332471851, + "loss": 1.4629, + "step": 9226 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001637096751192686, + "loss": 1.6914, + "step": 9227 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016355172634663673, + "loss": 1.4141, + "step": 9228 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016339384701993633, + "loss": 1.5488, + "step": 9229 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001632360371522752, + "loss": 1.4883, + "step": 9230 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016307829675675544, + "loss": 1.5742, + "step": 9231 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001629206258464737, + "loss": 1.6445, + "step": 9232 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001627630244345204, + "loss": 1.5703, + "step": 9233 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001626054925339804, + "loss": 1.5879, + "step": 9234 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016244803015793243, + "loss": 1.5566, + "step": 9235 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016229063731945038, + "loss": 1.6191, + "step": 9236 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016213331403160125, + "loss": 1.6328, + "step": 9237 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016197606030744706, + "loss": 1.6094, + "step": 9238 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016181887616004365, + "loss": 1.6523, + "step": 9239 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016166176160244128, + "loss": 1.6113, + "step": 9240 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016150471664768418, + "loss": 1.418, + "step": 9241 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016134774130881102, + "loss": 1.5703, + "step": 9242 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001611908355988545, + "loss": 1.6387, + "step": 9243 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001610339995308422, + "loss": 1.7637, + "step": 9244 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001608772331177949, + "loss": 1.6094, + "step": 9245 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016072053637272832, + "loss": 1.625, + "step": 9246 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016056390930865194, + "loss": 1.5273, + "step": 9247 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016040735193857003, + "loss": 1.4746, + "step": 9248 + }, + { + "epoch": 0.82, + "learning_rate": 0.00016025086427548063, + "loss": 1.4277, + "step": 9249 + }, + { + "epoch": 0.82, + "learning_rate": 0.000160094446332376, + "loss": 1.5664, + "step": 9250 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015993809812224247, + "loss": 1.4707, + "step": 9251 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015978181965806137, + "loss": 1.5547, + "step": 9252 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015962561095280738, + "loss": 1.7852, + "step": 9253 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015946947201944973, + "loss": 1.4258, + "step": 9254 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015931340287095154, + "loss": 1.5312, + "step": 9255 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015915740352027085, + "loss": 1.6172, + "step": 9256 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015900147398035936, + "loss": 1.5566, + "step": 9257 + }, + { + "epoch": 0.82, + "learning_rate": 0.000158845614264163, + "loss": 1.4453, + "step": 9258 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015868982438462176, + "loss": 1.666, + "step": 9259 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015853410435467052, + "loss": 1.7832, + "step": 9260 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015837845418723774, + "loss": 1.6367, + "step": 9261 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015822287389524614, + "loss": 1.5664, + "step": 9262 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015806736349161256, + "loss": 1.5332, + "step": 9263 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015791192298924872, + "loss": 1.5469, + "step": 9264 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001577565524010598, + "loss": 1.5352, + "step": 9265 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001576012517399452, + "loss": 1.543, + "step": 9266 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015744602101879879, + "loss": 1.5371, + "step": 9267 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015729086025050888, + "loss": 1.5332, + "step": 9268 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015713576944795748, + "loss": 1.6289, + "step": 9269 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001569807486240209, + "loss": 1.5215, + "step": 9270 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015682579779156946, + "loss": 1.6602, + "step": 9271 + }, + { + "epoch": 0.82, + "learning_rate": 0.00015667091696346846, + "loss": 1.6328, + "step": 9272 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001565161061525766, + "loss": 1.5645, + "step": 9273 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015636136537174695, + "loss": 1.5176, + "step": 9274 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015620669463382674, + "loss": 1.5586, + "step": 9275 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015605209395165776, + "loss": 1.5566, + "step": 9276 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001558975633380756, + "loss": 1.6035, + "step": 9277 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001557431028059101, + "loss": 1.4863, + "step": 9278 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015558871236798523, + "loss": 1.6426, + "step": 9279 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001554343920371193, + "loss": 1.6074, + "step": 9280 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001552801418261247, + "loss": 1.6348, + "step": 9281 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001551259617478078, + "loss": 1.6328, + "step": 9282 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015497185181496975, + "loss": 1.6055, + "step": 9283 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015481781204040534, + "loss": 1.6445, + "step": 9284 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001546638424369038, + "loss": 1.6113, + "step": 9285 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001545099430172482, + "loss": 1.6758, + "step": 9286 + }, + { + "epoch": 0.83, + "learning_rate": 0.000154356113794216, + "loss": 1.5488, + "step": 9287 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015420235478057898, + "loss": 1.502, + "step": 9288 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015404866598910306, + "loss": 1.6836, + "step": 9289 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015389504743254802, + "loss": 1.4785, + "step": 9290 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015374149912366787, + "loss": 1.5137, + "step": 9291 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001535880210752113, + "loss": 1.5137, + "step": 9292 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001534346132999206, + "loss": 1.5605, + "step": 9293 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001532812758105324, + "loss": 1.4922, + "step": 9294 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015312800861977727, + "loss": 1.4199, + "step": 9295 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015297481174038075, + "loss": 1.625, + "step": 9296 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015282168518506155, + "loss": 1.5938, + "step": 9297 + }, + { + "epoch": 0.83, + "learning_rate": 0.000152668628966533, + "loss": 1.5117, + "step": 9298 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015251564309750275, + "loss": 1.5176, + "step": 9299 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015236272759067238, + "loss": 1.5508, + "step": 9300 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015220988245873767, + "loss": 1.6191, + "step": 9301 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015205710771438817, + "loss": 1.5918, + "step": 9302 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015190440337030852, + "loss": 1.6289, + "step": 9303 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015175176943917667, + "loss": 1.6113, + "step": 9304 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015159920593366515, + "loss": 1.5527, + "step": 9305 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015144671286644018, + "loss": 1.541, + "step": 9306 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015129429025016294, + "loss": 1.5254, + "step": 9307 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001511419380974881, + "loss": 1.4766, + "step": 9308 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015098965642106456, + "loss": 1.7266, + "step": 9309 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015083744523353536, + "loss": 1.5527, + "step": 9310 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015068530454753816, + "loss": 1.6719, + "step": 9311 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001505332343757042, + "loss": 1.7246, + "step": 9312 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001503812347306591, + "loss": 1.5859, + "step": 9313 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015022930562502246, + "loss": 1.5957, + "step": 9314 + }, + { + "epoch": 0.83, + "learning_rate": 0.00015007744707140846, + "loss": 1.5352, + "step": 9315 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014992565908242495, + "loss": 1.5957, + "step": 9316 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014977394167067415, + "loss": 1.6172, + "step": 9317 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014962229484875224, + "loss": 1.5312, + "step": 9318 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014947071862924977, + "loss": 1.5898, + "step": 9319 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014931921302475127, + "loss": 1.6289, + "step": 9320 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014916777804783554, + "loss": 1.6172, + "step": 9321 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001490164137110751, + "loss": 1.4395, + "step": 9322 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014886512002703733, + "loss": 1.6934, + "step": 9323 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001487138970082834, + "loss": 1.707, + "step": 9324 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001485627446673683, + "loss": 1.6465, + "step": 9325 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001484116630168414, + "loss": 1.623, + "step": 9326 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014826065206924644, + "loss": 1.5723, + "step": 9327 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014810971183712107, + "loss": 1.5176, + "step": 9328 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014795884233299695, + "loss": 1.5059, + "step": 9329 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001478080435693998, + "loss": 1.582, + "step": 9330 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014765731555885009, + "loss": 1.6367, + "step": 9331 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014750665831386167, + "loss": 1.5762, + "step": 9332 + }, + { + "epoch": 0.83, + "learning_rate": 0.000147356071846943, + "loss": 1.6094, + "step": 9333 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014720555617059617, + "loss": 1.7559, + "step": 9334 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001470551112973181, + "loss": 1.5957, + "step": 9335 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014690473723959929, + "loss": 1.6309, + "step": 9336 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001467544340099246, + "loss": 1.625, + "step": 9337 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014660420162077248, + "loss": 1.6094, + "step": 9338 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001464540400846166, + "loss": 1.6445, + "step": 9339 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014630394941392367, + "loss": 1.666, + "step": 9340 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014615392962115514, + "loss": 1.6465, + "step": 9341 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014600398071876598, + "loss": 1.5098, + "step": 9342 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001458541027192062, + "loss": 1.5391, + "step": 9343 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014570429563491905, + "loss": 1.4941, + "step": 9344 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001455545594783424, + "loss": 1.5801, + "step": 9345 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014540489426190774, + "loss": 1.5625, + "step": 9346 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014525529999804144, + "loss": 1.6895, + "step": 9347 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014510577669916337, + "loss": 1.5664, + "step": 9348 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014495632437768757, + "loss": 1.5723, + "step": 9349 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014480694304602217, + "loss": 1.6035, + "step": 9350 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014465763271656983, + "loss": 1.5273, + "step": 9351 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014450839340172695, + "loss": 1.5098, + "step": 9352 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014435922511388388, + "loss": 1.5723, + "step": 9353 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001442101278654253, + "loss": 1.5137, + "step": 9354 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001440611016687302, + "loss": 1.5762, + "step": 9355 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014391214653617146, + "loss": 1.6426, + "step": 9356 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014376326248011583, + "loss": 1.6172, + "step": 9357 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014361444951292447, + "loss": 1.7168, + "step": 9358 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014346570764695255, + "loss": 1.6016, + "step": 9359 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014331703689454933, + "loss": 1.584, + "step": 9360 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014316843726805794, + "loss": 1.5508, + "step": 9361 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001430199087798162, + "loss": 1.5293, + "step": 9362 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014287145144215553, + "loss": 1.5371, + "step": 9363 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014272306526740152, + "loss": 1.6172, + "step": 9364 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014257475026787403, + "loss": 1.6562, + "step": 9365 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001424265064558864, + "loss": 1.5898, + "step": 9366 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001422783338437472, + "loss": 1.5156, + "step": 9367 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001421302324437581, + "loss": 1.5078, + "step": 9368 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014198220226821522, + "loss": 1.5664, + "step": 9369 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014183424332940854, + "loss": 1.5254, + "step": 9370 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014168635563962262, + "loss": 1.5488, + "step": 9371 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001415385392111358, + "loss": 1.6348, + "step": 9372 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014139079405622035, + "loss": 1.5586, + "step": 9373 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001412431201871426, + "loss": 1.5801, + "step": 9374 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014109551761616357, + "loss": 1.7656, + "step": 9375 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014094798635553785, + "loss": 1.4922, + "step": 9376 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014080052641751394, + "loss": 1.6445, + "step": 9377 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014065313781433454, + "loss": 1.459, + "step": 9378 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014050582055823713, + "loss": 1.6309, + "step": 9379 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001403585746614523, + "loss": 1.6016, + "step": 9380 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014021140013620526, + "loss": 1.5859, + "step": 9381 + }, + { + "epoch": 0.83, + "learning_rate": 0.00014006429699471478, + "loss": 1.6562, + "step": 9382 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013991726524919458, + "loss": 1.6055, + "step": 9383 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001397703049118517, + "loss": 1.5508, + "step": 9384 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013962341599488726, + "loss": 1.5391, + "step": 9385 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013947659851049722, + "loss": 1.5508, + "step": 9386 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013932985247087072, + "loss": 1.6426, + "step": 9387 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013918317788819135, + "loss": 1.5664, + "step": 9388 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013903657477463648, + "loss": 1.541, + "step": 9389 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001388900431423783, + "loss": 1.5742, + "step": 9390 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013874358300358235, + "loss": 1.6309, + "step": 9391 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001385971943704084, + "loss": 1.6152, + "step": 9392 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013845087725501026, + "loss": 1.6113, + "step": 9393 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001383046316695361, + "loss": 1.6953, + "step": 9394 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001381584576261279, + "loss": 1.6445, + "step": 9395 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013801235513692146, + "loss": 1.4746, + "step": 9396 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001378663242140472, + "loss": 1.4141, + "step": 9397 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013772036486962925, + "loss": 1.5879, + "step": 9398 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013757447711578564, + "loss": 1.5977, + "step": 9399 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001374286609646289, + "loss": 1.4609, + "step": 9400 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013728291642826508, + "loss": 1.5, + "step": 9401 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001371372435187951, + "loss": 1.6328, + "step": 9402 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013699164224831295, + "loss": 1.541, + "step": 9403 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013684611262890746, + "loss": 1.4609, + "step": 9404 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001367006546726608, + "loss": 1.5879, + "step": 9405 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013655526839165011, + "loss": 1.6055, + "step": 9406 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013640995379794575, + "loss": 1.6699, + "step": 9407 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013626471090361235, + "loss": 1.5918, + "step": 9408 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013611953972070867, + "loss": 1.5684, + "step": 9409 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013597444026128793, + "loss": 1.5234, + "step": 9410 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013582941253739657, + "loss": 1.5703, + "step": 9411 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013568445656107563, + "loss": 1.5117, + "step": 9412 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013553957234435976, + "loss": 1.6094, + "step": 9413 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001353947598992785, + "loss": 1.5508, + "step": 9414 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001352500192378545, + "loss": 1.5352, + "step": 9415 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013510535037210492, + "loss": 1.6074, + "step": 9416 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013496075331404056, + "loss": 1.4902, + "step": 9417 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013481622807566708, + "loss": 1.584, + "step": 9418 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013467177466898328, + "loss": 1.5938, + "step": 9419 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013452739310598262, + "loss": 1.7051, + "step": 9420 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013438308339865202, + "loss": 1.6055, + "step": 9421 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013423884555897303, + "loss": 1.4746, + "step": 9422 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013409467959892108, + "loss": 1.584, + "step": 9423 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013395058553046524, + "loss": 1.5137, + "step": 9424 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013380656336556873, + "loss": 1.5449, + "step": 9425 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013366261311618943, + "loss": 1.4824, + "step": 9426 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013351873479427857, + "loss": 1.5938, + "step": 9427 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013337492841178166, + "loss": 1.5234, + "step": 9428 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013323119398063788, + "loss": 1.6328, + "step": 9429 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013308753151278118, + "loss": 1.6562, + "step": 9430 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013294394102013896, + "loss": 1.627, + "step": 9431 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013280042251463277, + "loss": 1.6133, + "step": 9432 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013265697600817794, + "loss": 1.5352, + "step": 9433 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013251360151268453, + "loss": 1.6641, + "step": 9434 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013237029904005605, + "loss": 1.5508, + "step": 9435 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013222706860219002, + "loss": 1.5957, + "step": 9436 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001320839102109781, + "loss": 1.4746, + "step": 9437 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001319408238783062, + "loss": 1.5566, + "step": 9438 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013179780961605382, + "loss": 1.502, + "step": 9439 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013165486743609456, + "loss": 1.6035, + "step": 9440 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013151199735029662, + "loss": 1.5957, + "step": 9441 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001313691993705215, + "loss": 1.5996, + "step": 9442 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013122647350862494, + "loss": 1.5645, + "step": 9443 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013108381977645667, + "loss": 1.5312, + "step": 9444 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013094123818586044, + "loss": 1.5645, + "step": 9445 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013079872874867437, + "loss": 1.4766, + "step": 9446 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013065629147673009, + "loss": 1.5488, + "step": 9447 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013051392638185343, + "loss": 1.6191, + "step": 9448 + }, + { + "epoch": 0.84, + "learning_rate": 0.000130371633475864, + "loss": 1.6914, + "step": 9449 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001302294127705761, + "loss": 1.5801, + "step": 9450 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013008726427779727, + "loss": 1.7324, + "step": 9451 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001299451880093293, + "loss": 1.5605, + "step": 9452 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001298031839769681, + "loss": 1.5371, + "step": 9453 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001296612521925037, + "loss": 1.6133, + "step": 9454 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012951939266771985, + "loss": 1.6719, + "step": 9455 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012937760541439436, + "loss": 1.582, + "step": 9456 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012923589044429884, + "loss": 1.5117, + "step": 9457 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012909424776919964, + "loss": 1.6426, + "step": 9458 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012895267740085649, + "loss": 1.4336, + "step": 9459 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012881117935102304, + "loss": 1.6445, + "step": 9460 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012866975363144705, + "loss": 1.5566, + "step": 9461 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001285284002538707, + "loss": 1.6504, + "step": 9462 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012838711923002965, + "loss": 1.6875, + "step": 9463 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001282459105716537, + "loss": 1.5527, + "step": 9464 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012810477429046652, + "loss": 1.5469, + "step": 9465 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012796371039818633, + "loss": 1.5898, + "step": 9466 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012782271890652463, + "loss": 1.5352, + "step": 9467 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001276817998271872, + "loss": 1.5898, + "step": 9468 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012754095317187396, + "loss": 1.5781, + "step": 9469 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012740017895227862, + "loss": 1.5605, + "step": 9470 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001272594771800889, + "loss": 1.7207, + "step": 9471 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012711884786698635, + "loss": 1.5723, + "step": 9472 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001269782910246471, + "loss": 1.5117, + "step": 9473 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012683780666474054, + "loss": 1.6289, + "step": 9474 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001266973947989306, + "loss": 1.6445, + "step": 9475 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012655705543887464, + "loss": 1.7324, + "step": 9476 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012641678859622453, + "loss": 1.6758, + "step": 9477 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012627659428262572, + "loss": 1.6953, + "step": 9478 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001261364725097177, + "loss": 1.6152, + "step": 9479 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012599642328913453, + "loss": 1.6367, + "step": 9480 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001258564466325035, + "loss": 1.5488, + "step": 9481 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012571654255144604, + "loss": 1.6074, + "step": 9482 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012557671105757785, + "loss": 1.5742, + "step": 9483 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012543695216250806, + "loss": 1.7012, + "step": 9484 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001252972658778405, + "loss": 1.6387, + "step": 9485 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001251576522151725, + "loss": 1.627, + "step": 9486 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001250181111860954, + "loss": 1.5996, + "step": 9487 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001248786428021943, + "loss": 1.6426, + "step": 9488 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012473924707504913, + "loss": 1.5645, + "step": 9489 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012459992401623278, + "loss": 1.5996, + "step": 9490 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001244606736373126, + "loss": 1.5684, + "step": 9491 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012432149594984965, + "loss": 1.5801, + "step": 9492 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001241823909653995, + "loss": 1.5781, + "step": 9493 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001240433586955111, + "loss": 1.5898, + "step": 9494 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012390439915172758, + "loss": 1.6699, + "step": 9495 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012376551234558576, + "loss": 1.6035, + "step": 9496 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012362669828861717, + "loss": 1.6641, + "step": 9497 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001234879569923466, + "loss": 1.5742, + "step": 9498 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012334928846829306, + "loss": 1.6719, + "step": 9499 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012321069272796915, + "loss": 1.4883, + "step": 9500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012307216978288228, + "loss": 1.5293, + "step": 9501 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012293371964453305, + "loss": 1.5234, + "step": 9502 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012279534232441624, + "loss": 1.5977, + "step": 9503 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012265703783402026, + "loss": 1.5801, + "step": 9504 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012251880618482846, + "loss": 1.5762, + "step": 9505 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012238064738831722, + "loss": 1.6328, + "step": 9506 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012224256145595692, + "loss": 1.5664, + "step": 9507 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012210454839921227, + "loss": 1.5762, + "step": 9508 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012196660822954186, + "loss": 1.6582, + "step": 9509 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012182874095839813, + "loss": 1.5684, + "step": 9510 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012169094659722734, + "loss": 1.5195, + "step": 9511 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012155322515746981, + "loss": 1.4629, + "step": 9512 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012141557665056014, + "loss": 1.6035, + "step": 9513 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001212780010879263, + "loss": 1.5879, + "step": 9514 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012114049848099062, + "loss": 1.6074, + "step": 9515 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012100306884116907, + "loss": 1.5137, + "step": 9516 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012086571217987185, + "loss": 1.5566, + "step": 9517 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012072842850850285, + "loss": 1.5586, + "step": 9518 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012059121783845994, + "loss": 1.5332, + "step": 9519 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012045408018113525, + "loss": 1.6719, + "step": 9520 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012031701554791453, + "loss": 1.5156, + "step": 9521 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012018002395017746, + "loss": 1.6172, + "step": 9522 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012004310539929785, + "loss": 1.7617, + "step": 9523 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011990625990664295, + "loss": 1.6699, + "step": 9524 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011976948748357485, + "loss": 1.5801, + "step": 9525 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011963278814144895, + "loss": 1.5762, + "step": 9526 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001194961618916145, + "loss": 1.6504, + "step": 9527 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011935960874541462, + "loss": 1.584, + "step": 9528 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011922312871418728, + "loss": 1.4805, + "step": 9529 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011908672180926327, + "loss": 1.5723, + "step": 9530 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011895038804196778, + "loss": 1.6133, + "step": 9531 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011881412742361974, + "loss": 1.5449, + "step": 9532 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011867793996553255, + "loss": 1.4902, + "step": 9533 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011854182567901294, + "loss": 1.5391, + "step": 9534 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011840578457536178, + "loss": 1.6738, + "step": 9535 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011826981666587356, + "loss": 1.5254, + "step": 9536 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011813392196183747, + "loss": 1.5391, + "step": 9537 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011799810047453596, + "loss": 1.5156, + "step": 9538 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011786235221524555, + "loss": 1.5625, + "step": 9539 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011772667719523655, + "loss": 1.6445, + "step": 9540 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011759107542577362, + "loss": 1.4746, + "step": 9541 + }, + { + "epoch": 0.85, + "learning_rate": 0.000117455546918115, + "loss": 1.6387, + "step": 9542 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011732009168351276, + "loss": 1.5312, + "step": 9543 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011718470973321305, + "loss": 1.5801, + "step": 9544 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011704940107845618, + "loss": 1.5625, + "step": 9545 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011691416573047597, + "loss": 1.6133, + "step": 9546 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011677900370050032, + "loss": 1.6133, + "step": 9547 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001166439149997508, + "loss": 1.5527, + "step": 9548 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011650889963944344, + "loss": 1.4824, + "step": 9549 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011637395763078784, + "loss": 1.5449, + "step": 9550 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011623908898498714, + "loss": 1.6094, + "step": 9551 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011610429371323938, + "loss": 1.5625, + "step": 9552 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011596957182673551, + "loss": 1.6289, + "step": 9553 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001158349233366609, + "loss": 1.5234, + "step": 9554 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011570034825419474, + "loss": 1.5977, + "step": 9555 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011556584659050994, + "loss": 1.4805, + "step": 9556 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011543141835677362, + "loss": 1.4492, + "step": 9557 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011529706356414638, + "loss": 1.6445, + "step": 9558 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001151627822237834, + "loss": 1.4902, + "step": 9559 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011502857434683334, + "loss": 1.627, + "step": 9560 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011489443994443849, + "loss": 1.5938, + "step": 9561 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011476037902773539, + "loss": 1.5684, + "step": 9562 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011462639160785438, + "loss": 1.625, + "step": 9563 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011449247769592008, + "loss": 1.666, + "step": 9564 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011435863730305041, + "loss": 1.6484, + "step": 9565 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011422487044035735, + "loss": 1.5996, + "step": 9566 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011409117711894679, + "loss": 1.6328, + "step": 9567 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011395755734991909, + "loss": 1.5996, + "step": 9568 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011382401114436757, + "loss": 1.6152, + "step": 9569 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011369053851337996, + "loss": 1.4453, + "step": 9570 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001135571394680377, + "loss": 1.5859, + "step": 9571 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011342381401941648, + "loss": 1.5039, + "step": 9572 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011329056217858547, + "loss": 1.6406, + "step": 9573 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011315738395660791, + "loss": 1.5996, + "step": 9574 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011302427936454062, + "loss": 1.4922, + "step": 9575 + }, + { + "epoch": 0.85, + "learning_rate": 0.000112891248413435, + "loss": 1.6875, + "step": 9576 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011275829111433577, + "loss": 1.6055, + "step": 9577 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011262540747828165, + "loss": 1.625, + "step": 9578 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011249259751630502, + "loss": 1.498, + "step": 9579 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011235986123943286, + "loss": 1.5352, + "step": 9580 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011222719865868536, + "loss": 1.6465, + "step": 9581 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011209460978507668, + "loss": 1.4727, + "step": 9582 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011196209462961504, + "loss": 1.6113, + "step": 9583 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011182965320330263, + "loss": 1.7773, + "step": 9584 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011169728551713532, + "loss": 1.5254, + "step": 9585 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011156499158210288, + "loss": 1.5586, + "step": 9586 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011143277140918872, + "loss": 1.6328, + "step": 9587 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011130062500937089, + "loss": 1.6113, + "step": 9588 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011116855239362055, + "loss": 1.5391, + "step": 9589 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011103655357290298, + "loss": 1.5977, + "step": 9590 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011090462855817729, + "loss": 1.5684, + "step": 9591 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001107727773603967, + "loss": 1.6504, + "step": 9592 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011064099999050803, + "loss": 1.6523, + "step": 9593 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011050929645945218, + "loss": 1.4668, + "step": 9594 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011037766677816364, + "loss": 1.5918, + "step": 9595 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011024611095757097, + "loss": 1.6133, + "step": 9596 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001101146290085967, + "loss": 1.5684, + "step": 9597 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010998322094215674, + "loss": 1.6211, + "step": 9598 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010985188676916168, + "loss": 1.5586, + "step": 9599 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010972062650051518, + "loss": 1.6445, + "step": 9600 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010958944014711537, + "loss": 1.5391, + "step": 9601 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010945832771985365, + "loss": 1.5352, + "step": 9602 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010932728922961566, + "loss": 1.5742, + "step": 9603 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010919632468728113, + "loss": 1.5215, + "step": 9604 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010906543410372316, + "loss": 1.4824, + "step": 9605 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010893461748980882, + "loss": 1.6406, + "step": 9606 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010880387485639919, + "loss": 1.5215, + "step": 9607 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010867320621434929, + "loss": 1.6387, + "step": 9608 + }, + { + "epoch": 0.85, + "learning_rate": 0.00010854261157450773, + "loss": 1.5508, + "step": 9609 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001084120909477172, + "loss": 1.6719, + "step": 9610 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001082816443448138, + "loss": 1.5137, + "step": 9611 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010815127177662832, + "loss": 1.4883, + "step": 9612 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010802097325398474, + "loss": 1.4961, + "step": 9613 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010789074878770088, + "loss": 1.5859, + "step": 9614 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010776059838858865, + "loss": 1.6016, + "step": 9615 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010763052206745406, + "loss": 1.6543, + "step": 9616 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010750051983509646, + "loss": 1.6289, + "step": 9617 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010737059170230912, + "loss": 1.6074, + "step": 9618 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001072407376798793, + "loss": 1.5254, + "step": 9619 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010711095777858848, + "loss": 1.5195, + "step": 9620 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010698125200921127, + "loss": 1.543, + "step": 9621 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010685162038251661, + "loss": 1.6094, + "step": 9622 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010672206290926678, + "loss": 1.6191, + "step": 9623 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010659257960021884, + "loss": 1.5898, + "step": 9624 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010646317046612274, + "loss": 1.5234, + "step": 9625 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001063338355177228, + "loss": 1.6152, + "step": 9626 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010620457476575662, + "loss": 1.4512, + "step": 9627 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010607538822095674, + "loss": 1.5977, + "step": 9628 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010594627589404838, + "loss": 1.5918, + "step": 9629 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010581723779575125, + "loss": 1.6602, + "step": 9630 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010568827393677838, + "loss": 1.5781, + "step": 9631 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001055593843278374, + "loss": 1.5195, + "step": 9632 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001054305689796291, + "loss": 1.75, + "step": 9633 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010530182790284848, + "loss": 1.5547, + "step": 9634 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010517316110818409, + "loss": 1.6582, + "step": 9635 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001050445686063185, + "loss": 1.6582, + "step": 9636 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010491605040792806, + "loss": 1.6738, + "step": 9637 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010478760652368301, + "loss": 1.5137, + "step": 9638 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001046592369642475, + "loss": 1.543, + "step": 9639 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010453094174027921, + "loss": 1.6992, + "step": 9640 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010440272086242986, + "loss": 1.5508, + "step": 9641 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001042745743413448, + "loss": 1.6602, + "step": 9642 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010414650218766376, + "loss": 1.5371, + "step": 9643 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010401850441201954, + "loss": 1.6016, + "step": 9644 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010389058102503934, + "loss": 1.543, + "step": 9645 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010376273203734376, + "loss": 1.5293, + "step": 9646 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010363495745954766, + "loss": 1.6113, + "step": 9647 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010350725730225941, + "loss": 1.5645, + "step": 9648 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010337963157608132, + "loss": 1.4473, + "step": 9649 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010325208029160926, + "loss": 1.6562, + "step": 9650 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010312460345943353, + "loss": 1.6133, + "step": 9651 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010299720109013765, + "loss": 1.627, + "step": 9652 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001028698731942992, + "loss": 1.5215, + "step": 9653 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010274261978248933, + "loss": 1.6602, + "step": 9654 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010261544086527363, + "loss": 1.5566, + "step": 9655 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010248833645321087, + "loss": 1.6543, + "step": 9656 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010236130655685404, + "loss": 1.6387, + "step": 9657 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010223435118674929, + "loss": 1.5977, + "step": 9658 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001021074703534377, + "loss": 1.4707, + "step": 9659 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010198066406745322, + "loss": 1.6074, + "step": 9660 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010185393233932383, + "loss": 1.5332, + "step": 9661 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010172727517957136, + "loss": 1.5488, + "step": 9662 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010160069259871185, + "loss": 1.6445, + "step": 9663 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010147418460725454, + "loss": 1.5449, + "step": 9664 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010134775121570273, + "loss": 1.6465, + "step": 9665 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010122139243455341, + "loss": 1.6641, + "step": 9666 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010109510827429779, + "loss": 1.5605, + "step": 9667 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010096889874542048, + "loss": 1.5508, + "step": 9668 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010084276385839985, + "loss": 1.6738, + "step": 9669 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010071670362370821, + "loss": 1.6289, + "step": 9670 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010059071805181186, + "loss": 1.5898, + "step": 9671 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010046480715317075, + "loss": 1.5586, + "step": 9672 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010033897093823851, + "loss": 1.5098, + "step": 9673 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010021320941746259, + "loss": 1.5645, + "step": 9674 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001000875226012844, + "loss": 1.6289, + "step": 9675 + }, + { + "epoch": 0.86, + "learning_rate": 9.996191050013903e-05, + "loss": 1.4785, + "step": 9676 + }, + { + "epoch": 0.86, + "learning_rate": 9.983637312445525e-05, + "loss": 1.4883, + "step": 9677 + }, + { + "epoch": 0.86, + "learning_rate": 9.971091048465609e-05, + "loss": 1.627, + "step": 9678 + }, + { + "epoch": 0.86, + "learning_rate": 9.958552259115795e-05, + "loss": 1.627, + "step": 9679 + }, + { + "epoch": 0.86, + "learning_rate": 9.946020945437107e-05, + "loss": 1.5254, + "step": 9680 + }, + { + "epoch": 0.86, + "learning_rate": 9.933497108469946e-05, + "loss": 1.5664, + "step": 9681 + }, + { + "epoch": 0.86, + "learning_rate": 9.920980749254105e-05, + "loss": 1.5664, + "step": 9682 + }, + { + "epoch": 0.86, + "learning_rate": 9.908471868828761e-05, + "loss": 1.6426, + "step": 9683 + }, + { + "epoch": 0.86, + "learning_rate": 9.895970468232474e-05, + "loss": 1.6738, + "step": 9684 + }, + { + "epoch": 0.86, + "learning_rate": 9.883476548503134e-05, + "loss": 1.5371, + "step": 9685 + }, + { + "epoch": 0.86, + "learning_rate": 9.870990110678058e-05, + "loss": 1.5977, + "step": 9686 + }, + { + "epoch": 0.86, + "learning_rate": 9.85851115579396e-05, + "loss": 1.5723, + "step": 9687 + }, + { + "epoch": 0.86, + "learning_rate": 9.846039684886864e-05, + "loss": 1.6387, + "step": 9688 + }, + { + "epoch": 0.86, + "learning_rate": 9.833575698992236e-05, + "loss": 1.5332, + "step": 9689 + }, + { + "epoch": 0.86, + "learning_rate": 9.821119199144868e-05, + "loss": 1.5488, + "step": 9690 + }, + { + "epoch": 0.86, + "learning_rate": 9.808670186378998e-05, + "loss": 1.5371, + "step": 9691 + }, + { + "epoch": 0.86, + "learning_rate": 9.796228661728169e-05, + "loss": 1.5273, + "step": 9692 + }, + { + "epoch": 0.86, + "learning_rate": 9.783794626225362e-05, + "loss": 1.627, + "step": 9693 + }, + { + "epoch": 0.86, + "learning_rate": 9.771368080902865e-05, + "loss": 1.5449, + "step": 9694 + }, + { + "epoch": 0.86, + "learning_rate": 9.75894902679244e-05, + "loss": 1.5195, + "step": 9695 + }, + { + "epoch": 0.86, + "learning_rate": 9.746537464925154e-05, + "loss": 1.4453, + "step": 9696 + }, + { + "epoch": 0.86, + "learning_rate": 9.73413339633148e-05, + "loss": 1.6289, + "step": 9697 + }, + { + "epoch": 0.86, + "learning_rate": 9.721736822041239e-05, + "loss": 1.5352, + "step": 9698 + }, + { + "epoch": 0.86, + "learning_rate": 9.709347743083696e-05, + "loss": 1.6074, + "step": 9699 + }, + { + "epoch": 0.86, + "learning_rate": 9.69696616048743e-05, + "loss": 1.5527, + "step": 9700 + }, + { + "epoch": 0.86, + "learning_rate": 9.684592075280408e-05, + "loss": 1.5703, + "step": 9701 + }, + { + "epoch": 0.86, + "learning_rate": 9.672225488489983e-05, + "loss": 1.7207, + "step": 9702 + }, + { + "epoch": 0.86, + "learning_rate": 9.659866401142925e-05, + "loss": 1.4941, + "step": 9703 + }, + { + "epoch": 0.86, + "learning_rate": 9.647514814265323e-05, + "loss": 1.5547, + "step": 9704 + }, + { + "epoch": 0.86, + "learning_rate": 9.635170728882648e-05, + "loss": 1.5801, + "step": 9705 + }, + { + "epoch": 0.86, + "learning_rate": 9.622834146019776e-05, + "loss": 1.707, + "step": 9706 + }, + { + "epoch": 0.86, + "learning_rate": 9.610505066700958e-05, + "loss": 1.5703, + "step": 9707 + }, + { + "epoch": 0.86, + "learning_rate": 9.598183491949819e-05, + "loss": 1.498, + "step": 9708 + }, + { + "epoch": 0.86, + "learning_rate": 9.585869422789329e-05, + "loss": 1.5156, + "step": 9709 + }, + { + "epoch": 0.86, + "learning_rate": 9.573562860241868e-05, + "loss": 1.666, + "step": 9710 + }, + { + "epoch": 0.86, + "learning_rate": 9.561263805329212e-05, + "loss": 1.5117, + "step": 9711 + }, + { + "epoch": 0.86, + "learning_rate": 9.548972259072463e-05, + "loss": 1.5547, + "step": 9712 + }, + { + "epoch": 0.86, + "learning_rate": 9.536688222492129e-05, + "loss": 1.4785, + "step": 9713 + }, + { + "epoch": 0.86, + "learning_rate": 9.524411696608093e-05, + "loss": 1.6582, + "step": 9714 + }, + { + "epoch": 0.86, + "learning_rate": 9.512142682439606e-05, + "loss": 1.541, + "step": 9715 + }, + { + "epoch": 0.86, + "learning_rate": 9.499881181005287e-05, + "loss": 1.7051, + "step": 9716 + }, + { + "epoch": 0.86, + "learning_rate": 9.487627193323167e-05, + "loss": 1.6211, + "step": 9717 + }, + { + "epoch": 0.86, + "learning_rate": 9.475380720410631e-05, + "loss": 1.498, + "step": 9718 + }, + { + "epoch": 0.86, + "learning_rate": 9.463141763284422e-05, + "loss": 1.541, + "step": 9719 + }, + { + "epoch": 0.86, + "learning_rate": 9.450910322960694e-05, + "loss": 1.6172, + "step": 9720 + }, + { + "epoch": 0.86, + "learning_rate": 9.438686400454932e-05, + "loss": 1.5879, + "step": 9721 + }, + { + "epoch": 0.86, + "learning_rate": 9.426469996782061e-05, + "loss": 1.6777, + "step": 9722 + }, + { + "epoch": 0.86, + "learning_rate": 9.414261112956334e-05, + "loss": 1.5762, + "step": 9723 + }, + { + "epoch": 0.87, + "learning_rate": 9.402059749991376e-05, + "loss": 1.5781, + "step": 9724 + }, + { + "epoch": 0.87, + "learning_rate": 9.389865908900196e-05, + "loss": 1.5938, + "step": 9725 + }, + { + "epoch": 0.87, + "learning_rate": 9.37767959069522e-05, + "loss": 1.5254, + "step": 9726 + }, + { + "epoch": 0.87, + "learning_rate": 9.365500796388194e-05, + "loss": 1.5488, + "step": 9727 + }, + { + "epoch": 0.87, + "learning_rate": 9.353329526990262e-05, + "loss": 1.6641, + "step": 9728 + }, + { + "epoch": 0.87, + "learning_rate": 9.341165783511917e-05, + "loss": 1.6582, + "step": 9729 + }, + { + "epoch": 0.87, + "learning_rate": 9.329009566963099e-05, + "loss": 1.5, + "step": 9730 + }, + { + "epoch": 0.87, + "learning_rate": 9.31686087835304e-05, + "loss": 1.5059, + "step": 9731 + }, + { + "epoch": 0.87, + "learning_rate": 9.304719718690402e-05, + "loss": 1.6289, + "step": 9732 + }, + { + "epoch": 0.87, + "learning_rate": 9.292586088983168e-05, + "loss": 1.6211, + "step": 9733 + }, + { + "epoch": 0.87, + "learning_rate": 9.280459990238765e-05, + "loss": 1.498, + "step": 9734 + }, + { + "epoch": 0.87, + "learning_rate": 9.268341423463956e-05, + "loss": 1.6504, + "step": 9735 + }, + { + "epoch": 0.87, + "learning_rate": 9.256230389664877e-05, + "loss": 1.6406, + "step": 9736 + }, + { + "epoch": 0.87, + "learning_rate": 9.244126889847016e-05, + "loss": 1.5176, + "step": 9737 + }, + { + "epoch": 0.87, + "learning_rate": 9.232030925015311e-05, + "loss": 1.5449, + "step": 9738 + }, + { + "epoch": 0.87, + "learning_rate": 9.219942496174005e-05, + "loss": 1.5957, + "step": 9739 + }, + { + "epoch": 0.87, + "learning_rate": 9.207861604326739e-05, + "loss": 1.6172, + "step": 9740 + }, + { + "epoch": 0.87, + "learning_rate": 9.195788250476511e-05, + "loss": 1.6152, + "step": 9741 + }, + { + "epoch": 0.87, + "learning_rate": 9.18372243562573e-05, + "loss": 1.6191, + "step": 9742 + }, + { + "epoch": 0.87, + "learning_rate": 9.17166416077615e-05, + "loss": 1.5957, + "step": 9743 + }, + { + "epoch": 0.87, + "learning_rate": 9.159613426928903e-05, + "loss": 1.5137, + "step": 9744 + }, + { + "epoch": 0.87, + "learning_rate": 9.14757023508449e-05, + "loss": 1.5879, + "step": 9745 + }, + { + "epoch": 0.87, + "learning_rate": 9.135534586242822e-05, + "loss": 1.5312, + "step": 9746 + }, + { + "epoch": 0.87, + "learning_rate": 9.123506481403144e-05, + "loss": 1.625, + "step": 9747 + }, + { + "epoch": 0.87, + "learning_rate": 9.111485921564078e-05, + "loss": 1.5664, + "step": 9748 + }, + { + "epoch": 0.87, + "learning_rate": 9.099472907723605e-05, + "loss": 1.6992, + "step": 9749 + }, + { + "epoch": 0.87, + "learning_rate": 9.08746744087916e-05, + "loss": 1.5977, + "step": 9750 + }, + { + "epoch": 0.87, + "learning_rate": 9.075469522027469e-05, + "loss": 1.5078, + "step": 9751 + }, + { + "epoch": 0.87, + "learning_rate": 9.063479152164644e-05, + "loss": 1.5273, + "step": 9752 + }, + { + "epoch": 0.87, + "learning_rate": 9.05149633228619e-05, + "loss": 1.5098, + "step": 9753 + }, + { + "epoch": 0.87, + "learning_rate": 9.03952106338698e-05, + "loss": 1.6562, + "step": 9754 + }, + { + "epoch": 0.87, + "learning_rate": 9.027553346461248e-05, + "loss": 1.5801, + "step": 9755 + }, + { + "epoch": 0.87, + "learning_rate": 9.015593182502623e-05, + "loss": 1.459, + "step": 9756 + }, + { + "epoch": 0.87, + "learning_rate": 9.003640572504101e-05, + "loss": 1.6523, + "step": 9757 + }, + { + "epoch": 0.87, + "learning_rate": 8.991695517458031e-05, + "loss": 1.4629, + "step": 9758 + }, + { + "epoch": 0.87, + "learning_rate": 8.979758018356166e-05, + "loss": 1.6797, + "step": 9759 + }, + { + "epoch": 0.87, + "learning_rate": 8.967828076189589e-05, + "loss": 1.6367, + "step": 9760 + }, + { + "epoch": 0.87, + "learning_rate": 8.955905691948773e-05, + "loss": 1.5, + "step": 9761 + }, + { + "epoch": 0.87, + "learning_rate": 8.943990866623619e-05, + "loss": 1.5156, + "step": 9762 + }, + { + "epoch": 0.87, + "learning_rate": 8.932083601203322e-05, + "loss": 1.4668, + "step": 9763 + }, + { + "epoch": 0.87, + "learning_rate": 8.920183896676481e-05, + "loss": 1.5742, + "step": 9764 + }, + { + "epoch": 0.87, + "learning_rate": 8.90829175403105e-05, + "loss": 1.6387, + "step": 9765 + }, + { + "epoch": 0.87, + "learning_rate": 8.896407174254417e-05, + "loss": 1.6367, + "step": 9766 + }, + { + "epoch": 0.87, + "learning_rate": 8.884530158333259e-05, + "loss": 1.5977, + "step": 9767 + }, + { + "epoch": 0.87, + "learning_rate": 8.872660707253678e-05, + "loss": 1.6914, + "step": 9768 + }, + { + "epoch": 0.87, + "learning_rate": 8.860798822001115e-05, + "loss": 1.5391, + "step": 9769 + }, + { + "epoch": 0.87, + "learning_rate": 8.84894450356043e-05, + "loss": 1.5801, + "step": 9770 + }, + { + "epoch": 0.87, + "learning_rate": 8.837097752915801e-05, + "loss": 1.6035, + "step": 9771 + }, + { + "epoch": 0.87, + "learning_rate": 8.82525857105081e-05, + "loss": 1.5508, + "step": 9772 + }, + { + "epoch": 0.87, + "learning_rate": 8.81342695894839e-05, + "loss": 1.5371, + "step": 9773 + }, + { + "epoch": 0.87, + "learning_rate": 8.801602917590878e-05, + "loss": 1.4102, + "step": 9774 + }, + { + "epoch": 0.87, + "learning_rate": 8.789786447959968e-05, + "loss": 1.4492, + "step": 9775 + }, + { + "epoch": 0.87, + "learning_rate": 8.777977551036697e-05, + "loss": 1.5469, + "step": 9776 + }, + { + "epoch": 0.87, + "learning_rate": 8.766176227801481e-05, + "loss": 1.5293, + "step": 9777 + }, + { + "epoch": 0.87, + "learning_rate": 8.754382479234179e-05, + "loss": 1.668, + "step": 9778 + }, + { + "epoch": 0.87, + "learning_rate": 8.742596306313922e-05, + "loss": 1.5781, + "step": 9779 + }, + { + "epoch": 0.87, + "learning_rate": 8.730817710019257e-05, + "loss": 1.5762, + "step": 9780 + }, + { + "epoch": 0.87, + "learning_rate": 8.719046691328103e-05, + "loss": 1.6445, + "step": 9781 + }, + { + "epoch": 0.87, + "learning_rate": 8.707283251217757e-05, + "loss": 1.6328, + "step": 9782 + }, + { + "epoch": 0.87, + "learning_rate": 8.69552739066487e-05, + "loss": 1.6133, + "step": 9783 + }, + { + "epoch": 0.87, + "learning_rate": 8.683779110645473e-05, + "loss": 1.4961, + "step": 9784 + }, + { + "epoch": 0.87, + "learning_rate": 8.672038412134942e-05, + "loss": 1.5, + "step": 9785 + }, + { + "epoch": 0.87, + "learning_rate": 8.660305296108074e-05, + "loss": 1.5605, + "step": 9786 + }, + { + "epoch": 0.87, + "learning_rate": 8.648579763539e-05, + "loss": 1.4961, + "step": 9787 + }, + { + "epoch": 0.87, + "learning_rate": 8.636861815401231e-05, + "loss": 1.6309, + "step": 9788 + }, + { + "epoch": 0.87, + "learning_rate": 8.625151452667623e-05, + "loss": 1.625, + "step": 9789 + }, + { + "epoch": 0.87, + "learning_rate": 8.613448676310465e-05, + "loss": 1.6133, + "step": 9790 + }, + { + "epoch": 0.87, + "learning_rate": 8.601753487301356e-05, + "loss": 1.6641, + "step": 9791 + }, + { + "epoch": 0.87, + "learning_rate": 8.590065886611287e-05, + "loss": 1.541, + "step": 9792 + }, + { + "epoch": 0.87, + "learning_rate": 8.578385875210627e-05, + "loss": 1.7168, + "step": 9793 + }, + { + "epoch": 0.87, + "learning_rate": 8.566713454069097e-05, + "loss": 1.5918, + "step": 9794 + }, + { + "epoch": 0.87, + "learning_rate": 8.555048624155793e-05, + "loss": 1.6211, + "step": 9795 + }, + { + "epoch": 0.87, + "learning_rate": 8.543391386439192e-05, + "loss": 1.582, + "step": 9796 + }, + { + "epoch": 0.87, + "learning_rate": 8.531741741887145e-05, + "loss": 1.5195, + "step": 9797 + }, + { + "epoch": 0.87, + "learning_rate": 8.520099691466854e-05, + "loss": 1.4902, + "step": 9798 + }, + { + "epoch": 0.87, + "learning_rate": 8.50846523614488e-05, + "loss": 1.4668, + "step": 9799 + }, + { + "epoch": 0.87, + "learning_rate": 8.496838376887195e-05, + "loss": 1.6406, + "step": 9800 + }, + { + "epoch": 0.87, + "learning_rate": 8.485219114659082e-05, + "loss": 1.6152, + "step": 9801 + }, + { + "epoch": 0.87, + "learning_rate": 8.473607450425281e-05, + "loss": 1.5977, + "step": 9802 + }, + { + "epoch": 0.87, + "learning_rate": 8.462003385149807e-05, + "loss": 1.7012, + "step": 9803 + }, + { + "epoch": 0.87, + "learning_rate": 8.450406919796106e-05, + "loss": 1.5273, + "step": 9804 + }, + { + "epoch": 0.87, + "learning_rate": 8.438818055326935e-05, + "loss": 1.5449, + "step": 9805 + }, + { + "epoch": 0.87, + "learning_rate": 8.427236792704507e-05, + "loss": 1.584, + "step": 9806 + }, + { + "epoch": 0.87, + "learning_rate": 8.415663132890327e-05, + "loss": 1.4199, + "step": 9807 + }, + { + "epoch": 0.87, + "learning_rate": 8.404097076845285e-05, + "loss": 1.6328, + "step": 9808 + }, + { + "epoch": 0.87, + "learning_rate": 8.392538625529689e-05, + "loss": 1.5254, + "step": 9809 + }, + { + "epoch": 0.87, + "learning_rate": 8.380987779903138e-05, + "loss": 1.5293, + "step": 9810 + }, + { + "epoch": 0.87, + "learning_rate": 8.369444540924665e-05, + "loss": 1.4473, + "step": 9811 + }, + { + "epoch": 0.87, + "learning_rate": 8.357908909552614e-05, + "loss": 1.5957, + "step": 9812 + }, + { + "epoch": 0.87, + "learning_rate": 8.346380886744764e-05, + "loss": 1.6328, + "step": 9813 + }, + { + "epoch": 0.87, + "learning_rate": 8.334860473458206e-05, + "loss": 1.6309, + "step": 9814 + }, + { + "epoch": 0.87, + "learning_rate": 8.323347670649429e-05, + "loss": 1.6504, + "step": 9815 + }, + { + "epoch": 0.87, + "learning_rate": 8.311842479274246e-05, + "loss": 1.5117, + "step": 9816 + }, + { + "epoch": 0.87, + "learning_rate": 8.300344900287937e-05, + "loss": 1.6504, + "step": 9817 + }, + { + "epoch": 0.87, + "learning_rate": 8.28885493464504e-05, + "loss": 1.6406, + "step": 9818 + }, + { + "epoch": 0.87, + "learning_rate": 8.277372583299514e-05, + "loss": 1.625, + "step": 9819 + }, + { + "epoch": 0.87, + "learning_rate": 8.265897847204673e-05, + "loss": 1.5586, + "step": 9820 + }, + { + "epoch": 0.87, + "learning_rate": 8.254430727313234e-05, + "loss": 1.5996, + "step": 9821 + }, + { + "epoch": 0.87, + "learning_rate": 8.242971224577234e-05, + "loss": 1.6152, + "step": 9822 + }, + { + "epoch": 0.87, + "learning_rate": 8.23151933994808e-05, + "loss": 1.5547, + "step": 9823 + }, + { + "epoch": 0.87, + "learning_rate": 8.220075074376565e-05, + "loss": 1.5117, + "step": 9824 + }, + { + "epoch": 0.87, + "learning_rate": 8.208638428812876e-05, + "loss": 1.6719, + "step": 9825 + }, + { + "epoch": 0.87, + "learning_rate": 8.19720940420653e-05, + "loss": 1.4785, + "step": 9826 + }, + { + "epoch": 0.87, + "learning_rate": 8.1857880015064e-05, + "loss": 1.5625, + "step": 9827 + }, + { + "epoch": 0.87, + "learning_rate": 8.17437422166073e-05, + "loss": 1.5254, + "step": 9828 + }, + { + "epoch": 0.87, + "learning_rate": 8.162968065617193e-05, + "loss": 1.5293, + "step": 9829 + }, + { + "epoch": 0.87, + "learning_rate": 8.151569534322767e-05, + "loss": 1.6758, + "step": 9830 + }, + { + "epoch": 0.87, + "learning_rate": 8.140178628723793e-05, + "loss": 1.6152, + "step": 9831 + }, + { + "epoch": 0.87, + "learning_rate": 8.128795349766016e-05, + "loss": 1.5605, + "step": 9832 + }, + { + "epoch": 0.87, + "learning_rate": 8.117419698394534e-05, + "loss": 1.5449, + "step": 9833 + }, + { + "epoch": 0.87, + "learning_rate": 8.106051675553772e-05, + "loss": 1.5352, + "step": 9834 + }, + { + "epoch": 0.87, + "learning_rate": 8.094691282187605e-05, + "loss": 1.5234, + "step": 9835 + }, + { + "epoch": 0.88, + "learning_rate": 8.083338519239213e-05, + "loss": 1.5586, + "step": 9836 + }, + { + "epoch": 0.88, + "learning_rate": 8.071993387651144e-05, + "loss": 1.6445, + "step": 9837 + }, + { + "epoch": 0.88, + "learning_rate": 8.060655888365331e-05, + "loss": 1.5938, + "step": 9838 + }, + { + "epoch": 0.88, + "learning_rate": 8.049326022323066e-05, + "loss": 1.623, + "step": 9839 + }, + { + "epoch": 0.88, + "learning_rate": 8.038003790464999e-05, + "loss": 1.498, + "step": 9840 + }, + { + "epoch": 0.88, + "learning_rate": 8.026689193731184e-05, + "loss": 1.7012, + "step": 9841 + }, + { + "epoch": 0.88, + "learning_rate": 8.015382233060997e-05, + "loss": 1.6582, + "step": 9842 + }, + { + "epoch": 0.88, + "learning_rate": 8.004082909393195e-05, + "loss": 1.5879, + "step": 9843 + }, + { + "epoch": 0.88, + "learning_rate": 7.992791223665885e-05, + "loss": 1.5977, + "step": 9844 + }, + { + "epoch": 0.88, + "learning_rate": 7.981507176816583e-05, + "loss": 1.5352, + "step": 9845 + }, + { + "epoch": 0.88, + "learning_rate": 7.970230769782138e-05, + "loss": 1.5605, + "step": 9846 + }, + { + "epoch": 0.88, + "learning_rate": 7.958962003498771e-05, + "loss": 1.6348, + "step": 9847 + }, + { + "epoch": 0.88, + "learning_rate": 7.947700878902053e-05, + "loss": 1.4941, + "step": 9848 + }, + { + "epoch": 0.88, + "learning_rate": 7.93644739692696e-05, + "loss": 1.5332, + "step": 9849 + }, + { + "epoch": 0.88, + "learning_rate": 7.9252015585078e-05, + "loss": 1.584, + "step": 9850 + }, + { + "epoch": 0.88, + "learning_rate": 7.913963364578259e-05, + "loss": 1.5312, + "step": 9851 + }, + { + "epoch": 0.88, + "learning_rate": 7.902732816071368e-05, + "loss": 1.6523, + "step": 9852 + }, + { + "epoch": 0.88, + "learning_rate": 7.89150991391957e-05, + "loss": 1.4941, + "step": 9853 + }, + { + "epoch": 0.88, + "learning_rate": 7.88029465905462e-05, + "loss": 1.5664, + "step": 9854 + }, + { + "epoch": 0.88, + "learning_rate": 7.869087052407675e-05, + "loss": 1.5488, + "step": 9855 + }, + { + "epoch": 0.88, + "learning_rate": 7.857887094909233e-05, + "loss": 1.582, + "step": 9856 + }, + { + "epoch": 0.88, + "learning_rate": 7.846694787489183e-05, + "loss": 1.4473, + "step": 9857 + }, + { + "epoch": 0.88, + "learning_rate": 7.83551013107675e-05, + "loss": 1.6035, + "step": 9858 + }, + { + "epoch": 0.88, + "learning_rate": 7.824333126600547e-05, + "loss": 1.5996, + "step": 9859 + }, + { + "epoch": 0.88, + "learning_rate": 7.813163774988529e-05, + "loss": 1.627, + "step": 9860 + }, + { + "epoch": 0.88, + "learning_rate": 7.802002077168047e-05, + "loss": 1.5391, + "step": 9861 + }, + { + "epoch": 0.88, + "learning_rate": 7.790848034065801e-05, + "loss": 1.4551, + "step": 9862 + }, + { + "epoch": 0.88, + "learning_rate": 7.779701646607828e-05, + "loss": 1.6836, + "step": 9863 + }, + { + "epoch": 0.88, + "learning_rate": 7.768562915719557e-05, + "loss": 1.5566, + "step": 9864 + }, + { + "epoch": 0.88, + "learning_rate": 7.757431842325812e-05, + "loss": 1.6797, + "step": 9865 + }, + { + "epoch": 0.88, + "learning_rate": 7.746308427350724e-05, + "loss": 1.5859, + "step": 9866 + }, + { + "epoch": 0.88, + "learning_rate": 7.735192671717805e-05, + "loss": 1.541, + "step": 9867 + }, + { + "epoch": 0.88, + "learning_rate": 7.724084576349944e-05, + "loss": 1.4766, + "step": 9868 + }, + { + "epoch": 0.88, + "learning_rate": 7.7129841421694e-05, + "loss": 1.4863, + "step": 9869 + }, + { + "epoch": 0.88, + "learning_rate": 7.701891370097769e-05, + "loss": 1.5234, + "step": 9870 + }, + { + "epoch": 0.88, + "learning_rate": 7.690806261056038e-05, + "loss": 1.5137, + "step": 9871 + }, + { + "epoch": 0.88, + "learning_rate": 7.679728815964548e-05, + "loss": 1.6055, + "step": 9872 + }, + { + "epoch": 0.88, + "learning_rate": 7.668659035742987e-05, + "loss": 1.5391, + "step": 9873 + }, + { + "epoch": 0.88, + "learning_rate": 7.657596921310405e-05, + "loss": 1.6836, + "step": 9874 + }, + { + "epoch": 0.88, + "learning_rate": 7.646542473585272e-05, + "loss": 1.6094, + "step": 9875 + }, + { + "epoch": 0.88, + "learning_rate": 7.635495693485373e-05, + "loss": 1.6074, + "step": 9876 + }, + { + "epoch": 0.88, + "learning_rate": 7.62445658192784e-05, + "loss": 1.5254, + "step": 9877 + }, + { + "epoch": 0.88, + "learning_rate": 7.61342513982921e-05, + "loss": 1.5566, + "step": 9878 + }, + { + "epoch": 0.88, + "learning_rate": 7.602401368105372e-05, + "loss": 1.4434, + "step": 9879 + }, + { + "epoch": 0.88, + "learning_rate": 7.591385267671547e-05, + "loss": 1.5332, + "step": 9880 + }, + { + "epoch": 0.88, + "learning_rate": 7.580376839442371e-05, + "loss": 1.6523, + "step": 9881 + }, + { + "epoch": 0.88, + "learning_rate": 7.569376084331803e-05, + "loss": 1.5645, + "step": 9882 + }, + { + "epoch": 0.88, + "learning_rate": 7.558383003253188e-05, + "loss": 1.6348, + "step": 9883 + }, + { + "epoch": 0.88, + "learning_rate": 7.547397597119199e-05, + "loss": 1.5996, + "step": 9884 + }, + { + "epoch": 0.88, + "learning_rate": 7.536419866841926e-05, + "loss": 1.4766, + "step": 9885 + }, + { + "epoch": 0.88, + "learning_rate": 7.525449813332797e-05, + "loss": 1.6426, + "step": 9886 + }, + { + "epoch": 0.88, + "learning_rate": 7.514487437502571e-05, + "loss": 1.4922, + "step": 9887 + }, + { + "epoch": 0.88, + "learning_rate": 7.503532740261388e-05, + "loss": 1.5547, + "step": 9888 + }, + { + "epoch": 0.88, + "learning_rate": 7.492585722518808e-05, + "loss": 1.5293, + "step": 9889 + }, + { + "epoch": 0.88, + "learning_rate": 7.481646385183671e-05, + "loss": 1.6406, + "step": 9890 + }, + { + "epoch": 0.88, + "learning_rate": 7.470714729164207e-05, + "loss": 1.457, + "step": 9891 + }, + { + "epoch": 0.88, + "learning_rate": 7.459790755368035e-05, + "loss": 1.5957, + "step": 9892 + }, + { + "epoch": 0.88, + "learning_rate": 7.448874464702115e-05, + "loss": 1.457, + "step": 9893 + }, + { + "epoch": 0.88, + "learning_rate": 7.437965858072748e-05, + "loss": 1.5117, + "step": 9894 + }, + { + "epoch": 0.88, + "learning_rate": 7.42706493638563e-05, + "loss": 1.5371, + "step": 9895 + }, + { + "epoch": 0.88, + "learning_rate": 7.416171700545815e-05, + "loss": 1.6172, + "step": 9896 + }, + { + "epoch": 0.88, + "learning_rate": 7.405286151457713e-05, + "loss": 1.6582, + "step": 9897 + }, + { + "epoch": 0.88, + "learning_rate": 7.39440829002509e-05, + "loss": 1.5215, + "step": 9898 + }, + { + "epoch": 0.88, + "learning_rate": 7.383538117151056e-05, + "loss": 1.4805, + "step": 9899 + }, + { + "epoch": 0.88, + "learning_rate": 7.372675633738135e-05, + "loss": 1.7109, + "step": 9900 + }, + { + "epoch": 0.88, + "learning_rate": 7.361820840688172e-05, + "loss": 1.543, + "step": 9901 + }, + { + "epoch": 0.88, + "learning_rate": 7.35097373890239e-05, + "loss": 1.5645, + "step": 9902 + }, + { + "epoch": 0.88, + "learning_rate": 7.340134329281344e-05, + "loss": 1.5625, + "step": 9903 + }, + { + "epoch": 0.88, + "learning_rate": 7.329302612724997e-05, + "loss": 1.5605, + "step": 9904 + }, + { + "epoch": 0.88, + "learning_rate": 7.318478590132649e-05, + "loss": 1.5215, + "step": 9905 + }, + { + "epoch": 0.88, + "learning_rate": 7.30766226240296e-05, + "loss": 1.5703, + "step": 9906 + }, + { + "epoch": 0.88, + "learning_rate": 7.296853630433919e-05, + "loss": 1.6211, + "step": 9907 + }, + { + "epoch": 0.88, + "learning_rate": 7.286052695122968e-05, + "loss": 1.5469, + "step": 9908 + }, + { + "epoch": 0.88, + "learning_rate": 7.275259457366822e-05, + "loss": 1.6035, + "step": 9909 + }, + { + "epoch": 0.88, + "learning_rate": 7.264473918061588e-05, + "loss": 1.7109, + "step": 9910 + }, + { + "epoch": 0.88, + "learning_rate": 7.253696078102734e-05, + "loss": 1.752, + "step": 9911 + }, + { + "epoch": 0.88, + "learning_rate": 7.242925938385093e-05, + "loss": 1.6484, + "step": 9912 + }, + { + "epoch": 0.88, + "learning_rate": 7.232163499802824e-05, + "loss": 1.4062, + "step": 9913 + }, + { + "epoch": 0.88, + "learning_rate": 7.221408763249526e-05, + "loss": 1.541, + "step": 9914 + }, + { + "epoch": 0.88, + "learning_rate": 7.210661729618073e-05, + "loss": 1.3906, + "step": 9915 + }, + { + "epoch": 0.88, + "learning_rate": 7.19992239980075e-05, + "loss": 1.5898, + "step": 9916 + }, + { + "epoch": 0.88, + "learning_rate": 7.189190774689192e-05, + "loss": 1.4902, + "step": 9917 + }, + { + "epoch": 0.88, + "learning_rate": 7.17846685517437e-05, + "loss": 1.5625, + "step": 9918 + }, + { + "epoch": 0.88, + "learning_rate": 7.167750642146642e-05, + "loss": 1.5859, + "step": 9919 + }, + { + "epoch": 0.88, + "learning_rate": 7.157042136495728e-05, + "loss": 1.4766, + "step": 9920 + }, + { + "epoch": 0.88, + "learning_rate": 7.146341339110695e-05, + "loss": 1.5098, + "step": 9921 + }, + { + "epoch": 0.88, + "learning_rate": 7.135648250879979e-05, + "loss": 1.625, + "step": 9922 + }, + { + "epoch": 0.88, + "learning_rate": 7.124962872691342e-05, + "loss": 1.625, + "step": 9923 + }, + { + "epoch": 0.88, + "learning_rate": 7.11428520543198e-05, + "loss": 1.5449, + "step": 9924 + }, + { + "epoch": 0.88, + "learning_rate": 7.103615249988382e-05, + "loss": 1.4277, + "step": 9925 + }, + { + "epoch": 0.88, + "learning_rate": 7.092953007246416e-05, + "loss": 1.5508, + "step": 9926 + }, + { + "epoch": 0.88, + "learning_rate": 7.082298478091309e-05, + "loss": 1.5059, + "step": 9927 + }, + { + "epoch": 0.88, + "learning_rate": 7.071651663407663e-05, + "loss": 1.5625, + "step": 9928 + }, + { + "epoch": 0.88, + "learning_rate": 7.061012564079428e-05, + "loss": 1.7188, + "step": 9929 + }, + { + "epoch": 0.88, + "learning_rate": 7.050381180989918e-05, + "loss": 1.5273, + "step": 9930 + }, + { + "epoch": 0.88, + "learning_rate": 7.039757515021761e-05, + "loss": 1.5977, + "step": 9931 + }, + { + "epoch": 0.88, + "learning_rate": 7.029141567057029e-05, + "loss": 1.5645, + "step": 9932 + }, + { + "epoch": 0.88, + "learning_rate": 7.018533337977095e-05, + "loss": 1.5879, + "step": 9933 + }, + { + "epoch": 0.88, + "learning_rate": 7.007932828662711e-05, + "loss": 1.6523, + "step": 9934 + }, + { + "epoch": 0.88, + "learning_rate": 6.997340039993948e-05, + "loss": 1.6895, + "step": 9935 + }, + { + "epoch": 0.88, + "learning_rate": 6.986754972850317e-05, + "loss": 1.5293, + "step": 9936 + }, + { + "epoch": 0.88, + "learning_rate": 6.976177628110625e-05, + "loss": 1.5254, + "step": 9937 + }, + { + "epoch": 0.88, + "learning_rate": 6.965608006653035e-05, + "loss": 1.457, + "step": 9938 + }, + { + "epoch": 0.88, + "learning_rate": 6.9550461093551e-05, + "loss": 1.5293, + "step": 9939 + }, + { + "epoch": 0.88, + "learning_rate": 6.944491937093733e-05, + "loss": 1.4805, + "step": 9940 + }, + { + "epoch": 0.88, + "learning_rate": 6.933945490745175e-05, + "loss": 1.6309, + "step": 9941 + }, + { + "epoch": 0.88, + "learning_rate": 6.923406771185048e-05, + "loss": 1.5508, + "step": 9942 + }, + { + "epoch": 0.88, + "learning_rate": 6.912875779288308e-05, + "loss": 1.5469, + "step": 9943 + }, + { + "epoch": 0.88, + "learning_rate": 6.902352515929322e-05, + "loss": 1.4902, + "step": 9944 + }, + { + "epoch": 0.88, + "learning_rate": 6.891836981981769e-05, + "loss": 1.5039, + "step": 9945 + }, + { + "epoch": 0.88, + "learning_rate": 6.881329178318696e-05, + "loss": 1.5254, + "step": 9946 + }, + { + "epoch": 0.88, + "learning_rate": 6.870829105812482e-05, + "loss": 1.6289, + "step": 9947 + }, + { + "epoch": 0.88, + "learning_rate": 6.860336765334952e-05, + "loss": 1.6055, + "step": 9948 + }, + { + "epoch": 0.89, + "learning_rate": 6.849852157757197e-05, + "loss": 1.5684, + "step": 9949 + }, + { + "epoch": 0.89, + "learning_rate": 6.839375283949689e-05, + "loss": 1.5254, + "step": 9950 + }, + { + "epoch": 0.89, + "learning_rate": 6.828906144782298e-05, + "loss": 1.4824, + "step": 9951 + }, + { + "epoch": 0.89, + "learning_rate": 6.818444741124196e-05, + "loss": 1.5879, + "step": 9952 + }, + { + "epoch": 0.89, + "learning_rate": 6.807991073843933e-05, + "loss": 1.6621, + "step": 9953 + }, + { + "epoch": 0.89, + "learning_rate": 6.797545143809447e-05, + "loss": 1.584, + "step": 9954 + }, + { + "epoch": 0.89, + "learning_rate": 6.787106951888011e-05, + "loss": 1.5957, + "step": 9955 + }, + { + "epoch": 0.89, + "learning_rate": 6.776676498946232e-05, + "loss": 1.541, + "step": 9956 + }, + { + "epoch": 0.89, + "learning_rate": 6.766253785850107e-05, + "loss": 1.5742, + "step": 9957 + }, + { + "epoch": 0.89, + "learning_rate": 6.755838813464987e-05, + "loss": 1.5801, + "step": 9958 + }, + { + "epoch": 0.89, + "learning_rate": 6.745431582655537e-05, + "loss": 1.5449, + "step": 9959 + }, + { + "epoch": 0.89, + "learning_rate": 6.735032094285864e-05, + "loss": 1.5605, + "step": 9960 + }, + { + "epoch": 0.89, + "learning_rate": 6.724640349219358e-05, + "loss": 1.6484, + "step": 9961 + }, + { + "epoch": 0.89, + "learning_rate": 6.714256348318793e-05, + "loss": 1.4473, + "step": 9962 + }, + { + "epoch": 0.89, + "learning_rate": 6.70388009244629e-05, + "loss": 1.6309, + "step": 9963 + }, + { + "epoch": 0.89, + "learning_rate": 6.693511582463352e-05, + "loss": 1.6309, + "step": 9964 + }, + { + "epoch": 0.89, + "learning_rate": 6.68315081923082e-05, + "loss": 1.4746, + "step": 9965 + }, + { + "epoch": 0.89, + "learning_rate": 6.672797803608887e-05, + "loss": 1.4648, + "step": 9966 + }, + { + "epoch": 0.89, + "learning_rate": 6.662452536457097e-05, + "loss": 1.6758, + "step": 9967 + }, + { + "epoch": 0.89, + "learning_rate": 6.652115018634397e-05, + "loss": 1.5605, + "step": 9968 + }, + { + "epoch": 0.89, + "learning_rate": 6.641785250999033e-05, + "loss": 1.6367, + "step": 9969 + }, + { + "epoch": 0.89, + "learning_rate": 6.631463234408641e-05, + "loss": 1.625, + "step": 9970 + }, + { + "epoch": 0.89, + "learning_rate": 6.621148969720192e-05, + "loss": 1.6016, + "step": 9971 + }, + { + "epoch": 0.89, + "learning_rate": 6.610842457790045e-05, + "loss": 1.584, + "step": 9972 + }, + { + "epoch": 0.89, + "learning_rate": 6.600543699473882e-05, + "loss": 1.6035, + "step": 9973 + }, + { + "epoch": 0.89, + "learning_rate": 6.590252695626742e-05, + "loss": 1.5586, + "step": 9974 + }, + { + "epoch": 0.89, + "learning_rate": 6.579969447103062e-05, + "loss": 1.6113, + "step": 9975 + }, + { + "epoch": 0.89, + "learning_rate": 6.569693954756594e-05, + "loss": 1.5215, + "step": 9976 + }, + { + "epoch": 0.89, + "learning_rate": 6.559426219440468e-05, + "loss": 1.4922, + "step": 9977 + }, + { + "epoch": 0.89, + "learning_rate": 6.549166242007132e-05, + "loss": 1.5645, + "step": 9978 + }, + { + "epoch": 0.89, + "learning_rate": 6.538914023308451e-05, + "loss": 1.6094, + "step": 9979 + }, + { + "epoch": 0.89, + "learning_rate": 6.528669564195589e-05, + "loss": 1.6875, + "step": 9980 + }, + { + "epoch": 0.89, + "learning_rate": 6.518432865519108e-05, + "loss": 1.6777, + "step": 9981 + }, + { + "epoch": 0.89, + "learning_rate": 6.508203928128886e-05, + "loss": 1.5586, + "step": 9982 + }, + { + "epoch": 0.89, + "learning_rate": 6.49798275287421e-05, + "loss": 1.543, + "step": 9983 + }, + { + "epoch": 0.89, + "learning_rate": 6.487769340603666e-05, + "loss": 1.5762, + "step": 9984 + }, + { + "epoch": 0.89, + "learning_rate": 6.477563692165233e-05, + "loss": 1.6016, + "step": 9985 + }, + { + "epoch": 0.89, + "learning_rate": 6.4673658084062e-05, + "loss": 1.6309, + "step": 9986 + }, + { + "epoch": 0.89, + "learning_rate": 6.4571756901733e-05, + "loss": 1.4883, + "step": 9987 + }, + { + "epoch": 0.89, + "learning_rate": 6.446993338312534e-05, + "loss": 1.5508, + "step": 9988 + }, + { + "epoch": 0.89, + "learning_rate": 6.436818753669282e-05, + "loss": 1.6426, + "step": 9989 + }, + { + "epoch": 0.89, + "learning_rate": 6.426651937088301e-05, + "loss": 1.543, + "step": 9990 + }, + { + "epoch": 0.89, + "learning_rate": 6.416492889413682e-05, + "loss": 1.6055, + "step": 9991 + }, + { + "epoch": 0.89, + "learning_rate": 6.406341611488875e-05, + "loss": 1.5352, + "step": 9992 + }, + { + "epoch": 0.89, + "learning_rate": 6.396198104156692e-05, + "loss": 1.5859, + "step": 9993 + }, + { + "epoch": 0.89, + "learning_rate": 6.386062368259304e-05, + "loss": 1.6211, + "step": 9994 + }, + { + "epoch": 0.89, + "learning_rate": 6.375934404638217e-05, + "loss": 1.6172, + "step": 9995 + }, + { + "epoch": 0.89, + "learning_rate": 6.365814214134303e-05, + "loss": 1.5684, + "step": 9996 + }, + { + "epoch": 0.89, + "learning_rate": 6.355701797587799e-05, + "loss": 1.5234, + "step": 9997 + }, + { + "epoch": 0.89, + "learning_rate": 6.345597155838257e-05, + "loss": 1.5859, + "step": 9998 + }, + { + "epoch": 0.89, + "learning_rate": 6.33550028972465e-05, + "loss": 1.5918, + "step": 9999 + }, + { + "epoch": 0.89, + "learning_rate": 6.325411200085251e-05, + "loss": 1.6719, + "step": 10000 + }, + { + "epoch": 0.89, + "learning_rate": 6.315329887757714e-05, + "loss": 1.6934, + "step": 10001 + }, + { + "epoch": 0.89, + "learning_rate": 6.305256353579003e-05, + "loss": 1.5625, + "step": 10002 + }, + { + "epoch": 0.89, + "learning_rate": 6.295190598385514e-05, + "loss": 1.6562, + "step": 10003 + }, + { + "epoch": 0.89, + "learning_rate": 6.285132623012946e-05, + "loss": 1.6211, + "step": 10004 + }, + { + "epoch": 0.89, + "learning_rate": 6.275082428296341e-05, + "loss": 1.5234, + "step": 10005 + }, + { + "epoch": 0.89, + "learning_rate": 6.265040015070111e-05, + "loss": 1.5215, + "step": 10006 + }, + { + "epoch": 0.89, + "learning_rate": 6.255005384168056e-05, + "loss": 1.6328, + "step": 10007 + }, + { + "epoch": 0.89, + "learning_rate": 6.244978536423285e-05, + "loss": 1.6094, + "step": 10008 + }, + { + "epoch": 0.89, + "learning_rate": 6.234959472668266e-05, + "loss": 1.5293, + "step": 10009 + }, + { + "epoch": 0.89, + "learning_rate": 6.224948193734826e-05, + "loss": 1.6016, + "step": 10010 + }, + { + "epoch": 0.89, + "learning_rate": 6.214944700454162e-05, + "loss": 1.5156, + "step": 10011 + }, + { + "epoch": 0.89, + "learning_rate": 6.204948993656823e-05, + "loss": 1.6699, + "step": 10012 + }, + { + "epoch": 0.89, + "learning_rate": 6.194961074172678e-05, + "loss": 1.6113, + "step": 10013 + }, + { + "epoch": 0.89, + "learning_rate": 6.184980942830953e-05, + "loss": 1.541, + "step": 10014 + }, + { + "epoch": 0.89, + "learning_rate": 6.175008600460297e-05, + "loss": 1.7168, + "step": 10015 + }, + { + "epoch": 0.89, + "learning_rate": 6.165044047888646e-05, + "loss": 1.502, + "step": 10016 + }, + { + "epoch": 0.89, + "learning_rate": 6.155087285943284e-05, + "loss": 1.5332, + "step": 10017 + }, + { + "epoch": 0.89, + "learning_rate": 6.14513831545086e-05, + "loss": 1.5508, + "step": 10018 + }, + { + "epoch": 0.89, + "learning_rate": 6.135197137237436e-05, + "loss": 1.6855, + "step": 10019 + }, + { + "epoch": 0.89, + "learning_rate": 6.12526375212834e-05, + "loss": 1.623, + "step": 10020 + }, + { + "epoch": 0.89, + "learning_rate": 6.115338160948292e-05, + "loss": 1.5723, + "step": 10021 + }, + { + "epoch": 0.89, + "learning_rate": 6.105420364521342e-05, + "loss": 1.5547, + "step": 10022 + }, + { + "epoch": 0.89, + "learning_rate": 6.095510363670964e-05, + "loss": 1.4863, + "step": 10023 + }, + { + "epoch": 0.89, + "learning_rate": 6.085608159219902e-05, + "loss": 1.6875, + "step": 10024 + }, + { + "epoch": 0.89, + "learning_rate": 6.075713751990275e-05, + "loss": 1.6328, + "step": 10025 + }, + { + "epoch": 0.89, + "learning_rate": 6.065827142803582e-05, + "loss": 1.5547, + "step": 10026 + }, + { + "epoch": 0.89, + "learning_rate": 6.055948332480654e-05, + "loss": 1.5332, + "step": 10027 + }, + { + "epoch": 0.89, + "learning_rate": 6.04607732184167e-05, + "loss": 1.6504, + "step": 10028 + }, + { + "epoch": 0.89, + "learning_rate": 6.036214111706173e-05, + "loss": 1.5352, + "step": 10029 + }, + { + "epoch": 0.89, + "learning_rate": 6.026358702893064e-05, + "loss": 1.6328, + "step": 10030 + }, + { + "epoch": 0.89, + "learning_rate": 6.016511096220556e-05, + "loss": 1.4922, + "step": 10031 + }, + { + "epoch": 0.89, + "learning_rate": 6.006671292506272e-05, + "loss": 1.5859, + "step": 10032 + }, + { + "epoch": 0.89, + "learning_rate": 5.996839292567158e-05, + "loss": 1.5273, + "step": 10033 + }, + { + "epoch": 0.89, + "learning_rate": 5.987015097219506e-05, + "loss": 1.5195, + "step": 10034 + }, + { + "epoch": 0.89, + "learning_rate": 5.9771987072789635e-05, + "loss": 1.6309, + "step": 10035 + }, + { + "epoch": 0.89, + "learning_rate": 5.9673901235605454e-05, + "loss": 1.582, + "step": 10036 + }, + { + "epoch": 0.89, + "learning_rate": 5.9575893468786e-05, + "loss": 1.5371, + "step": 10037 + }, + { + "epoch": 0.89, + "learning_rate": 5.9477963780468105e-05, + "loss": 1.5645, + "step": 10038 + }, + { + "epoch": 0.89, + "learning_rate": 5.9380112178782916e-05, + "loss": 1.6738, + "step": 10039 + }, + { + "epoch": 0.89, + "learning_rate": 5.928233867185406e-05, + "loss": 1.6992, + "step": 10040 + }, + { + "epoch": 0.89, + "learning_rate": 5.918464326779949e-05, + "loss": 1.5938, + "step": 10041 + }, + { + "epoch": 0.89, + "learning_rate": 5.908702597472993e-05, + "loss": 1.5898, + "step": 10042 + }, + { + "epoch": 0.89, + "learning_rate": 5.8989486800750555e-05, + "loss": 1.6426, + "step": 10043 + }, + { + "epoch": 0.89, + "learning_rate": 5.889202575395913e-05, + "loss": 1.6406, + "step": 10044 + }, + { + "epoch": 0.89, + "learning_rate": 5.879464284244762e-05, + "loss": 1.627, + "step": 10045 + }, + { + "epoch": 0.89, + "learning_rate": 5.8697338074300777e-05, + "loss": 1.6113, + "step": 10046 + }, + { + "epoch": 0.89, + "learning_rate": 5.860011145759791e-05, + "loss": 1.6094, + "step": 10047 + }, + { + "epoch": 0.89, + "learning_rate": 5.85029630004108e-05, + "loss": 1.5098, + "step": 10048 + }, + { + "epoch": 0.89, + "learning_rate": 5.840589271080543e-05, + "loss": 1.5801, + "step": 10049 + }, + { + "epoch": 0.89, + "learning_rate": 5.8308900596840685e-05, + "loss": 1.5781, + "step": 10050 + }, + { + "epoch": 0.89, + "learning_rate": 5.8211986666569684e-05, + "loss": 1.5723, + "step": 10051 + }, + { + "epoch": 0.89, + "learning_rate": 5.811515092803865e-05, + "loss": 1.5332, + "step": 10052 + }, + { + "epoch": 0.89, + "learning_rate": 5.801839338928716e-05, + "loss": 1.4648, + "step": 10053 + }, + { + "epoch": 0.89, + "learning_rate": 5.792171405834845e-05, + "loss": 1.5859, + "step": 10054 + }, + { + "epoch": 0.89, + "learning_rate": 5.782511294324955e-05, + "loss": 1.543, + "step": 10055 + }, + { + "epoch": 0.89, + "learning_rate": 5.77285900520107e-05, + "loss": 1.5254, + "step": 10056 + }, + { + "epoch": 0.89, + "learning_rate": 5.7632145392645494e-05, + "loss": 1.5547, + "step": 10057 + }, + { + "epoch": 0.89, + "learning_rate": 5.7535778973161315e-05, + "loss": 1.582, + "step": 10058 + }, + { + "epoch": 0.89, + "learning_rate": 5.743949080155908e-05, + "loss": 1.5605, + "step": 10059 + }, + { + "epoch": 0.89, + "learning_rate": 5.734328088583307e-05, + "loss": 1.5371, + "step": 10060 + }, + { + "epoch": 0.9, + "learning_rate": 5.7247149233970785e-05, + "loss": 1.498, + "step": 10061 + }, + { + "epoch": 0.9, + "learning_rate": 5.715109585395395e-05, + "loss": 1.5039, + "step": 10062 + }, + { + "epoch": 0.9, + "learning_rate": 5.7055120753757296e-05, + "loss": 1.6973, + "step": 10063 + }, + { + "epoch": 0.9, + "learning_rate": 5.695922394134889e-05, + "loss": 1.6094, + "step": 10064 + }, + { + "epoch": 0.9, + "learning_rate": 5.686340542469071e-05, + "loss": 1.498, + "step": 10065 + }, + { + "epoch": 0.9, + "learning_rate": 5.676766521173804e-05, + "loss": 1.6387, + "step": 10066 + }, + { + "epoch": 0.9, + "learning_rate": 5.667200331043987e-05, + "loss": 1.5801, + "step": 10067 + }, + { + "epoch": 0.9, + "learning_rate": 5.657641972873817e-05, + "loss": 1.502, + "step": 10068 + }, + { + "epoch": 0.9, + "learning_rate": 5.648091447456905e-05, + "loss": 1.4863, + "step": 10069 + }, + { + "epoch": 0.9, + "learning_rate": 5.638548755586159e-05, + "loss": 1.6152, + "step": 10070 + }, + { + "epoch": 0.9, + "learning_rate": 5.629013898053858e-05, + "loss": 1.5586, + "step": 10071 + }, + { + "epoch": 0.9, + "learning_rate": 5.619486875651658e-05, + "loss": 1.5762, + "step": 10072 + }, + { + "epoch": 0.9, + "learning_rate": 5.609967689170514e-05, + "loss": 1.5801, + "step": 10073 + }, + { + "epoch": 0.9, + "learning_rate": 5.6004563394007724e-05, + "loss": 1.5762, + "step": 10074 + }, + { + "epoch": 0.9, + "learning_rate": 5.5909528271320896e-05, + "loss": 1.459, + "step": 10075 + }, + { + "epoch": 0.9, + "learning_rate": 5.581457153153502e-05, + "loss": 1.7305, + "step": 10076 + }, + { + "epoch": 0.9, + "learning_rate": 5.571969318253378e-05, + "loss": 1.5254, + "step": 10077 + }, + { + "epoch": 0.9, + "learning_rate": 5.562489323219466e-05, + "loss": 1.6289, + "step": 10078 + }, + { + "epoch": 0.9, + "learning_rate": 5.553017168838826e-05, + "loss": 1.6152, + "step": 10079 + }, + { + "epoch": 0.9, + "learning_rate": 5.543552855897882e-05, + "loss": 1.5898, + "step": 10080 + }, + { + "epoch": 0.9, + "learning_rate": 5.534096385182386e-05, + "loss": 1.5312, + "step": 10081 + }, + { + "epoch": 0.9, + "learning_rate": 5.5246477574774966e-05, + "loss": 1.6934, + "step": 10082 + }, + { + "epoch": 0.9, + "learning_rate": 5.5152069735676655e-05, + "loss": 1.6328, + "step": 10083 + }, + { + "epoch": 0.9, + "learning_rate": 5.505774034236721e-05, + "loss": 1.6133, + "step": 10084 + }, + { + "epoch": 0.9, + "learning_rate": 5.496348940267803e-05, + "loss": 1.4883, + "step": 10085 + }, + { + "epoch": 0.9, + "learning_rate": 5.4869316924434646e-05, + "loss": 1.4629, + "step": 10086 + }, + { + "epoch": 0.9, + "learning_rate": 5.4775222915455446e-05, + "loss": 1.5312, + "step": 10087 + }, + { + "epoch": 0.9, + "learning_rate": 5.4681207383552754e-05, + "loss": 1.7148, + "step": 10088 + }, + { + "epoch": 0.9, + "learning_rate": 5.458727033653188e-05, + "loss": 1.5664, + "step": 10089 + }, + { + "epoch": 0.9, + "learning_rate": 5.449341178219236e-05, + "loss": 1.7129, + "step": 10090 + }, + { + "epoch": 0.9, + "learning_rate": 5.4399631728326516e-05, + "loss": 1.5723, + "step": 10091 + }, + { + "epoch": 0.9, + "learning_rate": 5.4305930182720457e-05, + "loss": 1.6016, + "step": 10092 + }, + { + "epoch": 0.9, + "learning_rate": 5.4212307153153615e-05, + "loss": 1.6562, + "step": 10093 + }, + { + "epoch": 0.9, + "learning_rate": 5.411876264739923e-05, + "loss": 1.584, + "step": 10094 + }, + { + "epoch": 0.9, + "learning_rate": 5.402529667322375e-05, + "loss": 1.707, + "step": 10095 + }, + { + "epoch": 0.9, + "learning_rate": 5.39319092383872e-05, + "loss": 1.5977, + "step": 10096 + }, + { + "epoch": 0.9, + "learning_rate": 5.3838600350642695e-05, + "loss": 1.6484, + "step": 10097 + }, + { + "epoch": 0.9, + "learning_rate": 5.374537001773772e-05, + "loss": 1.5996, + "step": 10098 + }, + { + "epoch": 0.9, + "learning_rate": 5.365221824741251e-05, + "loss": 1.6836, + "step": 10099 + }, + { + "epoch": 0.9, + "learning_rate": 5.3559145047400895e-05, + "loss": 1.6055, + "step": 10100 + }, + { + "epoch": 0.9, + "learning_rate": 5.346615042543002e-05, + "loss": 1.5898, + "step": 10101 + }, + { + "epoch": 0.9, + "learning_rate": 5.337323438922126e-05, + "loss": 1.4453, + "step": 10102 + }, + { + "epoch": 0.9, + "learning_rate": 5.328039694648867e-05, + "loss": 1.4688, + "step": 10103 + }, + { + "epoch": 0.9, + "learning_rate": 5.3187638104940094e-05, + "loss": 1.6289, + "step": 10104 + }, + { + "epoch": 0.9, + "learning_rate": 5.309495787227669e-05, + "loss": 1.6152, + "step": 10105 + }, + { + "epoch": 0.9, + "learning_rate": 5.300235625619343e-05, + "loss": 1.5352, + "step": 10106 + }, + { + "epoch": 0.9, + "learning_rate": 5.290983326437848e-05, + "loss": 1.5781, + "step": 10107 + }, + { + "epoch": 0.9, + "learning_rate": 5.2817388904513485e-05, + "loss": 1.5332, + "step": 10108 + }, + { + "epoch": 0.9, + "learning_rate": 5.2725023184273634e-05, + "loss": 1.5371, + "step": 10109 + }, + { + "epoch": 0.9, + "learning_rate": 5.263273611132735e-05, + "loss": 1.6113, + "step": 10110 + }, + { + "epoch": 0.9, + "learning_rate": 5.254052769333717e-05, + "loss": 1.5605, + "step": 10111 + }, + { + "epoch": 0.9, + "learning_rate": 5.244839793795853e-05, + "loss": 1.5449, + "step": 10112 + }, + { + "epoch": 0.9, + "learning_rate": 5.23563468528403e-05, + "loss": 1.623, + "step": 10113 + }, + { + "epoch": 0.9, + "learning_rate": 5.226437444562526e-05, + "loss": 1.7305, + "step": 10114 + }, + { + "epoch": 0.9, + "learning_rate": 5.2172480723949086e-05, + "loss": 1.5371, + "step": 10115 + }, + { + "epoch": 0.9, + "learning_rate": 5.208066569544156e-05, + "loss": 1.5527, + "step": 10116 + }, + { + "epoch": 0.9, + "learning_rate": 5.198892936772526e-05, + "loss": 1.6445, + "step": 10117 + }, + { + "epoch": 0.9, + "learning_rate": 5.189727174841696e-05, + "loss": 1.4824, + "step": 10118 + }, + { + "epoch": 0.9, + "learning_rate": 5.1805692845126265e-05, + "loss": 1.6797, + "step": 10119 + }, + { + "epoch": 0.9, + "learning_rate": 5.171419266545663e-05, + "loss": 1.6797, + "step": 10120 + }, + { + "epoch": 0.9, + "learning_rate": 5.162277121700454e-05, + "loss": 1.6621, + "step": 10121 + }, + { + "epoch": 0.9, + "learning_rate": 5.153142850736059e-05, + "loss": 1.5684, + "step": 10122 + }, + { + "epoch": 0.9, + "learning_rate": 5.144016454410849e-05, + "loss": 1.5039, + "step": 10123 + }, + { + "epoch": 0.9, + "learning_rate": 5.134897933482519e-05, + "loss": 1.4863, + "step": 10124 + }, + { + "epoch": 0.9, + "learning_rate": 5.125787288708117e-05, + "loss": 1.666, + "step": 10125 + }, + { + "epoch": 0.9, + "learning_rate": 5.116684520844106e-05, + "loss": 1.6914, + "step": 10126 + }, + { + "epoch": 0.9, + "learning_rate": 5.1075896306462035e-05, + "loss": 1.6094, + "step": 10127 + }, + { + "epoch": 0.9, + "learning_rate": 5.098502618869516e-05, + "loss": 1.6113, + "step": 10128 + }, + { + "epoch": 0.9, + "learning_rate": 5.0894234862684853e-05, + "loss": 1.5859, + "step": 10129 + }, + { + "epoch": 0.9, + "learning_rate": 5.08035223359693e-05, + "loss": 1.5879, + "step": 10130 + }, + { + "epoch": 0.9, + "learning_rate": 5.07128886160797e-05, + "loss": 1.7773, + "step": 10131 + }, + { + "epoch": 0.9, + "learning_rate": 5.062233371054081e-05, + "loss": 1.6094, + "step": 10132 + }, + { + "epoch": 0.9, + "learning_rate": 5.0531857626870957e-05, + "loss": 1.5898, + "step": 10133 + }, + { + "epoch": 0.9, + "learning_rate": 5.0441460372582125e-05, + "loss": 1.7285, + "step": 10134 + }, + { + "epoch": 0.9, + "learning_rate": 5.0351141955179314e-05, + "loss": 1.5234, + "step": 10135 + }, + { + "epoch": 0.9, + "learning_rate": 5.02609023821613e-05, + "loss": 1.5098, + "step": 10136 + }, + { + "epoch": 0.9, + "learning_rate": 5.017074166101998e-05, + "loss": 1.5566, + "step": 10137 + }, + { + "epoch": 0.9, + "learning_rate": 5.008065979924126e-05, + "loss": 1.7285, + "step": 10138 + }, + { + "epoch": 0.9, + "learning_rate": 4.999065680430403e-05, + "loss": 1.5879, + "step": 10139 + }, + { + "epoch": 0.9, + "learning_rate": 4.990073268368078e-05, + "loss": 1.5957, + "step": 10140 + }, + { + "epoch": 0.9, + "learning_rate": 4.981088744483719e-05, + "loss": 1.6133, + "step": 10141 + }, + { + "epoch": 0.9, + "learning_rate": 4.972112109523297e-05, + "loss": 1.5293, + "step": 10142 + }, + { + "epoch": 0.9, + "learning_rate": 4.9631433642320924e-05, + "loss": 1.6133, + "step": 10143 + }, + { + "epoch": 0.9, + "learning_rate": 4.9541825093546985e-05, + "loss": 1.5391, + "step": 10144 + }, + { + "epoch": 0.9, + "learning_rate": 4.9452295456351435e-05, + "loss": 1.6309, + "step": 10145 + }, + { + "epoch": 0.9, + "learning_rate": 4.9362844738167104e-05, + "loss": 1.6016, + "step": 10146 + }, + { + "epoch": 0.9, + "learning_rate": 4.927347294642059e-05, + "loss": 1.4297, + "step": 10147 + }, + { + "epoch": 0.9, + "learning_rate": 4.9184180088532095e-05, + "loss": 1.4902, + "step": 10148 + }, + { + "epoch": 0.9, + "learning_rate": 4.909496617191522e-05, + "loss": 1.6387, + "step": 10149 + }, + { + "epoch": 0.9, + "learning_rate": 4.90058312039765e-05, + "loss": 1.6406, + "step": 10150 + }, + { + "epoch": 0.9, + "learning_rate": 4.89167751921169e-05, + "loss": 1.6465, + "step": 10151 + }, + { + "epoch": 0.9, + "learning_rate": 4.882779814372995e-05, + "loss": 1.6523, + "step": 10152 + }, + { + "epoch": 0.9, + "learning_rate": 4.8738900066203074e-05, + "loss": 1.6094, + "step": 10153 + }, + { + "epoch": 0.9, + "learning_rate": 4.8650080966916924e-05, + "loss": 1.582, + "step": 10154 + }, + { + "epoch": 0.9, + "learning_rate": 4.8561340853245813e-05, + "loss": 1.5586, + "step": 10155 + }, + { + "epoch": 0.9, + "learning_rate": 4.847267973255709e-05, + "loss": 1.6152, + "step": 10156 + }, + { + "epoch": 0.9, + "learning_rate": 4.838409761221207e-05, + "loss": 1.5801, + "step": 10157 + }, + { + "epoch": 0.9, + "learning_rate": 4.829559449956533e-05, + "loss": 1.6406, + "step": 10158 + }, + { + "epoch": 0.9, + "learning_rate": 4.820717040196465e-05, + "loss": 1.5156, + "step": 10159 + }, + { + "epoch": 0.9, + "learning_rate": 4.811882532675138e-05, + "loss": 1.4922, + "step": 10160 + }, + { + "epoch": 0.9, + "learning_rate": 4.8030559281260544e-05, + "loss": 1.5723, + "step": 10161 + }, + { + "epoch": 0.9, + "learning_rate": 4.794237227282039e-05, + "loss": 1.6406, + "step": 10162 + }, + { + "epoch": 0.9, + "learning_rate": 4.78542643087525e-05, + "loss": 1.6465, + "step": 10163 + }, + { + "epoch": 0.9, + "learning_rate": 4.7766235396371926e-05, + "loss": 1.5762, + "step": 10164 + }, + { + "epoch": 0.9, + "learning_rate": 4.7678285542987475e-05, + "loss": 1.5039, + "step": 10165 + }, + { + "epoch": 0.9, + "learning_rate": 4.7590414755901204e-05, + "loss": 1.5625, + "step": 10166 + }, + { + "epoch": 0.9, + "learning_rate": 4.750262304240827e-05, + "loss": 1.7012, + "step": 10167 + }, + { + "epoch": 0.9, + "learning_rate": 4.741491040979773e-05, + "loss": 1.5918, + "step": 10168 + }, + { + "epoch": 0.9, + "learning_rate": 4.7327276865351984e-05, + "loss": 1.6562, + "step": 10169 + }, + { + "epoch": 0.9, + "learning_rate": 4.7239722416346775e-05, + "loss": 1.5059, + "step": 10170 + }, + { + "epoch": 0.9, + "learning_rate": 4.715224707005117e-05, + "loss": 1.752, + "step": 10171 + }, + { + "epoch": 0.9, + "learning_rate": 4.706485083372769e-05, + "loss": 1.623, + "step": 10172 + }, + { + "epoch": 0.9, + "learning_rate": 4.697753371463265e-05, + "loss": 1.6094, + "step": 10173 + }, + { + "epoch": 0.91, + "learning_rate": 4.6890295720015465e-05, + "loss": 1.6543, + "step": 10174 + }, + { + "epoch": 0.91, + "learning_rate": 4.6803136857119013e-05, + "loss": 1.5332, + "step": 10175 + }, + { + "epoch": 0.91, + "learning_rate": 4.6716057133179614e-05, + "loss": 1.5176, + "step": 10176 + }, + { + "epoch": 0.91, + "learning_rate": 4.662905655542704e-05, + "loss": 1.5273, + "step": 10177 + }, + { + "epoch": 0.91, + "learning_rate": 4.654213513108463e-05, + "loss": 1.5488, + "step": 10178 + }, + { + "epoch": 0.91, + "learning_rate": 4.6455292867368826e-05, + "loss": 1.4922, + "step": 10179 + }, + { + "epoch": 0.91, + "learning_rate": 4.6368529771489644e-05, + "loss": 1.5879, + "step": 10180 + }, + { + "epoch": 0.91, + "learning_rate": 4.628184585065087e-05, + "loss": 1.5254, + "step": 10181 + }, + { + "epoch": 0.91, + "learning_rate": 4.619524111204931e-05, + "loss": 1.6875, + "step": 10182 + }, + { + "epoch": 0.91, + "learning_rate": 4.61087155628751e-05, + "loss": 1.5645, + "step": 10183 + }, + { + "epoch": 0.91, + "learning_rate": 4.602226921031205e-05, + "loss": 1.5586, + "step": 10184 + }, + { + "epoch": 0.91, + "learning_rate": 4.5935902061537635e-05, + "loss": 1.6953, + "step": 10185 + }, + { + "epoch": 0.91, + "learning_rate": 4.584961412372224e-05, + "loss": 1.5566, + "step": 10186 + }, + { + "epoch": 0.91, + "learning_rate": 4.57634054040299e-05, + "loss": 1.5293, + "step": 10187 + }, + { + "epoch": 0.91, + "learning_rate": 4.567727590961812e-05, + "loss": 1.6113, + "step": 10188 + }, + { + "epoch": 0.91, + "learning_rate": 4.559122564763773e-05, + "loss": 1.5586, + "step": 10189 + }, + { + "epoch": 0.91, + "learning_rate": 4.550525462523314e-05, + "loss": 1.5684, + "step": 10190 + }, + { + "epoch": 0.91, + "learning_rate": 4.541936284954196e-05, + "loss": 1.6348, + "step": 10191 + }, + { + "epoch": 0.91, + "learning_rate": 4.5333550327695484e-05, + "loss": 1.5078, + "step": 10192 + }, + { + "epoch": 0.91, + "learning_rate": 4.524781706681802e-05, + "loss": 1.4902, + "step": 10193 + }, + { + "epoch": 0.91, + "learning_rate": 4.516216307402787e-05, + "loss": 1.543, + "step": 10194 + }, + { + "epoch": 0.91, + "learning_rate": 4.507658835643624e-05, + "loss": 1.7051, + "step": 10195 + }, + { + "epoch": 0.91, + "learning_rate": 4.499109292114789e-05, + "loss": 1.5469, + "step": 10196 + }, + { + "epoch": 0.91, + "learning_rate": 4.490567677526125e-05, + "loss": 1.5586, + "step": 10197 + }, + { + "epoch": 0.91, + "learning_rate": 4.482033992586787e-05, + "loss": 1.4766, + "step": 10198 + }, + { + "epoch": 0.91, + "learning_rate": 4.4735082380052864e-05, + "loss": 1.584, + "step": 10199 + }, + { + "epoch": 0.91, + "learning_rate": 4.464990414489456e-05, + "loss": 1.582, + "step": 10200 + }, + { + "epoch": 0.91, + "learning_rate": 4.45648052274652e-05, + "loss": 1.6055, + "step": 10201 + }, + { + "epoch": 0.91, + "learning_rate": 4.4479785634829794e-05, + "loss": 1.498, + "step": 10202 + }, + { + "epoch": 0.91, + "learning_rate": 4.4394845374047256e-05, + "loss": 1.5879, + "step": 10203 + }, + { + "epoch": 0.91, + "learning_rate": 4.4309984452169495e-05, + "loss": 1.5742, + "step": 10204 + }, + { + "epoch": 0.91, + "learning_rate": 4.422520287624232e-05, + "loss": 1.6113, + "step": 10205 + }, + { + "epoch": 0.91, + "learning_rate": 4.414050065330466e-05, + "loss": 1.5195, + "step": 10206 + }, + { + "epoch": 0.91, + "learning_rate": 4.405587779038889e-05, + "loss": 1.4531, + "step": 10207 + }, + { + "epoch": 0.91, + "learning_rate": 4.39713342945206e-05, + "loss": 1.5938, + "step": 10208 + }, + { + "epoch": 0.91, + "learning_rate": 4.3886870172719416e-05, + "loss": 1.582, + "step": 10209 + }, + { + "epoch": 0.91, + "learning_rate": 4.3802485431997494e-05, + "loss": 1.5879, + "step": 10210 + }, + { + "epoch": 0.91, + "learning_rate": 4.371818007936124e-05, + "loss": 1.6289, + "step": 10211 + }, + { + "epoch": 0.91, + "learning_rate": 4.3633954121809704e-05, + "loss": 1.4902, + "step": 10212 + }, + { + "epoch": 0.91, + "learning_rate": 4.354980756633609e-05, + "loss": 1.5586, + "step": 10213 + }, + { + "epoch": 0.91, + "learning_rate": 4.346574041992646e-05, + "loss": 1.5469, + "step": 10214 + }, + { + "epoch": 0.91, + "learning_rate": 4.338175268956046e-05, + "loss": 1.498, + "step": 10215 + }, + { + "epoch": 0.91, + "learning_rate": 4.329784438221118e-05, + "loss": 1.5352, + "step": 10216 + }, + { + "epoch": 0.91, + "learning_rate": 4.321401550484516e-05, + "loss": 1.5566, + "step": 10217 + }, + { + "epoch": 0.91, + "learning_rate": 4.313026606442227e-05, + "loss": 1.6133, + "step": 10218 + }, + { + "epoch": 0.91, + "learning_rate": 4.304659606789574e-05, + "loss": 1.6543, + "step": 10219 + }, + { + "epoch": 0.91, + "learning_rate": 4.2963005522211996e-05, + "loss": 1.7109, + "step": 10220 + }, + { + "epoch": 0.91, + "learning_rate": 4.28794944343116e-05, + "loss": 1.6133, + "step": 10221 + }, + { + "epoch": 0.91, + "learning_rate": 4.279606281112791e-05, + "loss": 1.627, + "step": 10222 + }, + { + "epoch": 0.91, + "learning_rate": 4.2712710659587594e-05, + "loss": 1.584, + "step": 10223 + }, + { + "epoch": 0.91, + "learning_rate": 4.262943798661101e-05, + "loss": 1.543, + "step": 10224 + }, + { + "epoch": 0.91, + "learning_rate": 4.254624479911207e-05, + "loss": 1.6523, + "step": 10225 + }, + { + "epoch": 0.91, + "learning_rate": 4.2463131103997575e-05, + "loss": 1.498, + "step": 10226 + }, + { + "epoch": 0.91, + "learning_rate": 4.238009690816835e-05, + "loss": 1.5449, + "step": 10227 + }, + { + "epoch": 0.91, + "learning_rate": 4.2297142218517994e-05, + "loss": 1.4512, + "step": 10228 + }, + { + "epoch": 0.91, + "learning_rate": 4.2214267041933874e-05, + "loss": 1.5605, + "step": 10229 + }, + { + "epoch": 0.91, + "learning_rate": 4.2131471385296824e-05, + "loss": 1.4941, + "step": 10230 + }, + { + "epoch": 0.91, + "learning_rate": 4.20487552554808e-05, + "loss": 1.6621, + "step": 10231 + }, + { + "epoch": 0.91, + "learning_rate": 4.1966118659353405e-05, + "loss": 1.5586, + "step": 10232 + }, + { + "epoch": 0.91, + "learning_rate": 4.188356160377538e-05, + "loss": 1.5156, + "step": 10233 + }, + { + "epoch": 0.91, + "learning_rate": 4.1801084095601016e-05, + "loss": 1.5137, + "step": 10234 + }, + { + "epoch": 0.91, + "learning_rate": 4.1718686141677955e-05, + "loss": 1.498, + "step": 10235 + }, + { + "epoch": 0.91, + "learning_rate": 4.16363677488476e-05, + "loss": 1.5762, + "step": 10236 + }, + { + "epoch": 0.91, + "learning_rate": 4.155412892394406e-05, + "loss": 1.4746, + "step": 10237 + }, + { + "epoch": 0.91, + "learning_rate": 4.1471969673795296e-05, + "loss": 1.5254, + "step": 10238 + }, + { + "epoch": 0.91, + "learning_rate": 4.138989000522253e-05, + "loss": 1.666, + "step": 10239 + }, + { + "epoch": 0.91, + "learning_rate": 4.130788992504053e-05, + "loss": 1.4297, + "step": 10240 + }, + { + "epoch": 0.91, + "learning_rate": 4.1225969440057296e-05, + "loss": 1.5254, + "step": 10241 + }, + { + "epoch": 0.91, + "learning_rate": 4.114412855707428e-05, + "loss": 1.5195, + "step": 10242 + }, + { + "epoch": 0.91, + "learning_rate": 4.106236728288604e-05, + "loss": 1.5664, + "step": 10243 + }, + { + "epoch": 0.91, + "learning_rate": 4.0980685624281034e-05, + "loss": 1.5371, + "step": 10244 + }, + { + "epoch": 0.91, + "learning_rate": 4.089908358804095e-05, + "loss": 1.4863, + "step": 10245 + }, + { + "epoch": 0.91, + "learning_rate": 4.08175611809406e-05, + "loss": 1.6602, + "step": 10246 + }, + { + "epoch": 0.91, + "learning_rate": 4.073611840974833e-05, + "loss": 1.5059, + "step": 10247 + }, + { + "epoch": 0.91, + "learning_rate": 4.0654755281226085e-05, + "loss": 1.3457, + "step": 10248 + }, + { + "epoch": 0.91, + "learning_rate": 4.057347180212889e-05, + "loss": 1.5234, + "step": 10249 + }, + { + "epoch": 0.91, + "learning_rate": 4.049226797920535e-05, + "loss": 1.5, + "step": 10250 + }, + { + "epoch": 0.91, + "learning_rate": 4.0411143819197304e-05, + "loss": 1.5273, + "step": 10251 + }, + { + "epoch": 0.91, + "learning_rate": 4.033009932884024e-05, + "loss": 1.6582, + "step": 10252 + }, + { + "epoch": 0.91, + "learning_rate": 4.0249134514862675e-05, + "loss": 1.6484, + "step": 10253 + }, + { + "epoch": 0.91, + "learning_rate": 4.016824938398689e-05, + "loss": 1.7734, + "step": 10254 + }, + { + "epoch": 0.91, + "learning_rate": 4.008744394292807e-05, + "loss": 1.5977, + "step": 10255 + }, + { + "epoch": 0.91, + "learning_rate": 4.0006718198395296e-05, + "loss": 1.5449, + "step": 10256 + }, + { + "epoch": 0.91, + "learning_rate": 3.9926072157090765e-05, + "loss": 1.6387, + "step": 10257 + }, + { + "epoch": 0.91, + "learning_rate": 3.9845505825710116e-05, + "loss": 1.5527, + "step": 10258 + }, + { + "epoch": 0.91, + "learning_rate": 3.976501921094211e-05, + "loss": 1.541, + "step": 10259 + }, + { + "epoch": 0.91, + "learning_rate": 3.968461231946952e-05, + "loss": 1.6445, + "step": 10260 + }, + { + "epoch": 0.91, + "learning_rate": 3.960428515796799e-05, + "loss": 1.543, + "step": 10261 + }, + { + "epoch": 0.91, + "learning_rate": 3.9524037733106424e-05, + "loss": 1.5156, + "step": 10262 + }, + { + "epoch": 0.91, + "learning_rate": 3.944387005154748e-05, + "loss": 1.5996, + "step": 10263 + }, + { + "epoch": 0.91, + "learning_rate": 3.936378211994729e-05, + "loss": 1.502, + "step": 10264 + }, + { + "epoch": 0.91, + "learning_rate": 3.928377394495486e-05, + "loss": 1.6074, + "step": 10265 + }, + { + "epoch": 0.91, + "learning_rate": 3.920384553321299e-05, + "loss": 1.5195, + "step": 10266 + }, + { + "epoch": 0.91, + "learning_rate": 3.9123996891357594e-05, + "loss": 1.541, + "step": 10267 + }, + { + "epoch": 0.91, + "learning_rate": 3.9044228026018146e-05, + "loss": 1.4551, + "step": 10268 + }, + { + "epoch": 0.91, + "learning_rate": 3.896453894381746e-05, + "loss": 1.5137, + "step": 10269 + }, + { + "epoch": 0.91, + "learning_rate": 3.88849296513718e-05, + "loss": 1.6523, + "step": 10270 + }, + { + "epoch": 0.91, + "learning_rate": 3.880540015529055e-05, + "loss": 1.6016, + "step": 10271 + }, + { + "epoch": 0.91, + "learning_rate": 3.872595046217664e-05, + "loss": 1.5195, + "step": 10272 + }, + { + "epoch": 0.91, + "learning_rate": 3.8646580578626464e-05, + "loss": 1.6523, + "step": 10273 + }, + { + "epoch": 0.91, + "learning_rate": 3.856729051122965e-05, + "loss": 1.4766, + "step": 10274 + }, + { + "epoch": 0.91, + "learning_rate": 3.8488080266569136e-05, + "loss": 1.6426, + "step": 10275 + }, + { + "epoch": 0.91, + "learning_rate": 3.8408949851221456e-05, + "loss": 1.5977, + "step": 10276 + }, + { + "epoch": 0.91, + "learning_rate": 3.832989927175634e-05, + "loss": 1.5215, + "step": 10277 + }, + { + "epoch": 0.91, + "learning_rate": 3.8250928534736995e-05, + "loss": 1.5664, + "step": 10278 + }, + { + "epoch": 0.91, + "learning_rate": 3.8172037646719834e-05, + "loss": 1.5098, + "step": 10279 + }, + { + "epoch": 0.91, + "learning_rate": 3.809322661425496e-05, + "loss": 1.5781, + "step": 10280 + }, + { + "epoch": 0.91, + "learning_rate": 3.801449544388547e-05, + "loss": 1.5879, + "step": 10281 + }, + { + "epoch": 0.91, + "learning_rate": 3.7935844142148125e-05, + "loss": 1.5215, + "step": 10282 + }, + { + "epoch": 0.91, + "learning_rate": 3.785727271557271e-05, + "loss": 1.5781, + "step": 10283 + }, + { + "epoch": 0.91, + "learning_rate": 3.777878117068278e-05, + "loss": 1.668, + "step": 10284 + }, + { + "epoch": 0.91, + "learning_rate": 3.7700369513995135e-05, + "loss": 1.3711, + "step": 10285 + }, + { + "epoch": 0.92, + "learning_rate": 3.762203775201978e-05, + "loss": 1.627, + "step": 10286 + }, + { + "epoch": 0.92, + "learning_rate": 3.7543785891260065e-05, + "loss": 1.668, + "step": 10287 + }, + { + "epoch": 0.92, + "learning_rate": 3.746561393821313e-05, + "loss": 1.5977, + "step": 10288 + }, + { + "epoch": 0.92, + "learning_rate": 3.738752189936911e-05, + "loss": 1.5195, + "step": 10289 + }, + { + "epoch": 0.92, + "learning_rate": 3.730950978121139e-05, + "loss": 1.4961, + "step": 10290 + }, + { + "epoch": 0.92, + "learning_rate": 3.723157759021689e-05, + "loss": 1.4707, + "step": 10291 + }, + { + "epoch": 0.92, + "learning_rate": 3.715372533285621e-05, + "loss": 1.5156, + "step": 10292 + }, + { + "epoch": 0.92, + "learning_rate": 3.7075953015592854e-05, + "loss": 1.5391, + "step": 10293 + }, + { + "epoch": 0.92, + "learning_rate": 3.699826064488388e-05, + "loss": 1.6191, + "step": 10294 + }, + { + "epoch": 0.92, + "learning_rate": 3.6920648227179446e-05, + "loss": 1.4512, + "step": 10295 + }, + { + "epoch": 0.92, + "learning_rate": 3.684311576892363e-05, + "loss": 1.4004, + "step": 10296 + }, + { + "epoch": 0.92, + "learning_rate": 3.6765663276553396e-05, + "loss": 1.625, + "step": 10297 + }, + { + "epoch": 0.92, + "learning_rate": 3.6688290756499374e-05, + "loss": 1.4961, + "step": 10298 + }, + { + "epoch": 0.92, + "learning_rate": 3.6610998215185095e-05, + "loss": 1.5586, + "step": 10299 + }, + { + "epoch": 0.92, + "learning_rate": 3.6533785659027986e-05, + "loss": 1.5527, + "step": 10300 + }, + { + "epoch": 0.92, + "learning_rate": 3.6456653094438574e-05, + "loss": 1.5762, + "step": 10301 + }, + { + "epoch": 0.92, + "learning_rate": 3.6379600527820856e-05, + "loss": 1.625, + "step": 10302 + }, + { + "epoch": 0.92, + "learning_rate": 3.630262796557171e-05, + "loss": 1.6387, + "step": 10303 + }, + { + "epoch": 0.92, + "learning_rate": 3.622573541408225e-05, + "loss": 1.5918, + "step": 10304 + }, + { + "epoch": 0.92, + "learning_rate": 3.614892287973637e-05, + "loss": 1.5215, + "step": 10305 + }, + { + "epoch": 0.92, + "learning_rate": 3.607219036891118e-05, + "loss": 1.666, + "step": 10306 + }, + { + "epoch": 0.92, + "learning_rate": 3.599553788797738e-05, + "loss": 1.6875, + "step": 10307 + }, + { + "epoch": 0.92, + "learning_rate": 3.591896544329931e-05, + "loss": 1.5879, + "step": 10308 + }, + { + "epoch": 0.92, + "learning_rate": 3.584247304123422e-05, + "loss": 1.6543, + "step": 10309 + }, + { + "epoch": 0.92, + "learning_rate": 3.576606068813293e-05, + "loss": 1.5371, + "step": 10310 + }, + { + "epoch": 0.92, + "learning_rate": 3.568972839033946e-05, + "loss": 1.6523, + "step": 10311 + }, + { + "epoch": 0.92, + "learning_rate": 3.5613476154191304e-05, + "loss": 1.5645, + "step": 10312 + }, + { + "epoch": 0.92, + "learning_rate": 3.553730398601929e-05, + "loss": 1.5566, + "step": 10313 + }, + { + "epoch": 0.92, + "learning_rate": 3.546121189214757e-05, + "loss": 1.5762, + "step": 10314 + }, + { + "epoch": 0.92, + "learning_rate": 3.538519987889388e-05, + "loss": 1.4844, + "step": 10315 + }, + { + "epoch": 0.92, + "learning_rate": 3.5309267952568835e-05, + "loss": 1.6973, + "step": 10316 + }, + { + "epoch": 0.92, + "learning_rate": 3.5233416119476834e-05, + "loss": 1.7441, + "step": 10317 + }, + { + "epoch": 0.92, + "learning_rate": 3.5157644385915174e-05, + "loss": 1.5781, + "step": 10318 + }, + { + "epoch": 0.92, + "learning_rate": 3.508195275817527e-05, + "loss": 1.4727, + "step": 10319 + }, + { + "epoch": 0.92, + "learning_rate": 3.50063412425411e-05, + "loss": 1.5332, + "step": 10320 + }, + { + "epoch": 0.92, + "learning_rate": 3.493080984529029e-05, + "loss": 1.5078, + "step": 10321 + }, + { + "epoch": 0.92, + "learning_rate": 3.485535857269373e-05, + "loss": 1.5684, + "step": 10322 + }, + { + "epoch": 0.92, + "learning_rate": 3.477998743101607e-05, + "loss": 1.623, + "step": 10323 + }, + { + "epoch": 0.92, + "learning_rate": 3.470469642651475e-05, + "loss": 1.5527, + "step": 10324 + }, + { + "epoch": 0.92, + "learning_rate": 3.4629485565440764e-05, + "loss": 1.541, + "step": 10325 + }, + { + "epoch": 0.92, + "learning_rate": 3.455435485403846e-05, + "loss": 1.5254, + "step": 10326 + }, + { + "epoch": 0.92, + "learning_rate": 3.447930429854585e-05, + "loss": 1.4961, + "step": 10327 + }, + { + "epoch": 0.92, + "learning_rate": 3.440433390519371e-05, + "loss": 1.6582, + "step": 10328 + }, + { + "epoch": 0.92, + "learning_rate": 3.432944368020641e-05, + "loss": 1.4844, + "step": 10329 + }, + { + "epoch": 0.92, + "learning_rate": 3.4254633629801745e-05, + "loss": 1.3613, + "step": 10330 + }, + { + "epoch": 0.92, + "learning_rate": 3.417990376019098e-05, + "loss": 1.6016, + "step": 10331 + }, + { + "epoch": 0.92, + "learning_rate": 3.410525407757836e-05, + "loss": 1.5957, + "step": 10332 + }, + { + "epoch": 0.92, + "learning_rate": 3.403068458816172e-05, + "loss": 1.7285, + "step": 10333 + }, + { + "epoch": 0.92, + "learning_rate": 3.3956195298132096e-05, + "loss": 1.5547, + "step": 10334 + }, + { + "epoch": 0.92, + "learning_rate": 3.3881786213673995e-05, + "loss": 1.416, + "step": 10335 + }, + { + "epoch": 0.92, + "learning_rate": 3.3807457340965354e-05, + "loss": 1.6133, + "step": 10336 + }, + { + "epoch": 0.92, + "learning_rate": 3.373320868617713e-05, + "loss": 1.5898, + "step": 10337 + }, + { + "epoch": 0.92, + "learning_rate": 3.365904025547373e-05, + "loss": 1.668, + "step": 10338 + }, + { + "epoch": 0.92, + "learning_rate": 3.358495205501322e-05, + "loss": 1.4297, + "step": 10339 + }, + { + "epoch": 0.92, + "learning_rate": 3.3510944090946685e-05, + "loss": 1.6211, + "step": 10340 + }, + { + "epoch": 0.92, + "learning_rate": 3.3437016369418535e-05, + "loss": 1.5391, + "step": 10341 + }, + { + "epoch": 0.92, + "learning_rate": 3.3363168896566545e-05, + "loss": 1.4707, + "step": 10342 + }, + { + "epoch": 0.92, + "learning_rate": 3.328940167852201e-05, + "loss": 1.5508, + "step": 10343 + }, + { + "epoch": 0.92, + "learning_rate": 3.32157147214095e-05, + "loss": 1.4395, + "step": 10344 + }, + { + "epoch": 0.92, + "learning_rate": 3.314210803134676e-05, + "loss": 1.5625, + "step": 10345 + }, + { + "epoch": 0.92, + "learning_rate": 3.306858161444493e-05, + "loss": 1.582, + "step": 10346 + }, + { + "epoch": 0.92, + "learning_rate": 3.2995135476808433e-05, + "loss": 1.5918, + "step": 10347 + }, + { + "epoch": 0.92, + "learning_rate": 3.292176962453541e-05, + "loss": 1.5371, + "step": 10348 + }, + { + "epoch": 0.92, + "learning_rate": 3.284848406371688e-05, + "loss": 1.5312, + "step": 10349 + }, + { + "epoch": 0.92, + "learning_rate": 3.277527880043729e-05, + "loss": 1.6523, + "step": 10350 + }, + { + "epoch": 0.92, + "learning_rate": 3.2702153840774686e-05, + "loss": 1.6777, + "step": 10351 + }, + { + "epoch": 0.92, + "learning_rate": 3.262910919080009e-05, + "loss": 1.6289, + "step": 10352 + }, + { + "epoch": 0.92, + "learning_rate": 3.2556144856577984e-05, + "loss": 1.5703, + "step": 10353 + }, + { + "epoch": 0.92, + "learning_rate": 3.248326084416631e-05, + "loss": 1.4902, + "step": 10354 + }, + { + "epoch": 0.92, + "learning_rate": 3.2410457159616217e-05, + "loss": 1.6035, + "step": 10355 + }, + { + "epoch": 0.92, + "learning_rate": 3.233773380897231e-05, + "loss": 1.6562, + "step": 10356 + }, + { + "epoch": 0.92, + "learning_rate": 3.2265090798272225e-05, + "loss": 1.6562, + "step": 10357 + }, + { + "epoch": 0.92, + "learning_rate": 3.219252813354723e-05, + "loss": 1.6113, + "step": 10358 + }, + { + "epoch": 0.92, + "learning_rate": 3.2120045820821954e-05, + "loss": 1.5762, + "step": 10359 + }, + { + "epoch": 0.92, + "learning_rate": 3.204764386611414e-05, + "loss": 1.5547, + "step": 10360 + }, + { + "epoch": 0.92, + "learning_rate": 3.197532227543487e-05, + "loss": 1.5938, + "step": 10361 + }, + { + "epoch": 0.92, + "learning_rate": 3.190308105478868e-05, + "loss": 1.5879, + "step": 10362 + }, + { + "epoch": 0.92, + "learning_rate": 3.183092021017331e-05, + "loss": 1.4473, + "step": 10363 + }, + { + "epoch": 0.92, + "learning_rate": 3.175883974758009e-05, + "loss": 1.7129, + "step": 10364 + }, + { + "epoch": 0.92, + "learning_rate": 3.168683967299335e-05, + "loss": 1.4355, + "step": 10365 + }, + { + "epoch": 0.92, + "learning_rate": 3.1614919992390746e-05, + "loss": 1.5801, + "step": 10366 + }, + { + "epoch": 0.92, + "learning_rate": 3.154308071174372e-05, + "loss": 1.5977, + "step": 10367 + }, + { + "epoch": 0.92, + "learning_rate": 3.14713218370164e-05, + "loss": 1.5195, + "step": 10368 + }, + { + "epoch": 0.92, + "learning_rate": 3.139964337416678e-05, + "loss": 1.541, + "step": 10369 + }, + { + "epoch": 0.92, + "learning_rate": 3.1328045329145686e-05, + "loss": 1.5488, + "step": 10370 + }, + { + "epoch": 0.92, + "learning_rate": 3.125652770789789e-05, + "loss": 1.5137, + "step": 10371 + }, + { + "epoch": 0.92, + "learning_rate": 3.118509051636087e-05, + "loss": 1.4688, + "step": 10372 + }, + { + "epoch": 0.92, + "learning_rate": 3.111373376046578e-05, + "loss": 1.6172, + "step": 10373 + }, + { + "epoch": 0.92, + "learning_rate": 3.104245744613676e-05, + "loss": 1.6426, + "step": 10374 + }, + { + "epoch": 0.92, + "learning_rate": 3.097126157929198e-05, + "loss": 1.5723, + "step": 10375 + }, + { + "epoch": 0.92, + "learning_rate": 3.090014616584202e-05, + "loss": 1.5215, + "step": 10376 + }, + { + "epoch": 0.92, + "learning_rate": 3.082911121169152e-05, + "loss": 1.4219, + "step": 10377 + }, + { + "epoch": 0.92, + "learning_rate": 3.075815672273785e-05, + "loss": 1.5859, + "step": 10378 + }, + { + "epoch": 0.92, + "learning_rate": 3.068728270487231e-05, + "loss": 1.6426, + "step": 10379 + }, + { + "epoch": 0.92, + "learning_rate": 3.061648916397897e-05, + "loss": 1.541, + "step": 10380 + }, + { + "epoch": 0.92, + "learning_rate": 3.0545776105935676e-05, + "loss": 1.5703, + "step": 10381 + }, + { + "epoch": 0.92, + "learning_rate": 3.047514353661296e-05, + "loss": 1.5449, + "step": 10382 + }, + { + "epoch": 0.92, + "learning_rate": 3.0404591461875464e-05, + "loss": 1.541, + "step": 10383 + }, + { + "epoch": 0.92, + "learning_rate": 3.0334119887580614e-05, + "loss": 1.5645, + "step": 10384 + }, + { + "epoch": 0.92, + "learning_rate": 3.0263728819579396e-05, + "loss": 1.4805, + "step": 10385 + }, + { + "epoch": 0.92, + "learning_rate": 3.019341826371569e-05, + "loss": 1.5, + "step": 10386 + }, + { + "epoch": 0.92, + "learning_rate": 3.012318822582738e-05, + "loss": 1.5957, + "step": 10387 + }, + { + "epoch": 0.92, + "learning_rate": 3.005303871174525e-05, + "loss": 1.6406, + "step": 10388 + }, + { + "epoch": 0.92, + "learning_rate": 2.9982969727293306e-05, + "loss": 1.6133, + "step": 10389 + }, + { + "epoch": 0.92, + "learning_rate": 2.991298127828901e-05, + "loss": 1.5273, + "step": 10390 + }, + { + "epoch": 0.92, + "learning_rate": 2.9843073370543262e-05, + "loss": 1.6152, + "step": 10391 + }, + { + "epoch": 0.92, + "learning_rate": 2.9773246009860088e-05, + "loss": 1.6855, + "step": 10392 + }, + { + "epoch": 0.92, + "learning_rate": 2.9703499202036855e-05, + "loss": 1.5508, + "step": 10393 + }, + { + "epoch": 0.92, + "learning_rate": 2.963383295286426e-05, + "loss": 1.6211, + "step": 10394 + }, + { + "epoch": 0.92, + "learning_rate": 2.9564247268126344e-05, + "loss": 1.5879, + "step": 10395 + }, + { + "epoch": 0.92, + "learning_rate": 2.9494742153600594e-05, + "loss": 1.6523, + "step": 10396 + }, + { + "epoch": 0.92, + "learning_rate": 2.94253176150574e-05, + "loss": 1.5781, + "step": 10397 + }, + { + "epoch": 0.93, + "learning_rate": 2.9355973658260926e-05, + "loss": 1.5508, + "step": 10398 + }, + { + "epoch": 0.93, + "learning_rate": 2.9286710288968454e-05, + "loss": 1.5996, + "step": 10399 + }, + { + "epoch": 0.93, + "learning_rate": 2.9217527512930386e-05, + "loss": 1.5762, + "step": 10400 + }, + { + "epoch": 0.93, + "learning_rate": 2.9148425335890572e-05, + "loss": 1.5215, + "step": 10401 + }, + { + "epoch": 0.93, + "learning_rate": 2.9079403763586422e-05, + "loss": 1.5449, + "step": 10402 + }, + { + "epoch": 0.93, + "learning_rate": 2.901046280174846e-05, + "loss": 1.582, + "step": 10403 + }, + { + "epoch": 0.93, + "learning_rate": 2.8941602456100225e-05, + "loss": 1.5059, + "step": 10404 + }, + { + "epoch": 0.93, + "learning_rate": 2.8872822732358916e-05, + "loss": 1.498, + "step": 10405 + }, + { + "epoch": 0.93, + "learning_rate": 2.8804123636235192e-05, + "loss": 1.5723, + "step": 10406 + }, + { + "epoch": 0.93, + "learning_rate": 2.8735505173432598e-05, + "loss": 1.7012, + "step": 10407 + }, + { + "epoch": 0.93, + "learning_rate": 2.866696734964813e-05, + "loss": 1.5352, + "step": 10408 + }, + { + "epoch": 0.93, + "learning_rate": 2.8598510170572023e-05, + "loss": 1.5137, + "step": 10409 + }, + { + "epoch": 0.93, + "learning_rate": 2.853013364188828e-05, + "loss": 1.5508, + "step": 10410 + }, + { + "epoch": 0.93, + "learning_rate": 2.8461837769273578e-05, + "loss": 1.5391, + "step": 10411 + }, + { + "epoch": 0.93, + "learning_rate": 2.839362255839817e-05, + "loss": 1.6074, + "step": 10412 + }, + { + "epoch": 0.93, + "learning_rate": 2.8325488014925627e-05, + "loss": 1.5996, + "step": 10413 + }, + { + "epoch": 0.93, + "learning_rate": 2.8257434144512984e-05, + "loss": 1.5605, + "step": 10414 + }, + { + "epoch": 0.93, + "learning_rate": 2.8189460952810163e-05, + "loss": 1.6113, + "step": 10415 + }, + { + "epoch": 0.93, + "learning_rate": 2.8121568445460767e-05, + "loss": 1.582, + "step": 10416 + }, + { + "epoch": 0.93, + "learning_rate": 2.805375662810139e-05, + "loss": 1.6719, + "step": 10417 + }, + { + "epoch": 0.93, + "learning_rate": 2.79860255063622e-05, + "loss": 1.5449, + "step": 10418 + }, + { + "epoch": 0.93, + "learning_rate": 2.79183750858667e-05, + "loss": 1.6582, + "step": 10419 + }, + { + "epoch": 0.93, + "learning_rate": 2.785080537223139e-05, + "loss": 1.5938, + "step": 10420 + }, + { + "epoch": 0.93, + "learning_rate": 2.7783316371066126e-05, + "loss": 1.6094, + "step": 10421 + }, + { + "epoch": 0.93, + "learning_rate": 2.7715908087974416e-05, + "loss": 1.7324, + "step": 10422 + }, + { + "epoch": 0.93, + "learning_rate": 2.764858052855268e-05, + "loss": 1.4961, + "step": 10423 + }, + { + "epoch": 0.93, + "learning_rate": 2.7581333698390778e-05, + "loss": 1.5664, + "step": 10424 + }, + { + "epoch": 0.93, + "learning_rate": 2.7514167603071905e-05, + "loss": 1.5449, + "step": 10425 + }, + { + "epoch": 0.93, + "learning_rate": 2.7447082248172384e-05, + "loss": 1.6074, + "step": 10426 + }, + { + "epoch": 0.93, + "learning_rate": 2.7380077639262202e-05, + "loss": 1.5586, + "step": 10427 + }, + { + "epoch": 0.93, + "learning_rate": 2.7313153781904133e-05, + "loss": 1.5449, + "step": 10428 + }, + { + "epoch": 0.93, + "learning_rate": 2.724631068165473e-05, + "loss": 1.4883, + "step": 10429 + }, + { + "epoch": 0.93, + "learning_rate": 2.7179548344063554e-05, + "loss": 1.541, + "step": 10430 + }, + { + "epoch": 0.93, + "learning_rate": 2.7112866774673507e-05, + "loss": 1.6387, + "step": 10431 + }, + { + "epoch": 0.93, + "learning_rate": 2.7046265979020823e-05, + "loss": 1.5137, + "step": 10432 + }, + { + "epoch": 0.93, + "learning_rate": 2.697974596263486e-05, + "loss": 1.459, + "step": 10433 + }, + { + "epoch": 0.93, + "learning_rate": 2.6913306731038756e-05, + "loss": 1.4902, + "step": 10434 + }, + { + "epoch": 0.93, + "learning_rate": 2.684694828974843e-05, + "loss": 1.541, + "step": 10435 + }, + { + "epoch": 0.93, + "learning_rate": 2.6780670644273254e-05, + "loss": 1.6621, + "step": 10436 + }, + { + "epoch": 0.93, + "learning_rate": 2.6714473800115825e-05, + "loss": 1.5469, + "step": 10437 + }, + { + "epoch": 0.93, + "learning_rate": 2.6648357762772413e-05, + "loss": 1.6094, + "step": 10438 + }, + { + "epoch": 0.93, + "learning_rate": 2.6582322537732075e-05, + "loss": 1.5488, + "step": 10439 + }, + { + "epoch": 0.93, + "learning_rate": 2.651636813047742e-05, + "loss": 1.6055, + "step": 10440 + }, + { + "epoch": 0.93, + "learning_rate": 2.6450494546484184e-05, + "loss": 1.6211, + "step": 10441 + }, + { + "epoch": 0.93, + "learning_rate": 2.6384701791221764e-05, + "loss": 1.5957, + "step": 10442 + }, + { + "epoch": 0.93, + "learning_rate": 2.6318989870152464e-05, + "loss": 1.4531, + "step": 10443 + }, + { + "epoch": 0.93, + "learning_rate": 2.6253358788731917e-05, + "loss": 1.6172, + "step": 10444 + }, + { + "epoch": 0.93, + "learning_rate": 2.6187808552409096e-05, + "loss": 1.4473, + "step": 10445 + }, + { + "epoch": 0.93, + "learning_rate": 2.612233916662654e-05, + "loss": 1.5977, + "step": 10446 + }, + { + "epoch": 0.93, + "learning_rate": 2.6056950636819676e-05, + "loss": 1.8281, + "step": 10447 + }, + { + "epoch": 0.93, + "learning_rate": 2.5991642968417273e-05, + "loss": 1.5117, + "step": 10448 + }, + { + "epoch": 0.93, + "learning_rate": 2.592641616684155e-05, + "loss": 1.5176, + "step": 10449 + }, + { + "epoch": 0.93, + "learning_rate": 2.586127023750817e-05, + "loss": 1.541, + "step": 10450 + }, + { + "epoch": 0.93, + "learning_rate": 2.5796205185825594e-05, + "loss": 1.5293, + "step": 10451 + }, + { + "epoch": 0.93, + "learning_rate": 2.5731221017195937e-05, + "loss": 1.6152, + "step": 10452 + }, + { + "epoch": 0.93, + "learning_rate": 2.5666317737014333e-05, + "loss": 1.4941, + "step": 10453 + }, + { + "epoch": 0.93, + "learning_rate": 2.560149535066969e-05, + "loss": 1.4609, + "step": 10454 + }, + { + "epoch": 0.93, + "learning_rate": 2.5536753863543595e-05, + "loss": 1.6094, + "step": 10455 + }, + { + "epoch": 0.93, + "learning_rate": 2.54720932810113e-05, + "loss": 1.6465, + "step": 10456 + }, + { + "epoch": 0.93, + "learning_rate": 2.540751360844118e-05, + "loss": 1.4941, + "step": 10457 + }, + { + "epoch": 0.93, + "learning_rate": 2.5343014851195056e-05, + "loss": 1.5645, + "step": 10458 + }, + { + "epoch": 0.93, + "learning_rate": 2.5278597014627758e-05, + "loss": 1.5859, + "step": 10459 + }, + { + "epoch": 0.93, + "learning_rate": 2.5214260104087783e-05, + "loss": 1.6074, + "step": 10460 + }, + { + "epoch": 0.93, + "learning_rate": 2.5150004124916413e-05, + "loss": 1.5547, + "step": 10461 + }, + { + "epoch": 0.93, + "learning_rate": 2.508582908244883e-05, + "loss": 1.6953, + "step": 10462 + }, + { + "epoch": 0.93, + "learning_rate": 2.502173498201288e-05, + "loss": 1.5977, + "step": 10463 + }, + { + "epoch": 0.93, + "learning_rate": 2.495772182893008e-05, + "loss": 1.4316, + "step": 10464 + }, + { + "epoch": 0.93, + "learning_rate": 2.489378962851485e-05, + "loss": 1.6172, + "step": 10465 + }, + { + "epoch": 0.93, + "learning_rate": 2.4829938386075613e-05, + "loss": 1.502, + "step": 10466 + }, + { + "epoch": 0.93, + "learning_rate": 2.4766168106913345e-05, + "loss": 1.5332, + "step": 10467 + }, + { + "epoch": 0.93, + "learning_rate": 2.470247879632248e-05, + "loss": 1.5293, + "step": 10468 + }, + { + "epoch": 0.93, + "learning_rate": 2.463887045959101e-05, + "loss": 1.5117, + "step": 10469 + }, + { + "epoch": 0.93, + "learning_rate": 2.4575343101999825e-05, + "loss": 1.6348, + "step": 10470 + }, + { + "epoch": 0.93, + "learning_rate": 2.4511896728823368e-05, + "loss": 1.5273, + "step": 10471 + }, + { + "epoch": 0.93, + "learning_rate": 2.4448531345329206e-05, + "loss": 1.6035, + "step": 10472 + }, + { + "epoch": 0.93, + "learning_rate": 2.4385246956778128e-05, + "loss": 1.6523, + "step": 10473 + }, + { + "epoch": 0.93, + "learning_rate": 2.43220435684246e-05, + "loss": 1.543, + "step": 10474 + }, + { + "epoch": 0.93, + "learning_rate": 2.425892118551598e-05, + "loss": 1.5215, + "step": 10475 + }, + { + "epoch": 0.93, + "learning_rate": 2.4195879813292744e-05, + "loss": 1.5293, + "step": 10476 + }, + { + "epoch": 0.93, + "learning_rate": 2.4132919456989145e-05, + "loss": 1.6699, + "step": 10477 + }, + { + "epoch": 0.93, + "learning_rate": 2.4070040121832338e-05, + "loss": 1.5586, + "step": 10478 + }, + { + "epoch": 0.93, + "learning_rate": 2.4007241813042923e-05, + "loss": 1.5938, + "step": 10479 + }, + { + "epoch": 0.93, + "learning_rate": 2.3944524535834733e-05, + "loss": 1.6289, + "step": 10480 + }, + { + "epoch": 0.93, + "learning_rate": 2.388188829541471e-05, + "loss": 1.5469, + "step": 10481 + }, + { + "epoch": 0.93, + "learning_rate": 2.3819333096983366e-05, + "loss": 1.6445, + "step": 10482 + }, + { + "epoch": 0.93, + "learning_rate": 2.3756858945734317e-05, + "loss": 1.5273, + "step": 10483 + }, + { + "epoch": 0.93, + "learning_rate": 2.3694465846854307e-05, + "loss": 1.5117, + "step": 10484 + }, + { + "epoch": 0.93, + "learning_rate": 2.3632153805523747e-05, + "loss": 1.6934, + "step": 10485 + }, + { + "epoch": 0.93, + "learning_rate": 2.3569922826916057e-05, + "loss": 1.4473, + "step": 10486 + }, + { + "epoch": 0.93, + "learning_rate": 2.3507772916197767e-05, + "loss": 1.6738, + "step": 10487 + }, + { + "epoch": 0.93, + "learning_rate": 2.344570407852886e-05, + "loss": 1.6934, + "step": 10488 + }, + { + "epoch": 0.93, + "learning_rate": 2.3383716319062774e-05, + "loss": 1.4922, + "step": 10489 + }, + { + "epoch": 0.93, + "learning_rate": 2.3321809642945944e-05, + "loss": 1.6797, + "step": 10490 + }, + { + "epoch": 0.93, + "learning_rate": 2.325998405531815e-05, + "loss": 1.5742, + "step": 10491 + }, + { + "epoch": 0.93, + "learning_rate": 2.3198239561312394e-05, + "loss": 1.5449, + "step": 10492 + }, + { + "epoch": 0.93, + "learning_rate": 2.3136576166055025e-05, + "loss": 1.6699, + "step": 10493 + }, + { + "epoch": 0.93, + "learning_rate": 2.3074993874665828e-05, + "loss": 1.6406, + "step": 10494 + }, + { + "epoch": 0.93, + "learning_rate": 2.301349269225739e-05, + "loss": 1.5977, + "step": 10495 + }, + { + "epoch": 0.93, + "learning_rate": 2.295207262393584e-05, + "loss": 1.5684, + "step": 10496 + }, + { + "epoch": 0.93, + "learning_rate": 2.289073367480077e-05, + "loss": 1.6875, + "step": 10497 + }, + { + "epoch": 0.93, + "learning_rate": 2.282947584994477e-05, + "loss": 1.4766, + "step": 10498 + }, + { + "epoch": 0.93, + "learning_rate": 2.2768299154453664e-05, + "loss": 1.623, + "step": 10499 + }, + { + "epoch": 0.93, + "learning_rate": 2.27072035934065e-05, + "loss": 1.6582, + "step": 10500 + }, + { + "epoch": 0.93, + "learning_rate": 2.2646189171876107e-05, + "loss": 1.6484, + "step": 10501 + }, + { + "epoch": 0.93, + "learning_rate": 2.258525589492799e-05, + "loss": 1.5156, + "step": 10502 + }, + { + "epoch": 0.93, + "learning_rate": 2.2524403767621104e-05, + "loss": 1.7129, + "step": 10503 + }, + { + "epoch": 0.93, + "learning_rate": 2.246363279500763e-05, + "loss": 1.6172, + "step": 10504 + }, + { + "epoch": 0.93, + "learning_rate": 2.240294298213308e-05, + "loss": 1.5879, + "step": 10505 + }, + { + "epoch": 0.93, + "learning_rate": 2.234233433403643e-05, + "loss": 1.6621, + "step": 10506 + }, + { + "epoch": 0.93, + "learning_rate": 2.2281806855749432e-05, + "loss": 1.6523, + "step": 10507 + }, + { + "epoch": 0.93, + "learning_rate": 2.222136055229751e-05, + "loss": 1.6406, + "step": 10508 + }, + { + "epoch": 0.93, + "learning_rate": 2.2160995428699204e-05, + "loss": 1.5098, + "step": 10509 + }, + { + "epoch": 0.93, + "learning_rate": 2.2100711489966175e-05, + "loss": 1.5039, + "step": 10510 + }, + { + "epoch": 0.94, + "learning_rate": 2.204050874110364e-05, + "loss": 1.5, + "step": 10511 + }, + { + "epoch": 0.94, + "learning_rate": 2.1980387187109817e-05, + "loss": 1.5312, + "step": 10512 + }, + { + "epoch": 0.94, + "learning_rate": 2.192034683297639e-05, + "loss": 1.4785, + "step": 10513 + }, + { + "epoch": 0.94, + "learning_rate": 2.1860387683688142e-05, + "loss": 1.6094, + "step": 10514 + }, + { + "epoch": 0.94, + "learning_rate": 2.1800509744223097e-05, + "loss": 1.4844, + "step": 10515 + }, + { + "epoch": 0.94, + "learning_rate": 2.1740713019552714e-05, + "loss": 1.5879, + "step": 10516 + }, + { + "epoch": 0.94, + "learning_rate": 2.1680997514641477e-05, + "loss": 1.5312, + "step": 10517 + }, + { + "epoch": 0.94, + "learning_rate": 2.1621363234447524e-05, + "loss": 1.4961, + "step": 10518 + }, + { + "epoch": 0.94, + "learning_rate": 2.156181018392167e-05, + "loss": 1.4863, + "step": 10519 + }, + { + "epoch": 0.94, + "learning_rate": 2.15023383680083e-05, + "loss": 1.6387, + "step": 10520 + }, + { + "epoch": 0.94, + "learning_rate": 2.144294779164535e-05, + "loss": 1.5742, + "step": 10521 + }, + { + "epoch": 0.94, + "learning_rate": 2.1383638459763433e-05, + "loss": 1.4785, + "step": 10522 + }, + { + "epoch": 0.94, + "learning_rate": 2.1324410377286828e-05, + "loss": 1.6426, + "step": 10523 + }, + { + "epoch": 0.94, + "learning_rate": 2.126526354913283e-05, + "loss": 1.6465, + "step": 10524 + }, + { + "epoch": 0.94, + "learning_rate": 2.1206197980212174e-05, + "loss": 1.4746, + "step": 10525 + }, + { + "epoch": 0.94, + "learning_rate": 2.114721367542882e-05, + "loss": 1.5801, + "step": 10526 + }, + { + "epoch": 0.94, + "learning_rate": 2.1088310639679753e-05, + "loss": 1.4766, + "step": 10527 + }, + { + "epoch": 0.94, + "learning_rate": 2.1029488877855497e-05, + "loss": 1.5234, + "step": 10528 + }, + { + "epoch": 0.94, + "learning_rate": 2.09707483948397e-05, + "loss": 1.6875, + "step": 10529 + }, + { + "epoch": 0.94, + "learning_rate": 2.0912089195509244e-05, + "loss": 1.582, + "step": 10530 + }, + { + "epoch": 0.94, + "learning_rate": 2.085351128473445e-05, + "loss": 1.5566, + "step": 10531 + }, + { + "epoch": 0.94, + "learning_rate": 2.079501466737843e-05, + "loss": 1.625, + "step": 10532 + }, + { + "epoch": 0.94, + "learning_rate": 2.073659934829808e-05, + "loss": 1.6816, + "step": 10533 + }, + { + "epoch": 0.94, + "learning_rate": 2.0678265332343405e-05, + "loss": 1.4824, + "step": 10534 + }, + { + "epoch": 0.94, + "learning_rate": 2.062001262435742e-05, + "loss": 1.6113, + "step": 10535 + }, + { + "epoch": 0.94, + "learning_rate": 2.056184122917637e-05, + "loss": 1.7168, + "step": 10536 + }, + { + "epoch": 0.94, + "learning_rate": 2.050375115163028e-05, + "loss": 1.6172, + "step": 10537 + }, + { + "epoch": 0.94, + "learning_rate": 2.0445742396541955e-05, + "loss": 1.6816, + "step": 10538 + }, + { + "epoch": 0.94, + "learning_rate": 2.0387814968727438e-05, + "loss": 1.6875, + "step": 10539 + }, + { + "epoch": 0.94, + "learning_rate": 2.0329968872996095e-05, + "loss": 1.5176, + "step": 10540 + }, + { + "epoch": 0.94, + "learning_rate": 2.0272204114150873e-05, + "loss": 1.4688, + "step": 10541 + }, + { + "epoch": 0.94, + "learning_rate": 2.0214520696987372e-05, + "loss": 1.7383, + "step": 10542 + }, + { + "epoch": 0.94, + "learning_rate": 2.0156918626294985e-05, + "loss": 1.543, + "step": 10543 + }, + { + "epoch": 0.94, + "learning_rate": 2.0099397906855887e-05, + "loss": 1.6055, + "step": 10544 + }, + { + "epoch": 0.94, + "learning_rate": 2.0041958543445925e-05, + "loss": 1.4707, + "step": 10545 + }, + { + "epoch": 0.94, + "learning_rate": 1.9984600540833954e-05, + "loss": 1.6758, + "step": 10546 + }, + { + "epoch": 0.94, + "learning_rate": 1.9927323903782045e-05, + "loss": 1.623, + "step": 10547 + }, + { + "epoch": 0.94, + "learning_rate": 1.9870128637045627e-05, + "loss": 1.5977, + "step": 10548 + }, + { + "epoch": 0.94, + "learning_rate": 1.9813014745373225e-05, + "loss": 1.6309, + "step": 10549 + }, + { + "epoch": 0.94, + "learning_rate": 1.975598223350672e-05, + "loss": 1.6719, + "step": 10550 + }, + { + "epoch": 0.94, + "learning_rate": 1.969903110618132e-05, + "loss": 1.6113, + "step": 10551 + }, + { + "epoch": 0.94, + "learning_rate": 1.9642161368125357e-05, + "loss": 1.5625, + "step": 10552 + }, + { + "epoch": 0.94, + "learning_rate": 1.958537302406038e-05, + "loss": 1.6562, + "step": 10553 + }, + { + "epoch": 0.94, + "learning_rate": 1.9528666078701296e-05, + "loss": 1.5488, + "step": 10554 + }, + { + "epoch": 0.94, + "learning_rate": 1.9472040536756107e-05, + "loss": 1.627, + "step": 10555 + }, + { + "epoch": 0.94, + "learning_rate": 1.941549640292617e-05, + "loss": 1.5801, + "step": 10556 + }, + { + "epoch": 0.94, + "learning_rate": 1.935903368190606e-05, + "loss": 1.6367, + "step": 10557 + }, + { + "epoch": 0.94, + "learning_rate": 1.930265237838358e-05, + "loss": 1.4766, + "step": 10558 + }, + { + "epoch": 0.94, + "learning_rate": 1.9246352497039765e-05, + "loss": 1.5879, + "step": 10559 + }, + { + "epoch": 0.94, + "learning_rate": 1.9190134042548878e-05, + "loss": 1.5547, + "step": 10560 + }, + { + "epoch": 0.94, + "learning_rate": 1.9133997019578518e-05, + "loss": 1.6641, + "step": 10561 + }, + { + "epoch": 0.94, + "learning_rate": 1.9077941432789402e-05, + "loss": 1.5449, + "step": 10562 + }, + { + "epoch": 0.94, + "learning_rate": 1.9021967286835583e-05, + "loss": 1.4922, + "step": 10563 + }, + { + "epoch": 0.94, + "learning_rate": 1.8966074586364125e-05, + "loss": 1.5703, + "step": 10564 + }, + { + "epoch": 0.94, + "learning_rate": 1.8910263336015754e-05, + "loss": 1.75, + "step": 10565 + }, + { + "epoch": 0.94, + "learning_rate": 1.88545335404241e-05, + "loss": 1.6738, + "step": 10566 + }, + { + "epoch": 0.94, + "learning_rate": 1.879888520421591e-05, + "loss": 1.5488, + "step": 10567 + }, + { + "epoch": 0.94, + "learning_rate": 1.8743318332011596e-05, + "loss": 1.4609, + "step": 10568 + }, + { + "epoch": 0.94, + "learning_rate": 1.8687832928424687e-05, + "loss": 1.502, + "step": 10569 + }, + { + "epoch": 0.94, + "learning_rate": 1.8632428998061613e-05, + "loss": 1.5527, + "step": 10570 + }, + { + "epoch": 0.94, + "learning_rate": 1.8577106545522247e-05, + "loss": 1.5156, + "step": 10571 + }, + { + "epoch": 0.94, + "learning_rate": 1.852186557539992e-05, + "loss": 1.6367, + "step": 10572 + }, + { + "epoch": 0.94, + "learning_rate": 1.846670609228096e-05, + "loss": 1.6445, + "step": 10573 + }, + { + "epoch": 0.94, + "learning_rate": 1.8411628100744925e-05, + "loss": 1.5879, + "step": 10574 + }, + { + "epoch": 0.94, + "learning_rate": 1.8356631605364603e-05, + "loss": 1.5352, + "step": 10575 + }, + { + "epoch": 0.94, + "learning_rate": 1.8301716610706122e-05, + "loss": 1.6738, + "step": 10576 + }, + { + "epoch": 0.94, + "learning_rate": 1.8246883121328826e-05, + "loss": 1.541, + "step": 10577 + }, + { + "epoch": 0.94, + "learning_rate": 1.8192131141785194e-05, + "loss": 1.541, + "step": 10578 + }, + { + "epoch": 0.94, + "learning_rate": 1.813746067662092e-05, + "loss": 1.5625, + "step": 10579 + }, + { + "epoch": 0.94, + "learning_rate": 1.808287173037515e-05, + "loss": 1.5859, + "step": 10580 + }, + { + "epoch": 0.94, + "learning_rate": 1.8028364307580147e-05, + "loss": 1.5684, + "step": 10581 + }, + { + "epoch": 0.94, + "learning_rate": 1.7973938412761183e-05, + "loss": 1.6797, + "step": 10582 + }, + { + "epoch": 0.94, + "learning_rate": 1.7919594050437082e-05, + "loss": 1.4746, + "step": 10583 + }, + { + "epoch": 0.94, + "learning_rate": 1.786533122511991e-05, + "loss": 1.6191, + "step": 10584 + }, + { + "epoch": 0.94, + "learning_rate": 1.7811149941314498e-05, + "loss": 1.7031, + "step": 10585 + }, + { + "epoch": 0.94, + "learning_rate": 1.7757050203519587e-05, + "loss": 1.5645, + "step": 10586 + }, + { + "epoch": 0.94, + "learning_rate": 1.7703032016226472e-05, + "loss": 1.5156, + "step": 10587 + }, + { + "epoch": 0.94, + "learning_rate": 1.7649095383920234e-05, + "loss": 1.5879, + "step": 10588 + }, + { + "epoch": 0.94, + "learning_rate": 1.7595240311078952e-05, + "loss": 1.6191, + "step": 10589 + }, + { + "epoch": 0.94, + "learning_rate": 1.754146680217372e-05, + "loss": 1.5918, + "step": 10590 + }, + { + "epoch": 0.94, + "learning_rate": 1.7487774861669193e-05, + "loss": 1.4453, + "step": 10591 + }, + { + "epoch": 0.94, + "learning_rate": 1.743416449402313e-05, + "loss": 1.5742, + "step": 10592 + }, + { + "epoch": 0.94, + "learning_rate": 1.738063570368664e-05, + "loss": 1.6953, + "step": 10593 + }, + { + "epoch": 0.94, + "learning_rate": 1.732718849510373e-05, + "loss": 1.6875, + "step": 10594 + }, + { + "epoch": 0.94, + "learning_rate": 1.727382287271195e-05, + "loss": 1.5566, + "step": 10595 + }, + { + "epoch": 0.94, + "learning_rate": 1.722053884094199e-05, + "loss": 1.543, + "step": 10596 + }, + { + "epoch": 0.94, + "learning_rate": 1.7167336404217637e-05, + "loss": 1.4766, + "step": 10597 + }, + { + "epoch": 0.94, + "learning_rate": 1.711421556695625e-05, + "loss": 1.5391, + "step": 10598 + }, + { + "epoch": 0.94, + "learning_rate": 1.7061176333567852e-05, + "loss": 1.5645, + "step": 10599 + }, + { + "epoch": 0.94, + "learning_rate": 1.7008218708456148e-05, + "loss": 1.5801, + "step": 10600 + }, + { + "epoch": 0.94, + "learning_rate": 1.695534269601806e-05, + "loss": 1.5488, + "step": 10601 + }, + { + "epoch": 0.94, + "learning_rate": 1.6902548300643527e-05, + "loss": 1.6797, + "step": 10602 + }, + { + "epoch": 0.94, + "learning_rate": 1.6849835526715707e-05, + "loss": 1.5801, + "step": 10603 + }, + { + "epoch": 0.94, + "learning_rate": 1.6797204378611208e-05, + "loss": 1.5898, + "step": 10604 + }, + { + "epoch": 0.94, + "learning_rate": 1.6744654860699536e-05, + "loss": 1.6035, + "step": 10605 + }, + { + "epoch": 0.94, + "learning_rate": 1.669218697734387e-05, + "loss": 1.6387, + "step": 10606 + }, + { + "epoch": 0.94, + "learning_rate": 1.6639800732899946e-05, + "loss": 1.5078, + "step": 10607 + }, + { + "epoch": 0.94, + "learning_rate": 1.6587496131717505e-05, + "loss": 1.541, + "step": 10608 + }, + { + "epoch": 0.94, + "learning_rate": 1.6535273178139076e-05, + "loss": 1.5312, + "step": 10609 + }, + { + "epoch": 0.94, + "learning_rate": 1.648313187650019e-05, + "loss": 1.5859, + "step": 10610 + }, + { + "epoch": 0.94, + "learning_rate": 1.6431072231130052e-05, + "loss": 1.4648, + "step": 10611 + }, + { + "epoch": 0.94, + "learning_rate": 1.637909424635098e-05, + "loss": 1.623, + "step": 10612 + }, + { + "epoch": 0.94, + "learning_rate": 1.63271979264783e-05, + "loss": 1.6797, + "step": 10613 + }, + { + "epoch": 0.94, + "learning_rate": 1.6275383275820677e-05, + "loss": 1.6465, + "step": 10614 + }, + { + "epoch": 0.94, + "learning_rate": 1.6223650298680116e-05, + "loss": 1.5059, + "step": 10615 + }, + { + "epoch": 0.94, + "learning_rate": 1.6171998999351733e-05, + "loss": 1.5371, + "step": 10616 + }, + { + "epoch": 0.94, + "learning_rate": 1.6120429382123768e-05, + "loss": 1.5586, + "step": 10617 + }, + { + "epoch": 0.94, + "learning_rate": 1.6068941451277907e-05, + "loss": 1.5332, + "step": 10618 + }, + { + "epoch": 0.94, + "learning_rate": 1.601753521108862e-05, + "loss": 1.5645, + "step": 10619 + }, + { + "epoch": 0.94, + "learning_rate": 1.5966210665824377e-05, + "loss": 1.4453, + "step": 10620 + }, + { + "epoch": 0.94, + "learning_rate": 1.5914967819745997e-05, + "loss": 1.3887, + "step": 10621 + }, + { + "epoch": 0.94, + "learning_rate": 1.5863806677108182e-05, + "loss": 1.5703, + "step": 10622 + }, + { + "epoch": 0.95, + "learning_rate": 1.5812727242158208e-05, + "loss": 1.498, + "step": 10623 + }, + { + "epoch": 0.95, + "learning_rate": 1.5761729519137347e-05, + "loss": 1.5684, + "step": 10624 + }, + { + "epoch": 0.95, + "learning_rate": 1.5710813512279432e-05, + "loss": 1.6035, + "step": 10625 + }, + { + "epoch": 0.95, + "learning_rate": 1.565997922581186e-05, + "loss": 1.6602, + "step": 10626 + }, + { + "epoch": 0.95, + "learning_rate": 1.5609226663955033e-05, + "loss": 1.5195, + "step": 10627 + }, + { + "epoch": 0.95, + "learning_rate": 1.5558555830922805e-05, + "loss": 1.4805, + "step": 10628 + }, + { + "epoch": 0.95, + "learning_rate": 1.550796673092192e-05, + "loss": 1.7539, + "step": 10629 + }, + { + "epoch": 0.95, + "learning_rate": 1.5457459368152683e-05, + "loss": 1.5918, + "step": 10630 + }, + { + "epoch": 0.95, + "learning_rate": 1.5407033746808408e-05, + "loss": 1.5469, + "step": 10631 + }, + { + "epoch": 0.95, + "learning_rate": 1.5356689871075637e-05, + "loss": 1.543, + "step": 10632 + }, + { + "epoch": 0.95, + "learning_rate": 1.5306427745134244e-05, + "loss": 1.5938, + "step": 10633 + }, + { + "epoch": 0.95, + "learning_rate": 1.5256247373157227e-05, + "loss": 1.5039, + "step": 10634 + }, + { + "epoch": 0.95, + "learning_rate": 1.520614875931059e-05, + "loss": 1.5488, + "step": 10635 + }, + { + "epoch": 0.95, + "learning_rate": 1.5156131907754e-05, + "loss": 1.6406, + "step": 10636 + }, + { + "epoch": 0.95, + "learning_rate": 1.5106196822640138e-05, + "loss": 1.6797, + "step": 10637 + }, + { + "epoch": 0.95, + "learning_rate": 1.505634350811469e-05, + "loss": 1.6211, + "step": 10638 + }, + { + "epoch": 0.95, + "learning_rate": 1.5006571968316563e-05, + "loss": 1.5371, + "step": 10639 + }, + { + "epoch": 0.95, + "learning_rate": 1.4956882207378453e-05, + "loss": 1.6211, + "step": 10640 + }, + { + "epoch": 0.95, + "learning_rate": 1.4907274229425505e-05, + "loss": 1.459, + "step": 10641 + }, + { + "epoch": 0.95, + "learning_rate": 1.4857748038576647e-05, + "loss": 1.6953, + "step": 10642 + }, + { + "epoch": 0.95, + "learning_rate": 1.4808303638943476e-05, + "loss": 1.6055, + "step": 10643 + }, + { + "epoch": 0.95, + "learning_rate": 1.475894103463138e-05, + "loss": 1.5449, + "step": 10644 + }, + { + "epoch": 0.95, + "learning_rate": 1.470966022973852e-05, + "loss": 1.4941, + "step": 10645 + }, + { + "epoch": 0.95, + "learning_rate": 1.4660461228356515e-05, + "loss": 1.623, + "step": 10646 + }, + { + "epoch": 0.95, + "learning_rate": 1.4611344034570095e-05, + "loss": 1.5488, + "step": 10647 + }, + { + "epoch": 0.95, + "learning_rate": 1.4562308652457112e-05, + "loss": 1.5098, + "step": 10648 + }, + { + "epoch": 0.95, + "learning_rate": 1.4513355086088864e-05, + "loss": 1.627, + "step": 10649 + }, + { + "epoch": 0.95, + "learning_rate": 1.4464483339529544e-05, + "loss": 1.6309, + "step": 10650 + }, + { + "epoch": 0.95, + "learning_rate": 1.4415693416836795e-05, + "loss": 1.5176, + "step": 10651 + }, + { + "epoch": 0.95, + "learning_rate": 1.4366985322061488e-05, + "loss": 1.7461, + "step": 10652 + }, + { + "epoch": 0.95, + "learning_rate": 1.43183590592475e-05, + "loss": 1.6895, + "step": 10653 + }, + { + "epoch": 0.95, + "learning_rate": 1.4269814632431932e-05, + "loss": 1.6152, + "step": 10654 + }, + { + "epoch": 0.95, + "learning_rate": 1.4221352045645341e-05, + "loss": 1.5859, + "step": 10655 + }, + { + "epoch": 0.95, + "learning_rate": 1.4172971302911286e-05, + "loss": 1.5938, + "step": 10656 + }, + { + "epoch": 0.95, + "learning_rate": 1.4124672408246442e-05, + "loss": 1.5254, + "step": 10657 + }, + { + "epoch": 0.95, + "learning_rate": 1.4076455365660823e-05, + "loss": 1.5059, + "step": 10658 + }, + { + "epoch": 0.95, + "learning_rate": 1.4028320179157894e-05, + "loss": 1.5508, + "step": 10659 + }, + { + "epoch": 0.95, + "learning_rate": 1.3980266852733903e-05, + "loss": 1.4941, + "step": 10660 + }, + { + "epoch": 0.95, + "learning_rate": 1.3932295390378324e-05, + "loss": 1.5488, + "step": 10661 + }, + { + "epoch": 0.95, + "learning_rate": 1.3884405796074084e-05, + "loss": 1.5605, + "step": 10662 + }, + { + "epoch": 0.95, + "learning_rate": 1.3836598073797336e-05, + "loss": 1.6758, + "step": 10663 + }, + { + "epoch": 0.95, + "learning_rate": 1.3788872227517235e-05, + "loss": 1.6719, + "step": 10664 + }, + { + "epoch": 0.95, + "learning_rate": 1.374122826119617e-05, + "loss": 1.4883, + "step": 10665 + }, + { + "epoch": 0.95, + "learning_rate": 1.3693666178789754e-05, + "loss": 1.7246, + "step": 10666 + }, + { + "epoch": 0.95, + "learning_rate": 1.3646185984246717e-05, + "loss": 1.6875, + "step": 10667 + }, + { + "epoch": 0.95, + "learning_rate": 1.359878768150935e-05, + "loss": 1.4844, + "step": 10668 + }, + { + "epoch": 0.95, + "learning_rate": 1.3551471274512727e-05, + "loss": 1.5996, + "step": 10669 + }, + { + "epoch": 0.95, + "learning_rate": 1.3504236767185263e-05, + "loss": 1.5195, + "step": 10670 + }, + { + "epoch": 0.95, + "learning_rate": 1.3457084163448597e-05, + "loss": 1.5547, + "step": 10671 + }, + { + "epoch": 0.95, + "learning_rate": 1.3410013467217708e-05, + "loss": 1.5762, + "step": 10672 + }, + { + "epoch": 0.95, + "learning_rate": 1.336302468240047e-05, + "loss": 1.5605, + "step": 10673 + }, + { + "epoch": 0.95, + "learning_rate": 1.3316117812898098e-05, + "loss": 1.5586, + "step": 10674 + }, + { + "epoch": 0.95, + "learning_rate": 1.3269292862605253e-05, + "loss": 1.5312, + "step": 10675 + }, + { + "epoch": 0.95, + "learning_rate": 1.322254983540927e-05, + "loss": 1.584, + "step": 10676 + }, + { + "epoch": 0.95, + "learning_rate": 1.3175888735191155e-05, + "loss": 1.5195, + "step": 10677 + }, + { + "epoch": 0.95, + "learning_rate": 1.312930956582492e-05, + "loss": 1.5801, + "step": 10678 + }, + { + "epoch": 0.95, + "learning_rate": 1.3082812331177696e-05, + "loss": 1.582, + "step": 10679 + }, + { + "epoch": 0.95, + "learning_rate": 1.3036397035110059e-05, + "loss": 1.6797, + "step": 10680 + }, + { + "epoch": 0.95, + "learning_rate": 1.2990063681475594e-05, + "loss": 1.668, + "step": 10681 + }, + { + "epoch": 0.95, + "learning_rate": 1.2943812274120893e-05, + "loss": 1.502, + "step": 10682 + }, + { + "epoch": 0.95, + "learning_rate": 1.2897642816886323e-05, + "loss": 1.5449, + "step": 10683 + }, + { + "epoch": 0.95, + "learning_rate": 1.2851555313604824e-05, + "loss": 1.5488, + "step": 10684 + }, + { + "epoch": 0.95, + "learning_rate": 1.2805549768102997e-05, + "loss": 1.5391, + "step": 10685 + }, + { + "epoch": 0.95, + "learning_rate": 1.2759626184200123e-05, + "loss": 1.459, + "step": 10686 + }, + { + "epoch": 0.95, + "learning_rate": 1.2713784565709375e-05, + "loss": 1.6816, + "step": 10687 + }, + { + "epoch": 0.95, + "learning_rate": 1.2668024916436593e-05, + "loss": 1.6387, + "step": 10688 + }, + { + "epoch": 0.95, + "learning_rate": 1.2622347240180854e-05, + "loss": 1.6367, + "step": 10689 + }, + { + "epoch": 0.95, + "learning_rate": 1.2576751540734677e-05, + "loss": 1.5996, + "step": 10690 + }, + { + "epoch": 0.95, + "learning_rate": 1.253123782188359e-05, + "loss": 1.6094, + "step": 10691 + }, + { + "epoch": 0.95, + "learning_rate": 1.2485806087406348e-05, + "loss": 1.5605, + "step": 10692 + }, + { + "epoch": 0.95, + "learning_rate": 1.2440456341075047e-05, + "loss": 1.582, + "step": 10693 + }, + { + "epoch": 0.95, + "learning_rate": 1.2395188586654448e-05, + "loss": 1.5957, + "step": 10694 + }, + { + "epoch": 0.95, + "learning_rate": 1.2350002827903329e-05, + "loss": 1.5781, + "step": 10695 + }, + { + "epoch": 0.95, + "learning_rate": 1.2304899068573128e-05, + "loss": 1.748, + "step": 10696 + }, + { + "epoch": 0.95, + "learning_rate": 1.2259877312408407e-05, + "loss": 1.5918, + "step": 10697 + }, + { + "epoch": 0.95, + "learning_rate": 1.2214937563147177e-05, + "loss": 1.5312, + "step": 10698 + }, + { + "epoch": 0.95, + "learning_rate": 1.2170079824520563e-05, + "loss": 1.5605, + "step": 10699 + }, + { + "epoch": 0.95, + "learning_rate": 1.212530410025292e-05, + "loss": 1.418, + "step": 10700 + }, + { + "epoch": 0.95, + "learning_rate": 1.208061039406172e-05, + "loss": 1.5098, + "step": 10701 + }, + { + "epoch": 0.95, + "learning_rate": 1.2035998709657548e-05, + "loss": 1.6816, + "step": 10702 + }, + { + "epoch": 0.95, + "learning_rate": 1.199146905074433e-05, + "loss": 1.582, + "step": 10703 + }, + { + "epoch": 0.95, + "learning_rate": 1.1947021421019222e-05, + "loss": 1.5117, + "step": 10704 + }, + { + "epoch": 0.95, + "learning_rate": 1.190265582417238e-05, + "loss": 1.498, + "step": 10705 + }, + { + "epoch": 0.95, + "learning_rate": 1.1858372263887307e-05, + "loss": 1.5312, + "step": 10706 + }, + { + "epoch": 0.95, + "learning_rate": 1.1814170743840503e-05, + "loss": 1.5039, + "step": 10707 + }, + { + "epoch": 0.95, + "learning_rate": 1.177005126770192e-05, + "loss": 1.5371, + "step": 10708 + }, + { + "epoch": 0.95, + "learning_rate": 1.1726013839134519e-05, + "loss": 1.6191, + "step": 10709 + }, + { + "epoch": 0.95, + "learning_rate": 1.1682058461794599e-05, + "loss": 1.6504, + "step": 10710 + }, + { + "epoch": 0.95, + "learning_rate": 1.1638185139331459e-05, + "loss": 1.75, + "step": 10711 + }, + { + "epoch": 0.95, + "learning_rate": 1.1594393875387632e-05, + "loss": 1.5352, + "step": 10712 + }, + { + "epoch": 0.95, + "learning_rate": 1.1550684673598988e-05, + "loss": 1.6035, + "step": 10713 + }, + { + "epoch": 0.95, + "learning_rate": 1.150705753759429e-05, + "loss": 1.543, + "step": 10714 + }, + { + "epoch": 0.95, + "learning_rate": 1.1463512470995751e-05, + "loss": 1.6309, + "step": 10715 + }, + { + "epoch": 0.95, + "learning_rate": 1.1420049477418926e-05, + "loss": 1.6367, + "step": 10716 + }, + { + "epoch": 0.95, + "learning_rate": 1.1376668560471926e-05, + "loss": 1.5176, + "step": 10717 + }, + { + "epoch": 0.95, + "learning_rate": 1.1333369723756759e-05, + "loss": 1.543, + "step": 10718 + }, + { + "epoch": 0.95, + "learning_rate": 1.1290152970868106e-05, + "loss": 1.5723, + "step": 10719 + }, + { + "epoch": 0.95, + "learning_rate": 1.1247018305394207e-05, + "loss": 1.6641, + "step": 10720 + }, + { + "epoch": 0.95, + "learning_rate": 1.1203965730916088e-05, + "loss": 1.5645, + "step": 10721 + }, + { + "epoch": 0.95, + "learning_rate": 1.1160995251008332e-05, + "loss": 1.752, + "step": 10722 + }, + { + "epoch": 0.95, + "learning_rate": 1.1118106869238532e-05, + "loss": 1.5508, + "step": 10723 + }, + { + "epoch": 0.95, + "learning_rate": 1.1075300589167392e-05, + "loss": 1.6543, + "step": 10724 + }, + { + "epoch": 0.95, + "learning_rate": 1.1032576414349071e-05, + "loss": 1.6895, + "step": 10725 + }, + { + "epoch": 0.95, + "learning_rate": 1.0989934348330622e-05, + "loss": 1.5996, + "step": 10726 + }, + { + "epoch": 0.95, + "learning_rate": 1.094737439465232e-05, + "loss": 1.7871, + "step": 10727 + }, + { + "epoch": 0.95, + "learning_rate": 1.0904896556847898e-05, + "loss": 1.5352, + "step": 10728 + }, + { + "epoch": 0.95, + "learning_rate": 1.0862500838443979e-05, + "loss": 1.5, + "step": 10729 + }, + { + "epoch": 0.95, + "learning_rate": 1.08201872429603e-05, + "loss": 1.4688, + "step": 10730 + }, + { + "epoch": 0.95, + "learning_rate": 1.0777955773910053e-05, + "loss": 1.7285, + "step": 10731 + }, + { + "epoch": 0.95, + "learning_rate": 1.0735806434799655e-05, + "loss": 1.623, + "step": 10732 + }, + { + "epoch": 0.95, + "learning_rate": 1.0693739229128308e-05, + "loss": 1.5488, + "step": 10733 + }, + { + "epoch": 0.95, + "learning_rate": 1.0651754160388771e-05, + "loss": 1.459, + "step": 10734 + }, + { + "epoch": 0.95, + "learning_rate": 1.0609851232066813e-05, + "loss": 1.5293, + "step": 10735 + }, + { + "epoch": 0.96, + "learning_rate": 1.0568030447641319e-05, + "loss": 1.6406, + "step": 10736 + }, + { + "epoch": 0.96, + "learning_rate": 1.0526291810584621e-05, + "loss": 1.6035, + "step": 10737 + }, + { + "epoch": 0.96, + "learning_rate": 1.0484635324361947e-05, + "loss": 1.5391, + "step": 10738 + }, + { + "epoch": 0.96, + "learning_rate": 1.0443060992431864e-05, + "loss": 1.625, + "step": 10739 + }, + { + "epoch": 0.96, + "learning_rate": 1.0401568818246054e-05, + "loss": 1.6797, + "step": 10740 + }, + { + "epoch": 0.96, + "learning_rate": 1.036015880524932e-05, + "loss": 1.5566, + "step": 10741 + }, + { + "epoch": 0.96, + "learning_rate": 1.0318830956879799e-05, + "loss": 1.5156, + "step": 10742 + }, + { + "epoch": 0.96, + "learning_rate": 1.0277585276568746e-05, + "loss": 1.709, + "step": 10743 + }, + { + "epoch": 0.96, + "learning_rate": 1.0236421767740645e-05, + "loss": 1.584, + "step": 10744 + }, + { + "epoch": 0.96, + "learning_rate": 1.0195340433812872e-05, + "loss": 1.5586, + "step": 10745 + }, + { + "epoch": 0.96, + "learning_rate": 1.0154341278196366e-05, + "loss": 1.4004, + "step": 10746 + }, + { + "epoch": 0.96, + "learning_rate": 1.0113424304295071e-05, + "loss": 1.5352, + "step": 10747 + }, + { + "epoch": 0.96, + "learning_rate": 1.0072589515506048e-05, + "loss": 1.5898, + "step": 10748 + }, + { + "epoch": 0.96, + "learning_rate": 1.0031836915219472e-05, + "loss": 1.6445, + "step": 10749 + }, + { + "epoch": 0.96, + "learning_rate": 9.991166506819083e-06, + "loss": 1.5156, + "step": 10750 + }, + { + "epoch": 0.96, + "learning_rate": 9.9505782936814e-06, + "loss": 1.6582, + "step": 10751 + }, + { + "epoch": 0.96, + "learning_rate": 9.910072279176175e-06, + "loss": 1.543, + "step": 10752 + }, + { + "epoch": 0.96, + "learning_rate": 9.869648466666493e-06, + "loss": 1.5664, + "step": 10753 + }, + { + "epoch": 0.96, + "learning_rate": 9.829306859508557e-06, + "loss": 1.5781, + "step": 10754 + }, + { + "epoch": 0.96, + "learning_rate": 9.789047461051803e-06, + "loss": 1.5664, + "step": 10755 + }, + { + "epoch": 0.96, + "learning_rate": 9.748870274638555e-06, + "loss": 1.5918, + "step": 10756 + }, + { + "epoch": 0.96, + "learning_rate": 9.708775303604588e-06, + "loss": 1.4219, + "step": 10757 + }, + { + "epoch": 0.96, + "learning_rate": 9.668762551278797e-06, + "loss": 1.5957, + "step": 10758 + }, + { + "epoch": 0.96, + "learning_rate": 9.628832020983302e-06, + "loss": 1.5547, + "step": 10759 + }, + { + "epoch": 0.96, + "learning_rate": 9.588983716033228e-06, + "loss": 1.5527, + "step": 10760 + }, + { + "epoch": 0.96, + "learning_rate": 9.549217639736929e-06, + "loss": 1.5234, + "step": 10761 + }, + { + "epoch": 0.96, + "learning_rate": 9.509533795396097e-06, + "loss": 1.7285, + "step": 10762 + }, + { + "epoch": 0.96, + "learning_rate": 9.46993218630543e-06, + "loss": 1.7871, + "step": 10763 + }, + { + "epoch": 0.96, + "learning_rate": 9.430412815752854e-06, + "loss": 1.5938, + "step": 10764 + }, + { + "epoch": 0.96, + "learning_rate": 9.390975687019298e-06, + "loss": 1.6855, + "step": 10765 + }, + { + "epoch": 0.96, + "learning_rate": 9.351620803379368e-06, + "loss": 1.5996, + "step": 10766 + }, + { + "epoch": 0.96, + "learning_rate": 9.312348168100226e-06, + "loss": 1.6445, + "step": 10767 + }, + { + "epoch": 0.96, + "learning_rate": 9.273157784442488e-06, + "loss": 1.4355, + "step": 10768 + }, + { + "epoch": 0.96, + "learning_rate": 9.234049655659992e-06, + "loss": 1.5918, + "step": 10769 + }, + { + "epoch": 0.96, + "learning_rate": 9.19502378499959e-06, + "loss": 1.6699, + "step": 10770 + }, + { + "epoch": 0.96, + "learning_rate": 9.156080175701576e-06, + "loss": 1.5273, + "step": 10771 + }, + { + "epoch": 0.96, + "learning_rate": 9.117218830999141e-06, + "loss": 1.5586, + "step": 10772 + }, + { + "epoch": 0.96, + "learning_rate": 9.078439754118594e-06, + "loss": 1.5801, + "step": 10773 + }, + { + "epoch": 0.96, + "learning_rate": 9.039742948279806e-06, + "loss": 1.6387, + "step": 10774 + }, + { + "epoch": 0.96, + "learning_rate": 9.001128416695537e-06, + "loss": 1.5078, + "step": 10775 + }, + { + "epoch": 0.96, + "learning_rate": 8.962596162571557e-06, + "loss": 1.5527, + "step": 10776 + }, + { + "epoch": 0.96, + "learning_rate": 8.924146189107086e-06, + "loss": 1.6504, + "step": 10777 + }, + { + "epoch": 0.96, + "learning_rate": 8.885778499494457e-06, + "loss": 1.5859, + "step": 10778 + }, + { + "epoch": 0.96, + "learning_rate": 8.847493096919235e-06, + "loss": 1.5312, + "step": 10779 + }, + { + "epoch": 0.96, + "learning_rate": 8.809289984559877e-06, + "loss": 1.6836, + "step": 10780 + }, + { + "epoch": 0.96, + "learning_rate": 8.771169165588177e-06, + "loss": 1.5312, + "step": 10781 + }, + { + "epoch": 0.96, + "learning_rate": 8.733130643169274e-06, + "loss": 1.5059, + "step": 10782 + }, + { + "epoch": 0.96, + "learning_rate": 8.695174420461305e-06, + "loss": 1.4844, + "step": 10783 + }, + { + "epoch": 0.96, + "learning_rate": 8.657300500615418e-06, + "loss": 1.5645, + "step": 10784 + }, + { + "epoch": 0.96, + "learning_rate": 8.619508886776205e-06, + "loss": 1.541, + "step": 10785 + }, + { + "epoch": 0.96, + "learning_rate": 8.581799582081274e-06, + "loss": 1.6016, + "step": 10786 + }, + { + "epoch": 0.96, + "learning_rate": 8.544172589661448e-06, + "loss": 1.666, + "step": 10787 + }, + { + "epoch": 0.96, + "learning_rate": 8.506627912640563e-06, + "loss": 1.627, + "step": 10788 + }, + { + "epoch": 0.96, + "learning_rate": 8.469165554136016e-06, + "loss": 1.6172, + "step": 10789 + }, + { + "epoch": 0.96, + "learning_rate": 8.431785517257873e-06, + "loss": 1.5957, + "step": 10790 + }, + { + "epoch": 0.96, + "learning_rate": 8.394487805109763e-06, + "loss": 1.4863, + "step": 10791 + }, + { + "epoch": 0.96, + "learning_rate": 8.357272420788208e-06, + "loss": 1.5449, + "step": 10792 + }, + { + "epoch": 0.96, + "learning_rate": 8.320139367383074e-06, + "loss": 1.6875, + "step": 10793 + }, + { + "epoch": 0.96, + "learning_rate": 8.283088647977222e-06, + "loss": 1.543, + "step": 10794 + }, + { + "epoch": 0.96, + "learning_rate": 8.246120265646973e-06, + "loss": 1.5449, + "step": 10795 + }, + { + "epoch": 0.96, + "learning_rate": 8.209234223461538e-06, + "loss": 1.6797, + "step": 10796 + }, + { + "epoch": 0.96, + "learning_rate": 8.172430524483243e-06, + "loss": 1.6445, + "step": 10797 + }, + { + "epoch": 0.96, + "learning_rate": 8.135709171767869e-06, + "loss": 1.6074, + "step": 10798 + }, + { + "epoch": 0.96, + "learning_rate": 8.099070168364086e-06, + "loss": 1.668, + "step": 10799 + }, + { + "epoch": 0.96, + "learning_rate": 8.062513517314019e-06, + "loss": 1.6094, + "step": 10800 + }, + { + "epoch": 0.96, + "learning_rate": 8.02603922165246e-06, + "loss": 1.5488, + "step": 10801 + }, + { + "epoch": 0.96, + "learning_rate": 7.989647284407986e-06, + "loss": 1.5918, + "step": 10802 + }, + { + "epoch": 0.96, + "learning_rate": 7.953337708601848e-06, + "loss": 1.5625, + "step": 10803 + }, + { + "epoch": 0.96, + "learning_rate": 7.917110497248747e-06, + "loss": 1.5938, + "step": 10804 + }, + { + "epoch": 0.96, + "learning_rate": 7.880965653356386e-06, + "loss": 1.418, + "step": 10805 + }, + { + "epoch": 0.96, + "learning_rate": 7.8449031799257e-06, + "loss": 1.5586, + "step": 10806 + }, + { + "epoch": 0.96, + "learning_rate": 7.808923079950735e-06, + "loss": 1.5117, + "step": 10807 + }, + { + "epoch": 0.96, + "learning_rate": 7.773025356418772e-06, + "loss": 1.5, + "step": 10808 + }, + { + "epoch": 0.96, + "learning_rate": 7.737210012310203e-06, + "loss": 1.4473, + "step": 10809 + }, + { + "epoch": 0.96, + "learning_rate": 7.701477050598537e-06, + "loss": 1.5488, + "step": 10810 + }, + { + "epoch": 0.96, + "learning_rate": 7.665826474250625e-06, + "loss": 1.4414, + "step": 10811 + }, + { + "epoch": 0.96, + "learning_rate": 7.630258286226211e-06, + "loss": 1.5586, + "step": 10812 + }, + { + "epoch": 0.96, + "learning_rate": 7.594772489478263e-06, + "loss": 1.5957, + "step": 10813 + }, + { + "epoch": 0.96, + "learning_rate": 7.559369086953205e-06, + "loss": 1.5605, + "step": 10814 + }, + { + "epoch": 0.96, + "learning_rate": 7.524048081590351e-06, + "loss": 1.709, + "step": 10815 + }, + { + "epoch": 0.96, + "learning_rate": 7.488809476322134e-06, + "loss": 1.5273, + "step": 10816 + }, + { + "epoch": 0.96, + "learning_rate": 7.453653274074213e-06, + "loss": 1.6621, + "step": 10817 + }, + { + "epoch": 0.96, + "learning_rate": 7.418579477765586e-06, + "loss": 1.6387, + "step": 10818 + }, + { + "epoch": 0.96, + "learning_rate": 7.383588090308035e-06, + "loss": 1.5176, + "step": 10819 + }, + { + "epoch": 0.96, + "learning_rate": 7.348679114606793e-06, + "loss": 1.582, + "step": 10820 + }, + { + "epoch": 0.96, + "learning_rate": 7.313852553560208e-06, + "loss": 1.5547, + "step": 10821 + }, + { + "epoch": 0.96, + "learning_rate": 7.279108410059743e-06, + "loss": 1.6562, + "step": 10822 + }, + { + "epoch": 0.96, + "learning_rate": 7.2444466869900915e-06, + "loss": 1.6367, + "step": 10823 + }, + { + "epoch": 0.96, + "learning_rate": 7.209867387228841e-06, + "loss": 1.6035, + "step": 10824 + }, + { + "epoch": 0.96, + "learning_rate": 7.175370513647139e-06, + "loss": 1.5254, + "step": 10825 + }, + { + "epoch": 0.96, + "learning_rate": 7.140956069108917e-06, + "loss": 1.5391, + "step": 10826 + }, + { + "epoch": 0.96, + "learning_rate": 7.106624056471445e-06, + "loss": 1.5117, + "step": 10827 + }, + { + "epoch": 0.96, + "learning_rate": 7.0723744785852195e-06, + "loss": 1.584, + "step": 10828 + }, + { + "epoch": 0.96, + "learning_rate": 7.038207338293745e-06, + "loss": 1.6836, + "step": 10829 + }, + { + "epoch": 0.96, + "learning_rate": 7.004122638433863e-06, + "loss": 1.6113, + "step": 10830 + }, + { + "epoch": 0.96, + "learning_rate": 6.970120381835199e-06, + "loss": 1.5332, + "step": 10831 + }, + { + "epoch": 0.96, + "learning_rate": 6.9362005713209385e-06, + "loss": 1.6855, + "step": 10832 + }, + { + "epoch": 0.96, + "learning_rate": 6.902363209707385e-06, + "loss": 1.5586, + "step": 10833 + }, + { + "epoch": 0.96, + "learning_rate": 6.8686082998036245e-06, + "loss": 1.5117, + "step": 10834 + }, + { + "epoch": 0.96, + "learning_rate": 6.834935844412305e-06, + "loss": 1.6113, + "step": 10835 + }, + { + "epoch": 0.96, + "learning_rate": 6.801345846329077e-06, + "loss": 1.5391, + "step": 10836 + }, + { + "epoch": 0.96, + "learning_rate": 6.7678383083426e-06, + "loss": 1.4785, + "step": 10837 + }, + { + "epoch": 0.96, + "learning_rate": 6.734413233235093e-06, + "loss": 1.5684, + "step": 10838 + }, + { + "epoch": 0.96, + "learning_rate": 6.701070623781447e-06, + "loss": 1.4902, + "step": 10839 + }, + { + "epoch": 0.96, + "learning_rate": 6.667810482749892e-06, + "loss": 1.5273, + "step": 10840 + }, + { + "epoch": 0.96, + "learning_rate": 6.634632812902109e-06, + "loss": 1.541, + "step": 10841 + }, + { + "epoch": 0.96, + "learning_rate": 6.601537616992448e-06, + "loss": 1.6211, + "step": 10842 + }, + { + "epoch": 0.96, + "learning_rate": 6.568524897768602e-06, + "loss": 1.6309, + "step": 10843 + }, + { + "epoch": 0.96, + "learning_rate": 6.5355946579714885e-06, + "loss": 1.541, + "step": 10844 + }, + { + "epoch": 0.96, + "learning_rate": 6.502746900335366e-06, + "loss": 1.4082, + "step": 10845 + }, + { + "epoch": 0.96, + "learning_rate": 6.469981627587052e-06, + "loss": 1.6895, + "step": 10846 + }, + { + "epoch": 0.96, + "learning_rate": 6.437298842447148e-06, + "loss": 1.4844, + "step": 10847 + }, + { + "epoch": 0.97, + "learning_rate": 6.4046985476289285e-06, + "loss": 1.7324, + "step": 10848 + }, + { + "epoch": 0.97, + "learning_rate": 6.372180745839118e-06, + "loss": 1.6777, + "step": 10849 + }, + { + "epoch": 0.97, + "learning_rate": 6.339745439777556e-06, + "loss": 1.5273, + "step": 10850 + }, + { + "epoch": 0.97, + "learning_rate": 6.3073926321370875e-06, + "loss": 1.627, + "step": 10851 + }, + { + "epoch": 0.97, + "learning_rate": 6.275122325603788e-06, + "loss": 1.5547, + "step": 10852 + }, + { + "epoch": 0.97, + "learning_rate": 6.242934522857069e-06, + "loss": 1.6426, + "step": 10853 + }, + { + "epoch": 0.97, + "learning_rate": 6.2108292265691255e-06, + "loss": 1.6797, + "step": 10854 + }, + { + "epoch": 0.97, + "learning_rate": 6.1788064394056045e-06, + "loss": 1.5527, + "step": 10855 + }, + { + "epoch": 0.97, + "learning_rate": 6.146866164025044e-06, + "loss": 1.5391, + "step": 10856 + }, + { + "epoch": 0.97, + "learning_rate": 6.1150084030794364e-06, + "loss": 1.4844, + "step": 10857 + }, + { + "epoch": 0.97, + "learning_rate": 6.083233159213663e-06, + "loss": 1.5469, + "step": 10858 + }, + { + "epoch": 0.97, + "learning_rate": 6.05154043506595e-06, + "loss": 1.6758, + "step": 10859 + }, + { + "epoch": 0.97, + "learning_rate": 6.019930233267412e-06, + "loss": 1.5918, + "step": 10860 + }, + { + "epoch": 0.97, + "learning_rate": 5.988402556442618e-06, + "loss": 1.5977, + "step": 10861 + }, + { + "epoch": 0.97, + "learning_rate": 5.956957407209251e-06, + "loss": 1.666, + "step": 10862 + }, + { + "epoch": 0.97, + "learning_rate": 5.925594788177779e-06, + "loss": 1.5762, + "step": 10863 + }, + { + "epoch": 0.97, + "learning_rate": 5.8943147019522305e-06, + "loss": 1.5781, + "step": 10864 + }, + { + "epoch": 0.97, + "learning_rate": 5.8631171511296374e-06, + "loss": 1.6641, + "step": 10865 + }, + { + "epoch": 0.97, + "learning_rate": 5.832002138300152e-06, + "loss": 1.5957, + "step": 10866 + }, + { + "epoch": 0.97, + "learning_rate": 5.800969666046929e-06, + "loss": 1.4609, + "step": 10867 + }, + { + "epoch": 0.97, + "learning_rate": 5.770019736946686e-06, + "loss": 1.5586, + "step": 10868 + }, + { + "epoch": 0.97, + "learning_rate": 5.739152353568922e-06, + "loss": 1.5234, + "step": 10869 + }, + { + "epoch": 0.97, + "learning_rate": 5.708367518476476e-06, + "loss": 1.6914, + "step": 10870 + }, + { + "epoch": 0.97, + "learning_rate": 5.677665234225083e-06, + "loss": 1.6445, + "step": 10871 + }, + { + "epoch": 0.97, + "learning_rate": 5.647045503363812e-06, + "loss": 1.5215, + "step": 10872 + }, + { + "epoch": 0.97, + "learning_rate": 5.616508328434966e-06, + "loss": 1.6133, + "step": 10873 + }, + { + "epoch": 0.97, + "learning_rate": 5.586053711973959e-06, + "loss": 1.5547, + "step": 10874 + }, + { + "epoch": 0.97, + "learning_rate": 5.555681656509104e-06, + "loss": 1.5059, + "step": 10875 + }, + { + "epoch": 0.97, + "learning_rate": 5.525392164562048e-06, + "loss": 1.5566, + "step": 10876 + }, + { + "epoch": 0.97, + "learning_rate": 5.4951852386476706e-06, + "loss": 1.6582, + "step": 10877 + }, + { + "epoch": 0.97, + "learning_rate": 5.465060881273854e-06, + "loss": 1.5078, + "step": 10878 + }, + { + "epoch": 0.97, + "learning_rate": 5.435019094941595e-06, + "loss": 1.5469, + "step": 10879 + }, + { + "epoch": 0.97, + "learning_rate": 5.405059882145236e-06, + "loss": 1.5254, + "step": 10880 + }, + { + "epoch": 0.97, + "learning_rate": 5.375183245372007e-06, + "loss": 1.5176, + "step": 10881 + }, + { + "epoch": 0.97, + "learning_rate": 5.345389187102479e-06, + "loss": 1.5156, + "step": 10882 + }, + { + "epoch": 0.97, + "learning_rate": 5.3156777098103405e-06, + "loss": 1.6445, + "step": 10883 + }, + { + "epoch": 0.97, + "learning_rate": 5.286048815962286e-06, + "loss": 1.4707, + "step": 10884 + }, + { + "epoch": 0.97, + "learning_rate": 5.256502508018346e-06, + "loss": 1.5176, + "step": 10885 + }, + { + "epoch": 0.97, + "learning_rate": 5.227038788431449e-06, + "loss": 1.4746, + "step": 10886 + }, + { + "epoch": 0.97, + "learning_rate": 5.197657659647969e-06, + "loss": 1.4824, + "step": 10887 + }, + { + "epoch": 0.97, + "learning_rate": 5.16835912410718e-06, + "loss": 1.5312, + "step": 10888 + }, + { + "epoch": 0.97, + "learning_rate": 5.13914318424169e-06, + "loss": 1.5879, + "step": 10889 + }, + { + "epoch": 0.97, + "learning_rate": 5.110009842477003e-06, + "loss": 1.4922, + "step": 10890 + }, + { + "epoch": 0.97, + "learning_rate": 5.080959101231963e-06, + "loss": 1.5391, + "step": 10891 + }, + { + "epoch": 0.97, + "learning_rate": 5.051990962918418e-06, + "loss": 1.5547, + "step": 10892 + }, + { + "epoch": 0.97, + "learning_rate": 5.0231054299416655e-06, + "loss": 1.5801, + "step": 10893 + }, + { + "epoch": 0.97, + "learning_rate": 4.994302504699788e-06, + "loss": 1.5723, + "step": 10894 + }, + { + "epoch": 0.97, + "learning_rate": 4.9655821895840945e-06, + "loss": 1.6328, + "step": 10895 + }, + { + "epoch": 0.97, + "learning_rate": 4.936944486979012e-06, + "loss": 1.6211, + "step": 10896 + }, + { + "epoch": 0.97, + "learning_rate": 4.908389399262525e-06, + "loss": 1.6855, + "step": 10897 + }, + { + "epoch": 0.97, + "learning_rate": 4.879916928805073e-06, + "loss": 1.5156, + "step": 10898 + }, + { + "epoch": 0.97, + "learning_rate": 4.851527077970652e-06, + "loss": 1.5586, + "step": 10899 + }, + { + "epoch": 0.97, + "learning_rate": 4.823219849116378e-06, + "loss": 1.6016, + "step": 10900 + }, + { + "epoch": 0.97, + "learning_rate": 4.794995244592482e-06, + "loss": 1.5254, + "step": 10901 + }, + { + "epoch": 0.97, + "learning_rate": 4.7668532667422e-06, + "loss": 1.502, + "step": 10902 + }, + { + "epoch": 0.97, + "learning_rate": 4.738793917902106e-06, + "loss": 1.4453, + "step": 10903 + }, + { + "epoch": 0.97, + "learning_rate": 4.7108172004017846e-06, + "loss": 1.625, + "step": 10904 + }, + { + "epoch": 0.97, + "learning_rate": 4.682923116564042e-06, + "loss": 1.4434, + "step": 10905 + }, + { + "epoch": 0.97, + "learning_rate": 4.655111668704582e-06, + "loss": 1.584, + "step": 10906 + }, + { + "epoch": 0.97, + "learning_rate": 4.627382859132778e-06, + "loss": 1.5449, + "step": 10907 + }, + { + "epoch": 0.97, + "learning_rate": 4.599736690150569e-06, + "loss": 1.6387, + "step": 10908 + }, + { + "epoch": 0.97, + "learning_rate": 4.572173164053228e-06, + "loss": 1.4688, + "step": 10909 + }, + { + "epoch": 0.97, + "learning_rate": 4.544692283129482e-06, + "loss": 1.4746, + "step": 10910 + }, + { + "epoch": 0.97, + "learning_rate": 4.517294049660614e-06, + "loss": 1.5801, + "step": 10911 + }, + { + "epoch": 0.97, + "learning_rate": 4.489978465921474e-06, + "loss": 1.5625, + "step": 10912 + }, + { + "epoch": 0.97, + "learning_rate": 4.462745534180135e-06, + "loss": 1.6523, + "step": 10913 + }, + { + "epoch": 0.97, + "learning_rate": 4.4355952566972336e-06, + "loss": 1.6895, + "step": 10914 + }, + { + "epoch": 0.97, + "learning_rate": 4.408527635727189e-06, + "loss": 1.502, + "step": 10915 + }, + { + "epoch": 0.97, + "learning_rate": 4.381542673517203e-06, + "loss": 1.5566, + "step": 10916 + }, + { + "epoch": 0.97, + "learning_rate": 4.354640372307705e-06, + "loss": 1.6934, + "step": 10917 + }, + { + "epoch": 0.97, + "learning_rate": 4.327820734332244e-06, + "loss": 1.6191, + "step": 10918 + }, + { + "epoch": 0.97, + "learning_rate": 4.301083761817482e-06, + "loss": 1.5625, + "step": 10919 + }, + { + "epoch": 0.97, + "learning_rate": 4.274429456983309e-06, + "loss": 1.7012, + "step": 10920 + }, + { + "epoch": 0.97, + "learning_rate": 4.247857822042733e-06, + "loss": 1.6816, + "step": 10921 + }, + { + "epoch": 0.97, + "learning_rate": 4.221368859201657e-06, + "loss": 1.4609, + "step": 10922 + }, + { + "epoch": 0.97, + "learning_rate": 4.194962570659544e-06, + "loss": 1.4395, + "step": 10923 + }, + { + "epoch": 0.97, + "learning_rate": 4.168638958608639e-06, + "loss": 1.6445, + "step": 10924 + }, + { + "epoch": 0.97, + "learning_rate": 4.1423980252345285e-06, + "loss": 1.6094, + "step": 10925 + }, + { + "epoch": 0.97, + "learning_rate": 4.116239772715802e-06, + "loss": 1.6094, + "step": 10926 + }, + { + "epoch": 0.97, + "learning_rate": 4.090164203224278e-06, + "loss": 1.5293, + "step": 10927 + }, + { + "epoch": 0.97, + "learning_rate": 4.064171318924781e-06, + "loss": 1.5312, + "step": 10928 + }, + { + "epoch": 0.97, + "learning_rate": 4.038261121975584e-06, + "loss": 1.5117, + "step": 10929 + }, + { + "epoch": 0.97, + "learning_rate": 4.012433614527633e-06, + "loss": 1.7168, + "step": 10930 + }, + { + "epoch": 0.97, + "learning_rate": 3.986688798725435e-06, + "loss": 1.5586, + "step": 10931 + }, + { + "epoch": 0.97, + "learning_rate": 3.961026676706392e-06, + "loss": 1.6016, + "step": 10932 + }, + { + "epoch": 0.97, + "learning_rate": 3.93544725060102e-06, + "loss": 1.6172, + "step": 10933 + }, + { + "epoch": 0.97, + "learning_rate": 3.909950522533068e-06, + "loss": 1.5703, + "step": 10934 + }, + { + "epoch": 0.97, + "learning_rate": 3.884536494619506e-06, + "loss": 1.6465, + "step": 10935 + }, + { + "epoch": 0.97, + "learning_rate": 3.859205168970204e-06, + "loss": 1.5957, + "step": 10936 + }, + { + "epoch": 0.97, + "learning_rate": 3.833956547688367e-06, + "loss": 1.6699, + "step": 10937 + }, + { + "epoch": 0.97, + "learning_rate": 3.808790632870207e-06, + "loss": 1.6895, + "step": 10938 + }, + { + "epoch": 0.97, + "learning_rate": 3.783707426605054e-06, + "loss": 1.5996, + "step": 10939 + }, + { + "epoch": 0.97, + "learning_rate": 3.758706930975575e-06, + "loss": 1.6113, + "step": 10940 + }, + { + "epoch": 0.97, + "learning_rate": 3.7337891480574426e-06, + "loss": 1.6992, + "step": 10941 + }, + { + "epoch": 0.97, + "learning_rate": 3.7089540799193356e-06, + "loss": 1.6035, + "step": 10942 + }, + { + "epoch": 0.97, + "learning_rate": 3.684201728623271e-06, + "loss": 1.6582, + "step": 10943 + }, + { + "epoch": 0.97, + "learning_rate": 3.6595320962241607e-06, + "loss": 1.5605, + "step": 10944 + }, + { + "epoch": 0.97, + "learning_rate": 3.6349451847703664e-06, + "loss": 1.543, + "step": 10945 + }, + { + "epoch": 0.97, + "learning_rate": 3.6104409963031436e-06, + "loss": 1.5742, + "step": 10946 + }, + { + "epoch": 0.97, + "learning_rate": 3.5860195328568655e-06, + "loss": 1.5371, + "step": 10947 + }, + { + "epoch": 0.97, + "learning_rate": 3.5616807964592434e-06, + "loss": 1.5312, + "step": 10948 + }, + { + "epoch": 0.97, + "learning_rate": 3.537424789130994e-06, + "loss": 1.5723, + "step": 10949 + }, + { + "epoch": 0.97, + "learning_rate": 3.513251512885951e-06, + "loss": 1.6426, + "step": 10950 + }, + { + "epoch": 0.97, + "learning_rate": 3.4891609697309536e-06, + "loss": 1.5234, + "step": 10951 + }, + { + "epoch": 0.97, + "learning_rate": 3.46515316166629e-06, + "loss": 1.5273, + "step": 10952 + }, + { + "epoch": 0.97, + "learning_rate": 3.4412280906851444e-06, + "loss": 1.4492, + "step": 10953 + }, + { + "epoch": 0.97, + "learning_rate": 3.4173857587738166e-06, + "loss": 1.543, + "step": 10954 + }, + { + "epoch": 0.97, + "learning_rate": 3.393626167911945e-06, + "loss": 1.5059, + "step": 10955 + }, + { + "epoch": 0.97, + "learning_rate": 3.3699493200720633e-06, + "loss": 1.5605, + "step": 10956 + }, + { + "epoch": 0.97, + "learning_rate": 3.346355217220043e-06, + "loss": 1.5371, + "step": 10957 + }, + { + "epoch": 0.97, + "learning_rate": 3.3228438613146507e-06, + "loss": 1.584, + "step": 10958 + }, + { + "epoch": 0.97, + "learning_rate": 3.2994152543079915e-06, + "loss": 1.5332, + "step": 10959 + }, + { + "epoch": 0.98, + "learning_rate": 3.276069398145176e-06, + "loss": 1.4941, + "step": 10960 + }, + { + "epoch": 0.98, + "learning_rate": 3.252806294764543e-06, + "loss": 1.5703, + "step": 10961 + }, + { + "epoch": 0.98, + "learning_rate": 3.2296259460974366e-06, + "loss": 1.6133, + "step": 10962 + }, + { + "epoch": 0.98, + "learning_rate": 3.2065283540684274e-06, + "loss": 1.5469, + "step": 10963 + }, + { + "epoch": 0.98, + "learning_rate": 3.183513520595205e-06, + "loss": 1.4336, + "step": 10964 + }, + { + "epoch": 0.98, + "learning_rate": 3.160581447588684e-06, + "loss": 1.5508, + "step": 10965 + }, + { + "epoch": 0.98, + "learning_rate": 3.1377321369525645e-06, + "loss": 1.6387, + "step": 10966 + }, + { + "epoch": 0.98, + "learning_rate": 3.114965590583996e-06, + "loss": 1.6074, + "step": 10967 + }, + { + "epoch": 0.98, + "learning_rate": 3.0922818103732433e-06, + "loss": 1.5449, + "step": 10968 + }, + { + "epoch": 0.98, + "learning_rate": 3.069680798203578e-06, + "loss": 1.5664, + "step": 10969 + }, + { + "epoch": 0.98, + "learning_rate": 3.0471625559513883e-06, + "loss": 1.5332, + "step": 10970 + }, + { + "epoch": 0.98, + "learning_rate": 3.0247270854862896e-06, + "loss": 1.498, + "step": 10971 + }, + { + "epoch": 0.98, + "learning_rate": 3.002374388671014e-06, + "loss": 1.6094, + "step": 10972 + }, + { + "epoch": 0.98, + "learning_rate": 2.9801044673612996e-06, + "loss": 1.5801, + "step": 10973 + }, + { + "epoch": 0.98, + "learning_rate": 2.957917323406223e-06, + "loss": 1.5703, + "step": 10974 + }, + { + "epoch": 0.98, + "learning_rate": 2.9358129586477546e-06, + "loss": 1.5098, + "step": 10975 + }, + { + "epoch": 0.98, + "learning_rate": 2.913791374921093e-06, + "loss": 1.4863, + "step": 10976 + }, + { + "epoch": 0.98, + "learning_rate": 2.8918525740547763e-06, + "loss": 1.5488, + "step": 10977 + }, + { + "epoch": 0.98, + "learning_rate": 2.869996557870014e-06, + "loss": 1.584, + "step": 10978 + }, + { + "epoch": 0.98, + "learning_rate": 2.848223328181576e-06, + "loss": 1.5645, + "step": 10979 + }, + { + "epoch": 0.98, + "learning_rate": 2.8265328867971283e-06, + "loss": 1.7285, + "step": 10980 + }, + { + "epoch": 0.98, + "learning_rate": 2.8049252355174526e-06, + "loss": 1.4551, + "step": 10981 + }, + { + "epoch": 0.98, + "learning_rate": 2.783400376136558e-06, + "loss": 1.4297, + "step": 10982 + }, + { + "epoch": 0.98, + "learning_rate": 2.7619583104415703e-06, + "loss": 1.6055, + "step": 10983 + }, + { + "epoch": 0.98, + "learning_rate": 2.7405990402127323e-06, + "loss": 1.5488, + "step": 10984 + }, + { + "epoch": 0.98, + "learning_rate": 2.7193225672232925e-06, + "loss": 1.6094, + "step": 10985 + }, + { + "epoch": 0.98, + "learning_rate": 2.6981288932398375e-06, + "loss": 1.5898, + "step": 10986 + }, + { + "epoch": 0.98, + "learning_rate": 2.6770180200218485e-06, + "loss": 1.6699, + "step": 10987 + }, + { + "epoch": 0.98, + "learning_rate": 2.655989949322146e-06, + "loss": 1.4961, + "step": 10988 + }, + { + "epoch": 0.98, + "learning_rate": 2.6350446828865558e-06, + "loss": 1.5508, + "step": 10989 + }, + { + "epoch": 0.98, + "learning_rate": 2.6141822224540202e-06, + "loss": 1.5469, + "step": 10990 + }, + { + "epoch": 0.98, + "learning_rate": 2.5934025697567087e-06, + "loss": 1.5996, + "step": 10991 + }, + { + "epoch": 0.98, + "learning_rate": 2.572705726519797e-06, + "loss": 1.5625, + "step": 10992 + }, + { + "epoch": 0.98, + "learning_rate": 2.552091694461689e-06, + "loss": 1.582, + "step": 10993 + }, + { + "epoch": 0.98, + "learning_rate": 2.5315604752937924e-06, + "loss": 1.4355, + "step": 10994 + }, + { + "epoch": 0.98, + "learning_rate": 2.5111120707207445e-06, + "loss": 1.543, + "step": 10995 + }, + { + "epoch": 0.98, + "learning_rate": 2.490746482440187e-06, + "loss": 1.5312, + "step": 10996 + }, + { + "epoch": 0.98, + "learning_rate": 2.4704637121432116e-06, + "loss": 1.5625, + "step": 10997 + }, + { + "epoch": 0.98, + "learning_rate": 2.4502637615134716e-06, + "loss": 1.5391, + "step": 10998 + }, + { + "epoch": 0.98, + "learning_rate": 2.4301466322281805e-06, + "loss": 1.6055, + "step": 10999 + }, + { + "epoch": 0.98, + "learning_rate": 2.4101123259576696e-06, + "loss": 1.5391, + "step": 11000 + }, + { + "epoch": 0.98, + "learning_rate": 2.3901608443651634e-06, + "loss": 1.6191, + "step": 11001 + }, + { + "epoch": 0.98, + "learning_rate": 2.370292189107226e-06, + "loss": 1.6348, + "step": 11002 + }, + { + "epoch": 0.98, + "learning_rate": 2.3505063618333155e-06, + "loss": 1.4961, + "step": 11003 + }, + { + "epoch": 0.98, + "learning_rate": 2.33080336418634e-06, + "loss": 1.5469, + "step": 11004 + }, + { + "epoch": 0.98, + "learning_rate": 2.3111831978018805e-06, + "loss": 1.5977, + "step": 11005 + }, + { + "epoch": 0.98, + "learning_rate": 2.291645864309078e-06, + "loss": 1.6191, + "step": 11006 + }, + { + "epoch": 0.98, + "learning_rate": 2.2721913653299676e-06, + "loss": 1.6016, + "step": 11007 + }, + { + "epoch": 0.98, + "learning_rate": 2.2528197024798136e-06, + "loss": 1.5996, + "step": 11008 + }, + { + "epoch": 0.98, + "learning_rate": 2.233530877366885e-06, + "loss": 1.5566, + "step": 11009 + }, + { + "epoch": 0.98, + "learning_rate": 2.214324891592567e-06, + "loss": 1.4746, + "step": 11010 + }, + { + "epoch": 0.98, + "learning_rate": 2.195201746751474e-06, + "loss": 1.6895, + "step": 11011 + }, + { + "epoch": 0.98, + "learning_rate": 2.176161444431446e-06, + "loss": 1.5371, + "step": 11012 + }, + { + "epoch": 0.98, + "learning_rate": 2.1572039862131077e-06, + "loss": 1.4844, + "step": 11013 + }, + { + "epoch": 0.98, + "learning_rate": 2.1383293736705334e-06, + "loss": 1.4297, + "step": 11014 + }, + { + "epoch": 0.98, + "learning_rate": 2.119537608370581e-06, + "loss": 1.5742, + "step": 11015 + }, + { + "epoch": 0.98, + "learning_rate": 2.1008286918737796e-06, + "loss": 1.6348, + "step": 11016 + }, + { + "epoch": 0.98, + "learning_rate": 2.0822026257331094e-06, + "loss": 1.7227, + "step": 11017 + }, + { + "epoch": 0.98, + "learning_rate": 2.063659411495e-06, + "loss": 1.6211, + "step": 11018 + }, + { + "epoch": 0.98, + "learning_rate": 2.0451990506993312e-06, + "loss": 1.4727, + "step": 11019 + }, + { + "epoch": 0.98, + "learning_rate": 2.026821544878432e-06, + "loss": 1.6035, + "step": 11020 + }, + { + "epoch": 0.98, + "learning_rate": 2.0085268955581936e-06, + "loss": 1.5645, + "step": 11021 + }, + { + "epoch": 0.98, + "learning_rate": 1.990315104257623e-06, + "loss": 1.7246, + "step": 11022 + }, + { + "epoch": 0.98, + "learning_rate": 1.9721861724886213e-06, + "loss": 1.5176, + "step": 11023 + }, + { + "epoch": 0.98, + "learning_rate": 1.954140101756319e-06, + "loss": 1.6113, + "step": 11024 + }, + { + "epoch": 0.98, + "learning_rate": 1.936176893559072e-06, + "loss": 1.6191, + "step": 11025 + }, + { + "epoch": 0.98, + "learning_rate": 1.918296549388354e-06, + "loss": 1.582, + "step": 11026 + }, + { + "epoch": 0.98, + "learning_rate": 1.9004990707285342e-06, + "loss": 1.5684, + "step": 11027 + }, + { + "epoch": 0.98, + "learning_rate": 1.8827844590572074e-06, + "loss": 1.4961, + "step": 11028 + }, + { + "epoch": 0.98, + "learning_rate": 1.8651527158453087e-06, + "loss": 1.4609, + "step": 11029 + }, + { + "epoch": 0.98, + "learning_rate": 1.8476038425565556e-06, + "loss": 1.6953, + "step": 11030 + }, + { + "epoch": 0.98, + "learning_rate": 1.830137840648005e-06, + "loss": 1.4336, + "step": 11031 + }, + { + "epoch": 0.98, + "learning_rate": 1.8127547115697196e-06, + "loss": 1.5371, + "step": 11032 + }, + { + "epoch": 0.98, + "learning_rate": 1.7954544567649889e-06, + "loss": 1.5645, + "step": 11033 + }, + { + "epoch": 0.98, + "learning_rate": 1.7782370776701084e-06, + "loss": 1.6953, + "step": 11034 + }, + { + "epoch": 0.98, + "learning_rate": 1.7611025757144906e-06, + "loss": 1.6816, + "step": 11035 + }, + { + "epoch": 0.98, + "learning_rate": 1.7440509523209968e-06, + "loss": 1.5254, + "step": 11036 + }, + { + "epoch": 0.98, + "learning_rate": 1.7270822089050509e-06, + "loss": 1.7676, + "step": 11037 + }, + { + "epoch": 0.98, + "learning_rate": 1.7101963468755255e-06, + "loss": 1.5059, + "step": 11038 + }, + { + "epoch": 0.98, + "learning_rate": 1.6933933676344105e-06, + "loss": 1.6758, + "step": 11039 + }, + { + "epoch": 0.98, + "learning_rate": 1.6766732725768119e-06, + "loss": 1.5098, + "step": 11040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6600360630909527e-06, + "loss": 1.5059, + "step": 11041 + }, + { + "epoch": 0.98, + "learning_rate": 1.6434817405579506e-06, + "loss": 1.6133, + "step": 11042 + }, + { + "epoch": 0.98, + "learning_rate": 1.6270103063523722e-06, + "loss": 1.5625, + "step": 11043 + }, + { + "epoch": 0.98, + "learning_rate": 1.6106217618416796e-06, + "loss": 1.5273, + "step": 11044 + }, + { + "epoch": 0.98, + "learning_rate": 1.594316108386562e-06, + "loss": 1.6914, + "step": 11045 + }, + { + "epoch": 0.98, + "learning_rate": 1.5780933473407144e-06, + "loss": 1.5488, + "step": 11046 + }, + { + "epoch": 0.98, + "learning_rate": 1.5619534800511704e-06, + "loss": 1.7051, + "step": 11047 + }, + { + "epoch": 0.98, + "learning_rate": 1.545896507857858e-06, + "loss": 1.584, + "step": 11048 + }, + { + "epoch": 0.98, + "learning_rate": 1.5299224320939332e-06, + "loss": 1.543, + "step": 11049 + }, + { + "epoch": 0.98, + "learning_rate": 1.5140312540856682e-06, + "loss": 1.4629, + "step": 11050 + }, + { + "epoch": 0.98, + "learning_rate": 1.4982229751523413e-06, + "loss": 1.6953, + "step": 11051 + }, + { + "epoch": 0.98, + "learning_rate": 1.482497596606458e-06, + "loss": 1.4277, + "step": 11052 + }, + { + "epoch": 0.98, + "learning_rate": 1.4668551197536406e-06, + "loss": 1.5996, + "step": 11053 + }, + { + "epoch": 0.98, + "learning_rate": 1.451295545892517e-06, + "loss": 1.5996, + "step": 11054 + }, + { + "epoch": 0.98, + "learning_rate": 1.4358188763150536e-06, + "loss": 1.6973, + "step": 11055 + }, + { + "epoch": 0.98, + "learning_rate": 1.4204251123061118e-06, + "loss": 1.6328, + "step": 11056 + }, + { + "epoch": 0.98, + "learning_rate": 1.40511425514378e-06, + "loss": 1.584, + "step": 11057 + }, + { + "epoch": 0.98, + "learning_rate": 1.389886306099153e-06, + "loss": 1.5156, + "step": 11058 + }, + { + "epoch": 0.98, + "learning_rate": 1.3747412664367742e-06, + "loss": 1.5918, + "step": 11059 + }, + { + "epoch": 0.98, + "learning_rate": 1.3596791374137497e-06, + "loss": 1.4922, + "step": 11060 + }, + { + "epoch": 0.98, + "learning_rate": 1.3446999202808564e-06, + "loss": 1.5918, + "step": 11061 + }, + { + "epoch": 0.98, + "learning_rate": 1.3298036162815442e-06, + "loss": 1.5723, + "step": 11062 + }, + { + "epoch": 0.98, + "learning_rate": 1.3149902266527126e-06, + "loss": 1.6211, + "step": 11063 + }, + { + "epoch": 0.98, + "learning_rate": 1.3002597526241555e-06, + "loss": 1.6094, + "step": 11064 + }, + { + "epoch": 0.98, + "learning_rate": 1.2856121954188947e-06, + "loss": 1.627, + "step": 11065 + }, + { + "epoch": 0.98, + "learning_rate": 1.2710475562530688e-06, + "loss": 1.5059, + "step": 11066 + }, + { + "epoch": 0.98, + "learning_rate": 1.2565658363359323e-06, + "loss": 1.6016, + "step": 11067 + }, + { + "epoch": 0.98, + "learning_rate": 1.2421670368697457e-06, + "loss": 1.5957, + "step": 11068 + }, + { + "epoch": 0.98, + "learning_rate": 1.2278511590499975e-06, + "loss": 1.5391, + "step": 11069 + }, + { + "epoch": 0.98, + "learning_rate": 1.2136182040652921e-06, + "loss": 1.6426, + "step": 11070 + }, + { + "epoch": 0.98, + "learning_rate": 1.1994681730972402e-06, + "loss": 1.5566, + "step": 11071 + }, + { + "epoch": 0.98, + "learning_rate": 1.1854010673207905e-06, + "loss": 1.5801, + "step": 11072 + }, + { + "epoch": 0.99, + "learning_rate": 1.1714168879036758e-06, + "loss": 1.5508, + "step": 11073 + }, + { + "epoch": 0.99, + "learning_rate": 1.1575156360069672e-06, + "loss": 1.6172, + "step": 11074 + }, + { + "epoch": 0.99, + "learning_rate": 1.1436973127849638e-06, + "loss": 1.6016, + "step": 11075 + }, + { + "epoch": 0.99, + "learning_rate": 1.1299619193847476e-06, + "loss": 1.4824, + "step": 11076 + }, + { + "epoch": 0.99, + "learning_rate": 1.1163094569468513e-06, + "loss": 1.5293, + "step": 11077 + }, + { + "epoch": 0.99, + "learning_rate": 1.102739926604701e-06, + "loss": 1.6016, + "step": 11078 + }, + { + "epoch": 0.99, + "learning_rate": 1.0892533294848405e-06, + "loss": 1.5605, + "step": 11079 + }, + { + "epoch": 0.99, + "learning_rate": 1.0758496667070405e-06, + "loss": 1.5605, + "step": 11080 + }, + { + "epoch": 0.99, + "learning_rate": 1.0625289393841886e-06, + "loss": 1.6055, + "step": 11081 + }, + { + "epoch": 0.99, + "learning_rate": 1.0492911486221778e-06, + "loss": 1.5723, + "step": 11082 + }, + { + "epoch": 0.99, + "learning_rate": 1.0361362955200183e-06, + "loss": 1.5234, + "step": 11083 + }, + { + "epoch": 0.99, + "learning_rate": 1.023064381170058e-06, + "loss": 1.6543, + "step": 11084 + }, + { + "epoch": 0.99, + "learning_rate": 1.010075406657429e-06, + "loss": 1.4941, + "step": 11085 + }, + { + "epoch": 0.99, + "learning_rate": 9.971693730606024e-07, + "loss": 1.6113, + "step": 11086 + }, + { + "epoch": 0.99, + "learning_rate": 9.84346281451165e-07, + "loss": 1.6035, + "step": 11087 + }, + { + "epoch": 0.99, + "learning_rate": 9.716061328935987e-07, + "loss": 1.502, + "step": 11088 + }, + { + "epoch": 0.99, + "learning_rate": 9.589489284457243e-07, + "loss": 1.5625, + "step": 11089 + }, + { + "epoch": 0.99, + "learning_rate": 9.463746691584785e-07, + "loss": 1.6738, + "step": 11090 + }, + { + "epoch": 0.99, + "learning_rate": 9.338833560756932e-07, + "loss": 1.7246, + "step": 11091 + }, + { + "epoch": 0.99, + "learning_rate": 9.214749902346497e-07, + "loss": 1.6426, + "step": 11092 + }, + { + "epoch": 0.99, + "learning_rate": 9.091495726653021e-07, + "loss": 1.5117, + "step": 11093 + }, + { + "epoch": 0.99, + "learning_rate": 8.969071043911648e-07, + "loss": 1.6172, + "step": 11094 + }, + { + "epoch": 0.99, + "learning_rate": 8.84747586428647e-07, + "loss": 1.6504, + "step": 11095 + }, + { + "epoch": 0.99, + "learning_rate": 8.726710197871634e-07, + "loss": 1.5742, + "step": 11096 + }, + { + "epoch": 0.99, + "learning_rate": 8.606774054693567e-07, + "loss": 1.5938, + "step": 11097 + }, + { + "epoch": 0.99, + "learning_rate": 8.487667444712077e-07, + "loss": 1.5723, + "step": 11098 + }, + { + "epoch": 0.99, + "learning_rate": 8.369390377814812e-07, + "loss": 1.5332, + "step": 11099 + }, + { + "epoch": 0.99, + "learning_rate": 8.251942863821693e-07, + "loss": 1.4824, + "step": 11100 + }, + { + "epoch": 0.99, + "learning_rate": 8.1353249124827e-07, + "loss": 1.5215, + "step": 11101 + }, + { + "epoch": 0.99, + "learning_rate": 8.019536533482308e-07, + "loss": 1.6504, + "step": 11102 + }, + { + "epoch": 0.99, + "learning_rate": 7.904577736430607e-07, + "loss": 1.4648, + "step": 11103 + }, + { + "epoch": 0.99, + "learning_rate": 7.790448530875516e-07, + "loss": 1.5273, + "step": 11104 + }, + { + "epoch": 0.99, + "learning_rate": 7.677148926290567e-07, + "loss": 1.5566, + "step": 11105 + }, + { + "epoch": 0.99, + "learning_rate": 7.56467893208268e-07, + "loss": 1.5176, + "step": 11106 + }, + { + "epoch": 0.99, + "learning_rate": 7.453038557589941e-07, + "loss": 1.5371, + "step": 11107 + }, + { + "epoch": 0.99, + "learning_rate": 7.342227812081603e-07, + "loss": 1.543, + "step": 11108 + }, + { + "epoch": 0.99, + "learning_rate": 7.232246704756972e-07, + "loss": 1.4668, + "step": 11109 + }, + { + "epoch": 0.99, + "learning_rate": 7.123095244747635e-07, + "loss": 1.5664, + "step": 11110 + }, + { + "epoch": 0.99, + "learning_rate": 7.01477344111523e-07, + "loss": 1.584, + "step": 11111 + }, + { + "epoch": 0.99, + "learning_rate": 6.907281302853674e-07, + "loss": 1.5566, + "step": 11112 + }, + { + "epoch": 0.99, + "learning_rate": 6.800618838888051e-07, + "loss": 1.5332, + "step": 11113 + }, + { + "epoch": 0.99, + "learning_rate": 6.694786058072388e-07, + "loss": 1.498, + "step": 11114 + }, + { + "epoch": 0.99, + "learning_rate": 6.589782969195213e-07, + "loss": 1.7559, + "step": 11115 + }, + { + "epoch": 0.99, + "learning_rate": 6.485609580973994e-07, + "loss": 1.6133, + "step": 11116 + }, + { + "epoch": 0.99, + "learning_rate": 6.382265902056261e-07, + "loss": 1.7676, + "step": 11117 + }, + { + "epoch": 0.99, + "learning_rate": 6.279751941024037e-07, + "loss": 1.5195, + "step": 11118 + }, + { + "epoch": 0.99, + "learning_rate": 6.17806770638718e-07, + "loss": 1.5625, + "step": 11119 + }, + { + "epoch": 0.99, + "learning_rate": 6.077213206588939e-07, + "loss": 1.6094, + "step": 11120 + }, + { + "epoch": 0.99, + "learning_rate": 5.977188450001503e-07, + "loss": 1.5332, + "step": 11121 + }, + { + "epoch": 0.99, + "learning_rate": 5.877993444930451e-07, + "loss": 1.5723, + "step": 11122 + }, + { + "epoch": 0.99, + "learning_rate": 5.77962819961142e-07, + "loss": 1.6445, + "step": 11123 + }, + { + "epoch": 0.99, + "learning_rate": 5.682092722210097e-07, + "loss": 1.5391, + "step": 11124 + }, + { + "epoch": 0.99, + "learning_rate": 5.585387020825561e-07, + "loss": 1.4434, + "step": 11125 + }, + { + "epoch": 0.99, + "learning_rate": 5.489511103486944e-07, + "loss": 1.5273, + "step": 11126 + }, + { + "epoch": 0.99, + "learning_rate": 5.394464978152324e-07, + "loss": 1.6621, + "step": 11127 + }, + { + "epoch": 0.99, + "learning_rate": 5.30024865271539e-07, + "loss": 1.5254, + "step": 11128 + }, + { + "epoch": 0.99, + "learning_rate": 5.206862134997659e-07, + "loss": 1.5742, + "step": 11129 + }, + { + "epoch": 0.99, + "learning_rate": 5.11430543275071e-07, + "loss": 1.5254, + "step": 11130 + }, + { + "epoch": 0.99, + "learning_rate": 5.022578553661727e-07, + "loss": 1.5938, + "step": 11131 + }, + { + "epoch": 0.99, + "learning_rate": 4.931681505344621e-07, + "loss": 1.5801, + "step": 11132 + }, + { + "epoch": 0.99, + "learning_rate": 4.841614295346686e-07, + "loss": 1.5996, + "step": 11133 + }, + { + "epoch": 0.99, + "learning_rate": 4.752376931146385e-07, + "loss": 1.5957, + "step": 11134 + }, + { + "epoch": 0.99, + "learning_rate": 4.663969420151126e-07, + "loss": 1.5859, + "step": 11135 + }, + { + "epoch": 0.99, + "learning_rate": 4.5763917697017046e-07, + "loss": 1.5762, + "step": 11136 + }, + { + "epoch": 0.99, + "learning_rate": 4.48964398706897e-07, + "loss": 1.541, + "step": 11137 + }, + { + "epoch": 0.99, + "learning_rate": 4.4037260794549396e-07, + "loss": 1.6152, + "step": 11138 + }, + { + "epoch": 0.99, + "learning_rate": 4.3186380539950165e-07, + "loss": 1.4668, + "step": 11139 + }, + { + "epoch": 0.99, + "learning_rate": 4.2343799177502197e-07, + "loss": 1.5801, + "step": 11140 + }, + { + "epoch": 0.99, + "learning_rate": 4.1509516777193945e-07, + "loss": 1.4492, + "step": 11141 + }, + { + "epoch": 0.99, + "learning_rate": 4.0683533408258923e-07, + "loss": 1.6504, + "step": 11142 + }, + { + "epoch": 0.99, + "learning_rate": 3.9865849139297805e-07, + "loss": 1.6016, + "step": 11143 + }, + { + "epoch": 0.99, + "learning_rate": 3.9056464038200733e-07, + "loss": 1.6934, + "step": 11144 + }, + { + "epoch": 0.99, + "learning_rate": 3.82553781721473e-07, + "loss": 1.4863, + "step": 11145 + }, + { + "epoch": 0.99, + "learning_rate": 3.7462591607662077e-07, + "loss": 1.4727, + "step": 11146 + }, + { + "epoch": 0.99, + "learning_rate": 3.667810441055908e-07, + "loss": 1.584, + "step": 11147 + }, + { + "epoch": 0.99, + "learning_rate": 3.5901916645975084e-07, + "loss": 1.6289, + "step": 11148 + }, + { + "epoch": 0.99, + "learning_rate": 3.513402837834745e-07, + "loss": 1.6426, + "step": 11149 + }, + { + "epoch": 0.99, + "learning_rate": 3.4374439671436275e-07, + "loss": 1.625, + "step": 11150 + }, + { + "epoch": 0.99, + "learning_rate": 3.362315058830223e-07, + "loss": 1.5449, + "step": 11151 + }, + { + "epoch": 0.99, + "learning_rate": 3.288016119131765e-07, + "loss": 1.418, + "step": 11152 + }, + { + "epoch": 0.99, + "learning_rate": 3.2145471542177615e-07, + "loss": 1.7285, + "step": 11153 + }, + { + "epoch": 0.99, + "learning_rate": 3.1419081701877796e-07, + "loss": 1.582, + "step": 11154 + }, + { + "epoch": 0.99, + "learning_rate": 3.070099173071439e-07, + "loss": 1.5645, + "step": 11155 + }, + { + "epoch": 0.99, + "learning_rate": 2.9991201688328585e-07, + "loss": 1.6445, + "step": 11156 + }, + { + "epoch": 0.99, + "learning_rate": 2.928971163362881e-07, + "loss": 1.5078, + "step": 11157 + }, + { + "epoch": 0.99, + "learning_rate": 2.8596521624868477e-07, + "loss": 1.5, + "step": 11158 + }, + { + "epoch": 0.99, + "learning_rate": 2.791163171960154e-07, + "loss": 1.5645, + "step": 11159 + }, + { + "epoch": 0.99, + "learning_rate": 2.7235041974682515e-07, + "loss": 1.5566, + "step": 11160 + }, + { + "epoch": 0.99, + "learning_rate": 2.6566752446277596e-07, + "loss": 1.5996, + "step": 11161 + }, + { + "epoch": 0.99, + "learning_rate": 2.590676318989793e-07, + "loss": 1.4766, + "step": 11162 + }, + { + "epoch": 0.99, + "learning_rate": 2.525507426032192e-07, + "loss": 1.5078, + "step": 11163 + }, + { + "epoch": 0.99, + "learning_rate": 2.4611685711650733e-07, + "loss": 1.5625, + "step": 11164 + }, + { + "epoch": 0.99, + "learning_rate": 2.3976597597308304e-07, + "loss": 1.4062, + "step": 11165 + }, + { + "epoch": 0.99, + "learning_rate": 2.334980997001912e-07, + "loss": 1.5762, + "step": 11166 + }, + { + "epoch": 0.99, + "learning_rate": 2.273132288183044e-07, + "loss": 1.6387, + "step": 11167 + }, + { + "epoch": 0.99, + "learning_rate": 2.212113638409008e-07, + "loss": 1.5488, + "step": 11168 + }, + { + "epoch": 0.99, + "learning_rate": 2.151925052744641e-07, + "loss": 1.5137, + "step": 11169 + }, + { + "epoch": 0.99, + "learning_rate": 2.0925665361892776e-07, + "loss": 1.5859, + "step": 11170 + }, + { + "epoch": 0.99, + "learning_rate": 2.0340380936689773e-07, + "loss": 1.5918, + "step": 11171 + }, + { + "epoch": 0.99, + "learning_rate": 1.9763397300442965e-07, + "loss": 1.502, + "step": 11172 + }, + { + "epoch": 0.99, + "learning_rate": 1.9194714501047374e-07, + "loss": 1.7285, + "step": 11173 + }, + { + "epoch": 0.99, + "learning_rate": 1.8634332585731884e-07, + "loss": 1.5566, + "step": 11174 + }, + { + "epoch": 0.99, + "learning_rate": 1.8082251601003742e-07, + "loss": 1.627, + "step": 11175 + }, + { + "epoch": 0.99, + "learning_rate": 1.7538471592715155e-07, + "loss": 1.5391, + "step": 11176 + }, + { + "epoch": 0.99, + "learning_rate": 1.7002992606007795e-07, + "loss": 1.5664, + "step": 11177 + }, + { + "epoch": 0.99, + "learning_rate": 1.6475814685334988e-07, + "loss": 1.4746, + "step": 11178 + }, + { + "epoch": 0.99, + "learning_rate": 1.5956937874472832e-07, + "loss": 1.5137, + "step": 11179 + }, + { + "epoch": 0.99, + "learning_rate": 1.544636221649798e-07, + "loss": 1.4941, + "step": 11180 + }, + { + "epoch": 0.99, + "learning_rate": 1.4944087753798742e-07, + "loss": 1.7344, + "step": 11181 + }, + { + "epoch": 0.99, + "learning_rate": 1.4450114528075098e-07, + "loss": 1.5234, + "step": 11182 + }, + { + "epoch": 0.99, + "learning_rate": 1.396444258034979e-07, + "loss": 1.7402, + "step": 11183 + }, + { + "epoch": 0.99, + "learning_rate": 1.3487071950935014e-07, + "loss": 1.5801, + "step": 11184 + }, + { + "epoch": 1.0, + "learning_rate": 1.3018002679465736e-07, + "loss": 1.5098, + "step": 11185 + }, + { + "epoch": 1.0, + "learning_rate": 1.2557234804888574e-07, + "loss": 1.4922, + "step": 11186 + }, + { + "epoch": 1.0, + "learning_rate": 1.2104768365450712e-07, + "loss": 1.582, + "step": 11187 + }, + { + "epoch": 1.0, + "learning_rate": 1.1660603398733205e-07, + "loss": 1.6387, + "step": 11188 + }, + { + "epoch": 1.0, + "learning_rate": 1.1224739941606554e-07, + "loss": 1.5176, + "step": 11189 + }, + { + "epoch": 1.0, + "learning_rate": 1.079717803025293e-07, + "loss": 1.6562, + "step": 11190 + }, + { + "epoch": 1.0, + "learning_rate": 1.0377917700177264e-07, + "loss": 1.7188, + "step": 11191 + }, + { + "epoch": 1.0, + "learning_rate": 9.966958986185049e-08, + "loss": 1.582, + "step": 11192 + }, + { + "epoch": 1.0, + "learning_rate": 9.564301922393437e-08, + "loss": 1.5332, + "step": 11193 + }, + { + "epoch": 1.0, + "learning_rate": 9.169946542242347e-08, + "loss": 1.5996, + "step": 11194 + }, + { + "epoch": 1.0, + "learning_rate": 8.783892878461153e-08, + "loss": 1.5273, + "step": 11195 + }, + { + "epoch": 1.0, + "learning_rate": 8.406140963101993e-08, + "loss": 1.5527, + "step": 11196 + }, + { + "epoch": 1.0, + "learning_rate": 8.036690827539772e-08, + "loss": 1.707, + "step": 11197 + }, + { + "epoch": 1.0, + "learning_rate": 7.675542502438848e-08, + "loss": 1.5703, + "step": 11198 + }, + { + "epoch": 1.0, + "learning_rate": 7.322696017797447e-08, + "loss": 1.4863, + "step": 11199 + }, + { + "epoch": 1.0, + "learning_rate": 6.97815140288105e-08, + "loss": 1.5938, + "step": 11200 + }, + { + "epoch": 1.0, + "learning_rate": 6.641908686322307e-08, + "loss": 1.625, + "step": 11201 + }, + { + "epoch": 1.0, + "learning_rate": 6.313967896032225e-08, + "loss": 1.5723, + "step": 11202 + }, + { + "epoch": 1.0, + "learning_rate": 5.994329059233472e-08, + "loss": 1.5312, + "step": 11203 + }, + { + "epoch": 1.0, + "learning_rate": 5.6829922024603794e-08, + "loss": 1.6113, + "step": 11204 + }, + { + "epoch": 1.0, + "learning_rate": 5.379957351570042e-08, + "loss": 1.6602, + "step": 11205 + }, + { + "epoch": 1.0, + "learning_rate": 5.085224531720112e-08, + "loss": 1.5117, + "step": 11206 + }, + { + "epoch": 1.0, + "learning_rate": 4.7987937673688034e-08, + "loss": 1.5254, + "step": 11207 + }, + { + "epoch": 1.0, + "learning_rate": 4.5206650823192977e-08, + "loss": 1.4785, + "step": 11208 + }, + { + "epoch": 1.0, + "learning_rate": 4.250838499642029e-08, + "loss": 1.582, + "step": 11209 + }, + { + "epoch": 1.0, + "learning_rate": 3.989314041752401e-08, + "loss": 1.3945, + "step": 11210 + }, + { + "epoch": 1.0, + "learning_rate": 3.7360917303552735e-08, + "loss": 1.6777, + "step": 11211 + }, + { + "epoch": 1.0, + "learning_rate": 3.49117158647827e-08, + "loss": 1.6055, + "step": 11212 + }, + { + "epoch": 1.0, + "learning_rate": 3.254553630460677e-08, + "loss": 1.6289, + "step": 11213 + }, + { + "epoch": 1.0, + "learning_rate": 3.02623788194234e-08, + "loss": 1.582, + "step": 11214 + }, + { + "epoch": 1.0, + "learning_rate": 2.806224359874765e-08, + "loss": 1.543, + "step": 11215 + }, + { + "epoch": 1.0, + "learning_rate": 2.594513082532224e-08, + "loss": 1.5918, + "step": 11216 + }, + { + "epoch": 1.0, + "learning_rate": 2.3911040674895468e-08, + "loss": 1.5449, + "step": 11217 + }, + { + "epoch": 1.0, + "learning_rate": 2.195997331633226e-08, + "loss": 1.6094, + "step": 11218 + }, + { + "epoch": 1.0, + "learning_rate": 2.0091928911614156e-08, + "loss": 1.6074, + "step": 11219 + }, + { + "epoch": 1.0, + "learning_rate": 1.830690761595033e-08, + "loss": 1.7402, + "step": 11220 + }, + { + "epoch": 1.0, + "learning_rate": 1.6604909577333517e-08, + "loss": 1.6074, + "step": 11221 + }, + { + "epoch": 1.0, + "learning_rate": 1.4985934937206124e-08, + "loss": 1.6191, + "step": 11222 + }, + { + "epoch": 1.0, + "learning_rate": 1.3449983829905143e-08, + "loss": 1.6582, + "step": 11223 + }, + { + "epoch": 1.0, + "learning_rate": 1.199705638310622e-08, + "loss": 1.5156, + "step": 11224 + }, + { + "epoch": 1.0, + "learning_rate": 1.062715271726855e-08, + "loss": 1.5996, + "step": 11225 + }, + { + "epoch": 1.0, + "learning_rate": 9.340272946189998e-09, + "loss": 1.6016, + "step": 11226 + }, + { + "epoch": 1.0, + "learning_rate": 8.136417176785039e-09, + "loss": 1.5781, + "step": 11227 + }, + { + "epoch": 1.0, + "learning_rate": 7.015585508862721e-09, + "loss": 1.5293, + "step": 11228 + }, + { + "epoch": 1.0, + "learning_rate": 5.97777803568178e-09, + "loss": 1.543, + "step": 11229 + }, + { + "epoch": 1.0, + "learning_rate": 5.022994843173478e-09, + "loss": 1.5859, + "step": 11230 + }, + { + "epoch": 1.0, + "learning_rate": 4.151236010718762e-09, + "loss": 1.5117, + "step": 11231 + }, + { + "epoch": 1.0, + "learning_rate": 3.3625016107041717e-09, + "loss": 1.5977, + "step": 11232 + }, + { + "epoch": 1.0, + "learning_rate": 2.6567917086328663e-09, + "loss": 1.6582, + "step": 11233 + }, + { + "epoch": 1.0, + "learning_rate": 2.0341063630135993e-09, + "loss": 1.5605, + "step": 11234 + }, + { + "epoch": 1.0, + "learning_rate": 1.4944456255827632e-09, + "loss": 1.5879, + "step": 11235 + }, + { + "epoch": 1.0, + "learning_rate": 1.0378095411933685e-09, + "loss": 1.5547, + "step": 11236 + }, + { + "epoch": 1.0, + "learning_rate": 6.641981477040204e-10, + "loss": 1.5352, + "step": 11237 + }, + { + "epoch": 1.0, + "learning_rate": 3.736114762009635e-10, + "loss": 1.5918, + "step": 11238 + }, + { + "epoch": 1.0, + "learning_rate": 1.6604955066501503e-10, + "loss": 1.5078, + "step": 11239 + }, + { + "epoch": 1.0, + "learning_rate": 4.15123885266766e-11, + "loss": 1.5586, + "step": 11240 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 1.4893, + "step": 11241 + }, + { + "epoch": 1.0, + "step": 11241, + "total_flos": 9.459725673640755e+18, + "train_loss": 1.645774518920692, + "train_runtime": 28024.9955, + "train_samples_per_second": 25.669, + "train_steps_per_second": 0.401 + } + ], + "max_steps": 11241, + "num_train_epochs": 1, + "total_flos": 9.459725673640755e+18, + "trial_name": null, + "trial_params": null +}