{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9994569840896337, "eval_steps": 500, "global_step": 6904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 6.161, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 6.1682, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.807692307692308e-06, "loss": 5.9098, "step": 3 }, { "epoch": 0.0, "learning_rate": 9.615384615384616e-06, "loss": 6.2003, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.4423076923076924e-05, "loss": 6.1261, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.923076923076923e-05, "loss": 5.9273, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.4038461538461542e-05, "loss": 5.7911, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.4038461538461542e-05, "loss": 5.6916, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.884615384615385e-05, "loss": 5.4199, "step": 9 }, { "epoch": 0.0, "learning_rate": 2.884615384615385e-05, "loss": 5.2784, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.365384615384615e-05, "loss": 5.4758, "step": 11 }, { "epoch": 0.0, "learning_rate": 3.846153846153846e-05, "loss": 5.4711, "step": 12 }, { "epoch": 0.0, "learning_rate": 4.3269230769230766e-05, "loss": 5.4178, "step": 13 }, { "epoch": 0.0, "learning_rate": 4.3269230769230766e-05, "loss": 5.1831, "step": 14 }, { "epoch": 0.0, "learning_rate": 4.8076923076923084e-05, "loss": 5.1222, "step": 15 }, { "epoch": 0.0, "learning_rate": 5.288461538461539e-05, "loss": 4.9677, "step": 16 }, { "epoch": 0.0, "learning_rate": 5.76923076923077e-05, "loss": 5.014, "step": 17 }, { "epoch": 0.01, "learning_rate": 6.25e-05, "loss": 4.8392, "step": 18 }, { "epoch": 0.01, "learning_rate": 6.73076923076923e-05, "loss": 4.8018, "step": 19 }, { "epoch": 0.01, "learning_rate": 7.211538461538461e-05, "loss": 4.8535, "step": 20 }, { "epoch": 0.01, "learning_rate": 7.692307692307693e-05, "loss": 4.5942, "step": 21 }, { "epoch": 0.01, "learning_rate": 8.173076923076923e-05, "loss": 4.5563, "step": 22 }, { "epoch": 0.01, "learning_rate": 8.653846153846153e-05, "loss": 4.6241, "step": 23 }, { "epoch": 0.01, "learning_rate": 9.134615384615384e-05, "loss": 4.6345, "step": 24 }, { "epoch": 0.01, "learning_rate": 9.615384615384617e-05, "loss": 4.5706, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00010096153846153847, "loss": 4.5761, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.00010576923076923077, "loss": 4.5912, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.00011057692307692308, "loss": 4.4882, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.0001153846153846154, "loss": 4.5608, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.0001201923076923077, "loss": 4.6495, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.000125, "loss": 4.3678, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.00012980769230769233, "loss": 4.5512, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.0001346153846153846, "loss": 4.4474, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.00013942307692307694, "loss": 4.2791, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.00014423076923076922, "loss": 4.4434, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.00014903846153846155, "loss": 4.3315, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.00015384615384615385, "loss": 4.3086, "step": 37 }, { "epoch": 0.01, "learning_rate": 0.00015865384615384616, "loss": 4.4628, "step": 38 }, { "epoch": 0.01, "learning_rate": 0.00016346153846153846, "loss": 4.4949, "step": 39 }, { "epoch": 0.01, "learning_rate": 0.0001682692307692308, "loss": 4.5125, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00017307692307692307, "loss": 4.56, "step": 41 }, { "epoch": 0.01, "learning_rate": 0.0001778846153846154, "loss": 4.2873, "step": 42 }, { "epoch": 0.01, "learning_rate": 0.00018269230769230767, "loss": 4.2763, "step": 43 }, { "epoch": 0.01, "learning_rate": 0.0001875, "loss": 4.4362, "step": 44 }, { "epoch": 0.01, "learning_rate": 0.00019230769230769233, "loss": 4.2658, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.0001971153846153846, "loss": 4.3142, "step": 46 }, { "epoch": 0.01, "learning_rate": 0.00020192307692307694, "loss": 4.2805, "step": 47 }, { "epoch": 0.01, "learning_rate": 0.00020673076923076922, "loss": 4.2614, "step": 48 }, { "epoch": 0.01, "learning_rate": 0.00021153846153846155, "loss": 4.3094, "step": 49 }, { "epoch": 0.01, "learning_rate": 0.00021634615384615385, "loss": 4.2918, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.00022115384615384616, "loss": 4.2968, "step": 51 }, { "epoch": 0.02, "learning_rate": 0.00022596153846153846, "loss": 4.2043, "step": 52 }, { "epoch": 0.02, "learning_rate": 0.0002307692307692308, "loss": 4.2329, "step": 53 }, { "epoch": 0.02, "learning_rate": 0.00023557692307692307, "loss": 4.3264, "step": 54 }, { "epoch": 0.02, "learning_rate": 0.0002403846153846154, "loss": 4.3367, "step": 55 }, { "epoch": 0.02, "learning_rate": 0.0002451923076923077, "loss": 4.1987, "step": 56 }, { "epoch": 0.02, "learning_rate": 0.00025, "loss": 4.1895, "step": 57 }, { "epoch": 0.02, "learning_rate": 0.0002548076923076923, "loss": 4.1371, "step": 58 }, { "epoch": 0.02, "learning_rate": 0.00025961538461538467, "loss": 4.2447, "step": 59 }, { "epoch": 0.02, "learning_rate": 0.0002644230769230769, "loss": 4.1681, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.0002692307692307692, "loss": 4.1076, "step": 61 }, { "epoch": 0.02, "learning_rate": 0.0002740384615384616, "loss": 4.0651, "step": 62 }, { "epoch": 0.02, "learning_rate": 0.0002788461538461539, "loss": 4.0728, "step": 63 }, { "epoch": 0.02, "learning_rate": 0.00028365384615384613, "loss": 3.9783, "step": 64 }, { "epoch": 0.02, "learning_rate": 0.00028846153846153843, "loss": 4.1753, "step": 65 }, { "epoch": 0.02, "learning_rate": 0.0002932692307692308, "loss": 4.1236, "step": 66 }, { "epoch": 0.02, "learning_rate": 0.0002980769230769231, "loss": 4.1174, "step": 67 }, { "epoch": 0.02, "learning_rate": 0.00030288461538461535, "loss": 3.8908, "step": 68 }, { "epoch": 0.02, "learning_rate": 0.0003076923076923077, "loss": 4.1196, "step": 69 }, { "epoch": 0.02, "learning_rate": 0.0003125, "loss": 4.1691, "step": 70 }, { "epoch": 0.02, "learning_rate": 0.0003173076923076923, "loss": 4.2177, "step": 71 }, { "epoch": 0.02, "learning_rate": 0.00032211538461538467, "loss": 4.1797, "step": 72 }, { "epoch": 0.02, "learning_rate": 0.0003269230769230769, "loss": 4.1321, "step": 73 }, { "epoch": 0.02, "learning_rate": 0.0003317307692307692, "loss": 3.9375, "step": 74 }, { "epoch": 0.02, "learning_rate": 0.0003365384615384616, "loss": 3.9576, "step": 75 }, { "epoch": 0.02, "learning_rate": 0.0003413461538461539, "loss": 3.9569, "step": 76 }, { "epoch": 0.02, "learning_rate": 0.00034615384615384613, "loss": 4.0711, "step": 77 }, { "epoch": 0.02, "learning_rate": 0.00035096153846153844, "loss": 4.0995, "step": 78 }, { "epoch": 0.02, "learning_rate": 0.0003557692307692308, "loss": 3.912, "step": 79 }, { "epoch": 0.02, "learning_rate": 0.0003605769230769231, "loss": 3.8976, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.00036538461538461535, "loss": 4.0411, "step": 81 }, { "epoch": 0.02, "learning_rate": 0.0003701923076923077, "loss": 4.0273, "step": 82 }, { "epoch": 0.02, "learning_rate": 0.000375, "loss": 3.9225, "step": 83 }, { "epoch": 0.02, "learning_rate": 0.0003798076923076923, "loss": 3.9683, "step": 84 }, { "epoch": 0.02, "learning_rate": 0.00038461538461538467, "loss": 3.7877, "step": 85 }, { "epoch": 0.02, "learning_rate": 0.0003894230769230769, "loss": 3.9174, "step": 86 }, { "epoch": 0.03, "learning_rate": 0.0003942307692307692, "loss": 3.9229, "step": 87 }, { "epoch": 0.03, "learning_rate": 0.0003990384615384616, "loss": 3.9332, "step": 88 }, { "epoch": 0.03, "learning_rate": 0.0004038461538461539, "loss": 3.9367, "step": 89 }, { "epoch": 0.03, "learning_rate": 0.00040865384615384613, "loss": 3.9915, "step": 90 }, { "epoch": 0.03, "learning_rate": 0.00041346153846153844, "loss": 3.8203, "step": 91 }, { "epoch": 0.03, "learning_rate": 0.0004182692307692308, "loss": 4.0365, "step": 92 }, { "epoch": 0.03, "learning_rate": 0.0004230769230769231, "loss": 3.8866, "step": 93 }, { "epoch": 0.03, "learning_rate": 0.00042788461538461535, "loss": 3.8819, "step": 94 }, { "epoch": 0.03, "learning_rate": 0.0004326923076923077, "loss": 3.9308, "step": 95 }, { "epoch": 0.03, "learning_rate": 0.0004375, "loss": 3.8048, "step": 96 }, { "epoch": 0.03, "learning_rate": 0.0004423076923076923, "loss": 3.9687, "step": 97 }, { "epoch": 0.03, "learning_rate": 0.00044711538461538467, "loss": 3.8629, "step": 98 }, { "epoch": 0.03, "learning_rate": 0.0004519230769230769, "loss": 3.9706, "step": 99 }, { "epoch": 0.03, "learning_rate": 0.0004567307692307692, "loss": 3.881, "step": 100 }, { "epoch": 0.03, "learning_rate": 0.0004615384615384616, "loss": 4.0128, "step": 101 }, { "epoch": 0.03, "learning_rate": 0.0004663461538461539, "loss": 3.8763, "step": 102 }, { "epoch": 0.03, "learning_rate": 0.00047115384615384613, "loss": 3.7673, "step": 103 }, { "epoch": 0.03, "learning_rate": 0.00047596153846153844, "loss": 3.8364, "step": 104 }, { "epoch": 0.03, "learning_rate": 0.0004807692307692308, "loss": 3.8304, "step": 105 }, { "epoch": 0.03, "learning_rate": 0.0004855769230769231, "loss": 3.837, "step": 106 }, { "epoch": 0.03, "learning_rate": 0.0004903846153846154, "loss": 3.9548, "step": 107 }, { "epoch": 0.03, "learning_rate": 0.0004951923076923077, "loss": 3.9055, "step": 108 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 3.898, "step": 109 }, { "epoch": 0.03, "learning_rate": 0.0005048076923076923, "loss": 3.9116, "step": 110 }, { "epoch": 0.03, "learning_rate": 0.0005096153846153846, "loss": 3.7062, "step": 111 }, { "epoch": 0.03, "learning_rate": 0.0005144230769230769, "loss": 3.8398, "step": 112 }, { "epoch": 0.03, "learning_rate": 0.0005192307692307693, "loss": 4.0592, "step": 113 }, { "epoch": 0.03, "learning_rate": 0.0005240384615384615, "loss": 3.7921, "step": 114 }, { "epoch": 0.03, "learning_rate": 0.0005288461538461538, "loss": 3.7489, "step": 115 }, { "epoch": 0.03, "learning_rate": 0.0005336538461538461, "loss": 3.9802, "step": 116 }, { "epoch": 0.03, "learning_rate": 0.0005384615384615384, "loss": 3.7342, "step": 117 }, { "epoch": 0.03, "learning_rate": 0.0005432692307692307, "loss": 3.9971, "step": 118 }, { "epoch": 0.03, "learning_rate": 0.0005480769230769232, "loss": 3.907, "step": 119 }, { "epoch": 0.03, "learning_rate": 0.0005528846153846155, "loss": 3.7726, "step": 120 }, { "epoch": 0.04, "learning_rate": 0.0005576923076923078, "loss": 3.7401, "step": 121 }, { "epoch": 0.04, "learning_rate": 0.0005625000000000001, "loss": 3.7839, "step": 122 }, { "epoch": 0.04, "learning_rate": 0.0005673076923076923, "loss": 3.9549, "step": 123 }, { "epoch": 0.04, "learning_rate": 0.0005721153846153846, "loss": 3.8995, "step": 124 }, { "epoch": 0.04, "learning_rate": 0.0005769230769230769, "loss": 3.8765, "step": 125 }, { "epoch": 0.04, "learning_rate": 0.0005817307692307693, "loss": 3.8703, "step": 126 }, { "epoch": 0.04, "learning_rate": 0.0005865384615384616, "loss": 3.7604, "step": 127 }, { "epoch": 0.04, "learning_rate": 0.0005913461538461539, "loss": 3.8104, "step": 128 }, { "epoch": 0.04, "learning_rate": 0.0005961538461538462, "loss": 3.8254, "step": 129 }, { "epoch": 0.04, "learning_rate": 0.0006009615384615385, "loss": 3.8615, "step": 130 }, { "epoch": 0.04, "learning_rate": 0.0006057692307692307, "loss": 3.8292, "step": 131 }, { "epoch": 0.04, "learning_rate": 0.0006105769230769231, "loss": 3.8351, "step": 132 }, { "epoch": 0.04, "learning_rate": 0.0006153846153846154, "loss": 3.7863, "step": 133 }, { "epoch": 0.04, "learning_rate": 0.0006201923076923077, "loss": 3.7492, "step": 134 }, { "epoch": 0.04, "learning_rate": 0.000625, "loss": 3.7104, "step": 135 }, { "epoch": 0.04, "learning_rate": 0.0006298076923076923, "loss": 3.9247, "step": 136 }, { "epoch": 0.04, "learning_rate": 0.0006346153846153846, "loss": 3.8404, "step": 137 }, { "epoch": 0.04, "learning_rate": 0.0006394230769230769, "loss": 3.7626, "step": 138 }, { "epoch": 0.04, "learning_rate": 0.0006442307692307693, "loss": 3.6955, "step": 139 }, { "epoch": 0.04, "learning_rate": 0.0006490384615384615, "loss": 3.722, "step": 140 }, { "epoch": 0.04, "learning_rate": 0.0006538461538461538, "loss": 3.7595, "step": 141 }, { "epoch": 0.04, "learning_rate": 0.0006586538461538461, "loss": 3.9546, "step": 142 }, { "epoch": 0.04, "learning_rate": 0.0006634615384615384, "loss": 3.9042, "step": 143 }, { "epoch": 0.04, "learning_rate": 0.0006682692307692307, "loss": 3.7585, "step": 144 }, { "epoch": 0.04, "learning_rate": 0.0006730769230769232, "loss": 3.6973, "step": 145 }, { "epoch": 0.04, "learning_rate": 0.0006778846153846155, "loss": 3.8009, "step": 146 }, { "epoch": 0.04, "learning_rate": 0.0006826923076923078, "loss": 3.7775, "step": 147 }, { "epoch": 0.04, "learning_rate": 0.0006875, "loss": 3.7912, "step": 148 }, { "epoch": 0.04, "learning_rate": 0.0006923076923076923, "loss": 3.6862, "step": 149 }, { "epoch": 0.04, "learning_rate": 0.0006971153846153846, "loss": 3.8582, "step": 150 }, { "epoch": 0.04, "learning_rate": 0.0007019230769230769, "loss": 3.8413, "step": 151 }, { "epoch": 0.04, "learning_rate": 0.0007067307692307693, "loss": 3.7278, "step": 152 }, { "epoch": 0.04, "learning_rate": 0.0007115384615384616, "loss": 3.7517, "step": 153 }, { "epoch": 0.04, "learning_rate": 0.0007163461538461539, "loss": 3.702, "step": 154 }, { "epoch": 0.04, "learning_rate": 0.0007211538461538462, "loss": 3.7415, "step": 155 }, { "epoch": 0.05, "learning_rate": 0.0007259615384615385, "loss": 3.9373, "step": 156 }, { "epoch": 0.05, "learning_rate": 0.0007307692307692307, "loss": 3.7964, "step": 157 }, { "epoch": 0.05, "learning_rate": 0.0007355769230769231, "loss": 3.7393, "step": 158 }, { "epoch": 0.05, "learning_rate": 0.0007403846153846154, "loss": 3.7706, "step": 159 }, { "epoch": 0.05, "learning_rate": 0.0007451923076923077, "loss": 3.6625, "step": 160 }, { "epoch": 0.05, "learning_rate": 0.00075, "loss": 3.8063, "step": 161 }, { "epoch": 0.05, "learning_rate": 0.0007548076923076923, "loss": 3.6056, "step": 162 }, { "epoch": 0.05, "learning_rate": 0.0007596153846153846, "loss": 3.8231, "step": 163 }, { "epoch": 0.05, "learning_rate": 0.0007644230769230769, "loss": 3.7259, "step": 164 }, { "epoch": 0.05, "learning_rate": 0.0007692307692307693, "loss": 3.826, "step": 165 }, { "epoch": 0.05, "learning_rate": 0.0007740384615384615, "loss": 3.7214, "step": 166 }, { "epoch": 0.05, "learning_rate": 0.0007788461538461538, "loss": 3.7655, "step": 167 }, { "epoch": 0.05, "learning_rate": 0.0007836538461538461, "loss": 3.8019, "step": 168 }, { "epoch": 0.05, "learning_rate": 0.0007884615384615384, "loss": 3.925, "step": 169 }, { "epoch": 0.05, "learning_rate": 0.0007932692307692307, "loss": 3.8384, "step": 170 }, { "epoch": 0.05, "learning_rate": 0.0007980769230769232, "loss": 3.688, "step": 171 }, { "epoch": 0.05, "learning_rate": 0.0008028846153846155, "loss": 3.7548, "step": 172 }, { "epoch": 0.05, "learning_rate": 0.0008076923076923078, "loss": 3.5522, "step": 173 }, { "epoch": 0.05, "learning_rate": 0.0008125000000000001, "loss": 3.7104, "step": 174 }, { "epoch": 0.05, "learning_rate": 0.0008173076923076923, "loss": 3.7556, "step": 175 }, { "epoch": 0.05, "learning_rate": 0.0008221153846153846, "loss": 3.7124, "step": 176 }, { "epoch": 0.05, "learning_rate": 0.0008269230769230769, "loss": 3.6851, "step": 177 }, { "epoch": 0.05, "learning_rate": 0.0008317307692307693, "loss": 3.777, "step": 178 }, { "epoch": 0.05, "learning_rate": 0.0008365384615384616, "loss": 3.6594, "step": 179 }, { "epoch": 0.05, "learning_rate": 0.0008413461538461539, "loss": 3.7439, "step": 180 }, { "epoch": 0.05, "learning_rate": 0.0008461538461538462, "loss": 3.8061, "step": 181 }, { "epoch": 0.05, "learning_rate": 0.0008509615384615385, "loss": 3.7258, "step": 182 }, { "epoch": 0.05, "learning_rate": 0.0008557692307692307, "loss": 3.6793, "step": 183 }, { "epoch": 0.05, "learning_rate": 0.0008605769230769231, "loss": 3.6215, "step": 184 }, { "epoch": 0.05, "learning_rate": 0.0008653846153846154, "loss": 3.7049, "step": 185 }, { "epoch": 0.05, "learning_rate": 0.0008701923076923077, "loss": 3.8242, "step": 186 }, { "epoch": 0.05, "learning_rate": 0.000875, "loss": 3.778, "step": 187 }, { "epoch": 0.05, "learning_rate": 0.0008798076923076923, "loss": 3.6382, "step": 188 }, { "epoch": 0.05, "learning_rate": 0.0008846153846153846, "loss": 3.7708, "step": 189 }, { "epoch": 0.06, "learning_rate": 0.0008894230769230769, "loss": 3.6522, "step": 190 }, { "epoch": 0.06, "learning_rate": 0.0008942307692307693, "loss": 3.7645, "step": 191 }, { "epoch": 0.06, "learning_rate": 0.0008990384615384615, "loss": 3.7917, "step": 192 }, { "epoch": 0.06, "learning_rate": 0.0009038461538461538, "loss": 3.6763, "step": 193 }, { "epoch": 0.06, "learning_rate": 0.0009086538461538461, "loss": 3.7301, "step": 194 }, { "epoch": 0.06, "learning_rate": 0.0009134615384615384, "loss": 3.5736, "step": 195 }, { "epoch": 0.06, "learning_rate": 0.0009182692307692308, "loss": 3.6986, "step": 196 }, { "epoch": 0.06, "learning_rate": 0.0009230769230769232, "loss": 3.6325, "step": 197 }, { "epoch": 0.06, "learning_rate": 0.0009278846153846155, "loss": 3.8108, "step": 198 }, { "epoch": 0.06, "learning_rate": 0.0009326923076923078, "loss": 3.8029, "step": 199 }, { "epoch": 0.06, "learning_rate": 0.0009375, "loss": 3.7307, "step": 200 }, { "epoch": 0.06, "learning_rate": 0.0009423076923076923, "loss": 3.7974, "step": 201 }, { "epoch": 0.06, "learning_rate": 0.0009471153846153846, "loss": 3.8639, "step": 202 }, { "epoch": 0.06, "learning_rate": 0.0009519230769230769, "loss": 3.6149, "step": 203 }, { "epoch": 0.06, "learning_rate": 0.0009567307692307693, "loss": 3.7643, "step": 204 }, { "epoch": 0.06, "learning_rate": 0.0009615384615384616, "loss": 3.6839, "step": 205 }, { "epoch": 0.06, "learning_rate": 0.0009663461538461539, "loss": 3.7104, "step": 206 }, { "epoch": 0.06, "learning_rate": 0.0009711538461538462, "loss": 3.7614, "step": 207 }, { "epoch": 0.06, "learning_rate": 0.0009759615384615385, "loss": 3.6576, "step": 208 }, { "epoch": 0.06, "learning_rate": 0.0009807692307692308, "loss": 3.7488, "step": 209 }, { "epoch": 0.06, "learning_rate": 0.0009855769230769232, "loss": 3.7574, "step": 210 }, { "epoch": 0.06, "learning_rate": 0.0009903846153846154, "loss": 3.8157, "step": 211 }, { "epoch": 0.06, "learning_rate": 0.0009951923076923078, "loss": 3.6628, "step": 212 }, { "epoch": 0.06, "learning_rate": 0.001, "loss": 3.6348, "step": 213 }, { "epoch": 0.06, "learning_rate": 0.000999999944968816, "loss": 3.6941, "step": 214 }, { "epoch": 0.06, "learning_rate": 0.0009999997798752765, "loss": 3.8112, "step": 215 }, { "epoch": 0.06, "learning_rate": 0.0009999995047194174, "loss": 3.8529, "step": 216 }, { "epoch": 0.06, "learning_rate": 0.0009999991195012996, "loss": 3.5949, "step": 217 }, { "epoch": 0.06, "learning_rate": 0.0009999986242210078, "loss": 3.6972, "step": 218 }, { "epoch": 0.06, "learning_rate": 0.000999998018878651, "loss": 3.6609, "step": 219 }, { "epoch": 0.06, "learning_rate": 0.0009999973034743625, "loss": 3.6864, "step": 220 }, { "epoch": 0.06, "learning_rate": 0.0009999964780082996, "loss": 3.8264, "step": 221 }, { "epoch": 0.06, "learning_rate": 0.0009999955424806442, "loss": 3.6914, "step": 222 }, { "epoch": 0.06, "learning_rate": 0.0009999944968916022, "loss": 3.7121, "step": 223 }, { "epoch": 0.06, "learning_rate": 0.0009999933412414039, "loss": 3.7541, "step": 224 }, { "epoch": 0.07, "learning_rate": 0.0009999920755303033, "loss": 3.8026, "step": 225 }, { "epoch": 0.07, "learning_rate": 0.0009999906997585793, "loss": 3.5693, "step": 226 }, { "epoch": 0.07, "learning_rate": 0.0009999892139265346, "loss": 3.6715, "step": 227 }, { "epoch": 0.07, "learning_rate": 0.0009999876180344966, "loss": 3.7445, "step": 228 }, { "epoch": 0.07, "learning_rate": 0.0009999859120828163, "loss": 3.796, "step": 229 }, { "epoch": 0.07, "learning_rate": 0.000999984096071869, "loss": 3.657, "step": 230 }, { "epoch": 0.07, "learning_rate": 0.000999982170002055, "loss": 3.6771, "step": 231 }, { "epoch": 0.07, "learning_rate": 0.0009999801338737978, "loss": 3.8769, "step": 232 }, { "epoch": 0.07, "learning_rate": 0.0009999779876875459, "loss": 3.6235, "step": 233 }, { "epoch": 0.07, "learning_rate": 0.0009999757314437716, "loss": 3.6209, "step": 234 }, { "epoch": 0.07, "learning_rate": 0.0009999733651429716, "loss": 3.6345, "step": 235 }, { "epoch": 0.07, "learning_rate": 0.0009999708887856666, "loss": 3.7145, "step": 236 }, { "epoch": 0.07, "learning_rate": 0.000999968302372402, "loss": 3.8341, "step": 237 }, { "epoch": 0.07, "learning_rate": 0.0009999656059037471, "loss": 3.5662, "step": 238 }, { "epoch": 0.07, "learning_rate": 0.0009999627993802953, "loss": 3.8158, "step": 239 }, { "epoch": 0.07, "learning_rate": 0.0009999598828026644, "loss": 3.6846, "step": 240 }, { "epoch": 0.07, "learning_rate": 0.0009999568561714965, "loss": 3.6518, "step": 241 }, { "epoch": 0.07, "learning_rate": 0.0009999537194874577, "loss": 3.7659, "step": 242 }, { "epoch": 0.07, "learning_rate": 0.0009999504727512384, "loss": 3.6396, "step": 243 }, { "epoch": 0.07, "learning_rate": 0.0009999471159635538, "loss": 3.6343, "step": 244 }, { "epoch": 0.07, "learning_rate": 0.0009999436491251424, "loss": 3.758, "step": 245 }, { "epoch": 0.07, "learning_rate": 0.0009999400722367673, "loss": 3.6296, "step": 246 }, { "epoch": 0.07, "learning_rate": 0.0009999363852992158, "loss": 3.7105, "step": 247 }, { "epoch": 0.07, "learning_rate": 0.0009999325883132998, "loss": 3.7128, "step": 248 }, { "epoch": 0.07, "learning_rate": 0.000999928681279855, "loss": 3.7588, "step": 249 }, { "epoch": 0.07, "learning_rate": 0.0009999246641997413, "loss": 3.7156, "step": 250 }, { "epoch": 0.07, "learning_rate": 0.000999920537073843, "loss": 3.7028, "step": 251 }, { "epoch": 0.07, "learning_rate": 0.0009999162999030687, "loss": 3.5924, "step": 252 }, { "epoch": 0.07, "learning_rate": 0.000999911952688351, "loss": 3.7117, "step": 253 }, { "epoch": 0.07, "learning_rate": 0.0009999074954306468, "loss": 3.7178, "step": 254 }, { "epoch": 0.07, "learning_rate": 0.0009999029281309376, "loss": 3.7393, "step": 255 }, { "epoch": 0.07, "learning_rate": 0.0009998982507902281, "loss": 3.7616, "step": 256 }, { "epoch": 0.07, "learning_rate": 0.0009998934634095485, "loss": 3.7342, "step": 257 }, { "epoch": 0.07, "learning_rate": 0.0009998885659899524, "loss": 3.6621, "step": 258 }, { "epoch": 0.08, "learning_rate": 0.0009998835585325178, "loss": 3.5621, "step": 259 }, { "epoch": 0.08, "learning_rate": 0.000999878441038347, "loss": 3.7805, "step": 260 }, { "epoch": 0.08, "learning_rate": 0.0009998732135085666, "loss": 3.6375, "step": 261 }, { "epoch": 0.08, "learning_rate": 0.0009998678759443273, "loss": 3.8603, "step": 262 }, { "epoch": 0.08, "learning_rate": 0.0009998624283468036, "loss": 3.7728, "step": 263 }, { "epoch": 0.08, "learning_rate": 0.000999856870717195, "loss": 3.6819, "step": 264 }, { "epoch": 0.08, "learning_rate": 0.0009998512030567252, "loss": 3.625, "step": 265 }, { "epoch": 0.08, "learning_rate": 0.0009998454253666413, "loss": 3.7778, "step": 266 }, { "epoch": 0.08, "learning_rate": 0.0009998395376482152, "loss": 3.6591, "step": 267 }, { "epoch": 0.08, "learning_rate": 0.0009998335399027433, "loss": 3.6708, "step": 268 }, { "epoch": 0.08, "learning_rate": 0.0009998274321315452, "loss": 3.6223, "step": 269 }, { "epoch": 0.08, "learning_rate": 0.0009998212143359658, "loss": 3.6819, "step": 270 }, { "epoch": 0.08, "learning_rate": 0.0009998148865173737, "loss": 3.7105, "step": 271 }, { "epoch": 0.08, "learning_rate": 0.000999808448677162, "loss": 3.5446, "step": 272 }, { "epoch": 0.08, "learning_rate": 0.0009998019008167476, "loss": 3.611, "step": 273 }, { "epoch": 0.08, "learning_rate": 0.000999795242937572, "loss": 3.7123, "step": 274 }, { "epoch": 0.08, "learning_rate": 0.0009997884750411004, "loss": 3.6479, "step": 275 }, { "epoch": 0.08, "learning_rate": 0.0009997815971288229, "loss": 3.664, "step": 276 }, { "epoch": 0.08, "learning_rate": 0.0009997746092022535, "loss": 3.5848, "step": 277 }, { "epoch": 0.08, "learning_rate": 0.0009997675112629304, "loss": 3.6461, "step": 278 }, { "epoch": 0.08, "learning_rate": 0.000999760303312416, "loss": 3.5358, "step": 279 }, { "epoch": 0.08, "learning_rate": 0.0009997529853522967, "loss": 3.7093, "step": 280 }, { "epoch": 0.08, "learning_rate": 0.0009997455573841837, "loss": 3.7852, "step": 281 }, { "epoch": 0.08, "learning_rate": 0.0009997380194097121, "loss": 3.7664, "step": 282 }, { "epoch": 0.08, "learning_rate": 0.0009997303714305412, "loss": 3.6975, "step": 283 }, { "epoch": 0.08, "learning_rate": 0.0009997226134483542, "loss": 3.5344, "step": 284 }, { "epoch": 0.08, "learning_rate": 0.000999714745464859, "loss": 3.7226, "step": 285 }, { "epoch": 0.08, "learning_rate": 0.0009997067674817877, "loss": 3.704, "step": 286 }, { "epoch": 0.08, "learning_rate": 0.0009996986795008961, "loss": 3.8532, "step": 287 }, { "epoch": 0.08, "learning_rate": 0.000999690481523965, "loss": 3.7976, "step": 288 }, { "epoch": 0.08, "learning_rate": 0.0009996821735527987, "loss": 3.7236, "step": 289 }, { "epoch": 0.08, "learning_rate": 0.0009996737555892257, "loss": 3.6443, "step": 290 }, { "epoch": 0.08, "learning_rate": 0.0009996652276350996, "loss": 3.6849, "step": 291 }, { "epoch": 0.08, "learning_rate": 0.0009996565896922974, "loss": 3.6088, "step": 292 }, { "epoch": 0.08, "learning_rate": 0.0009996478417627203, "loss": 3.5341, "step": 293 }, { "epoch": 0.09, "learning_rate": 0.0009996389838482942, "loss": 3.7067, "step": 294 }, { "epoch": 0.09, "learning_rate": 0.000999630015950969, "loss": 3.7872, "step": 295 }, { "epoch": 0.09, "learning_rate": 0.0009996209380727184, "loss": 3.7297, "step": 296 }, { "epoch": 0.09, "learning_rate": 0.0009996117502155408, "loss": 3.655, "step": 297 }, { "epoch": 0.09, "learning_rate": 0.000999602452381459, "loss": 3.6808, "step": 298 }, { "epoch": 0.09, "learning_rate": 0.0009995930445725193, "loss": 3.5957, "step": 299 }, { "epoch": 0.09, "learning_rate": 0.0009995835267907927, "loss": 3.57, "step": 300 }, { "epoch": 0.09, "learning_rate": 0.0009995738990383743, "loss": 3.6477, "step": 301 }, { "epoch": 0.09, "learning_rate": 0.0009995641613173834, "loss": 3.689, "step": 302 }, { "epoch": 0.09, "learning_rate": 0.0009995543136299637, "loss": 3.6932, "step": 303 }, { "epoch": 0.09, "learning_rate": 0.0009995443559782827, "loss": 3.6288, "step": 304 }, { "epoch": 0.09, "learning_rate": 0.0009995342883645324, "loss": 3.7541, "step": 305 }, { "epoch": 0.09, "learning_rate": 0.000999524110790929, "loss": 3.7432, "step": 306 }, { "epoch": 0.09, "learning_rate": 0.0009995138232597126, "loss": 3.7528, "step": 307 }, { "epoch": 0.09, "learning_rate": 0.000999503425773148, "loss": 3.7018, "step": 308 }, { "epoch": 0.09, "learning_rate": 0.0009994929183335236, "loss": 3.6876, "step": 309 }, { "epoch": 0.09, "learning_rate": 0.0009994823009431529, "loss": 3.7658, "step": 310 }, { "epoch": 0.09, "learning_rate": 0.0009994715736043726, "loss": 3.4898, "step": 311 }, { "epoch": 0.09, "learning_rate": 0.0009994607363195442, "loss": 3.6119, "step": 312 }, { "epoch": 0.09, "learning_rate": 0.0009994497890910533, "loss": 3.6885, "step": 313 }, { "epoch": 0.09, "learning_rate": 0.0009994387319213094, "loss": 3.7195, "step": 314 }, { "epoch": 0.09, "learning_rate": 0.000999427564812747, "loss": 3.6904, "step": 315 }, { "epoch": 0.09, "learning_rate": 0.0009994162877678236, "loss": 3.7741, "step": 316 }, { "epoch": 0.09, "learning_rate": 0.0009994049007890221, "loss": 3.709, "step": 317 }, { "epoch": 0.09, "learning_rate": 0.0009993934038788487, "loss": 3.679, "step": 318 }, { "epoch": 0.09, "learning_rate": 0.0009993817970398342, "loss": 3.7931, "step": 319 }, { "epoch": 0.09, "learning_rate": 0.0009993700802745338, "loss": 3.7332, "step": 320 }, { "epoch": 0.09, "learning_rate": 0.0009993582535855264, "loss": 3.6608, "step": 321 }, { "epoch": 0.09, "learning_rate": 0.0009993463169754154, "loss": 3.5741, "step": 322 }, { "epoch": 0.09, "learning_rate": 0.0009993342704468285, "loss": 3.7506, "step": 323 }, { "epoch": 0.09, "learning_rate": 0.000999322114002417, "loss": 3.6312, "step": 324 }, { "epoch": 0.09, "learning_rate": 0.0009993098476448576, "loss": 3.5923, "step": 325 }, { "epoch": 0.09, "learning_rate": 0.0009992974713768498, "loss": 3.5332, "step": 326 }, { "epoch": 0.09, "learning_rate": 0.000999284985201118, "loss": 3.634, "step": 327 }, { "epoch": 0.09, "learning_rate": 0.0009992723891204106, "loss": 3.5849, "step": 328 }, { "epoch": 0.1, "learning_rate": 0.000999259683137501, "loss": 3.6371, "step": 329 }, { "epoch": 0.1, "learning_rate": 0.0009992468672551853, "loss": 3.6672, "step": 330 }, { "epoch": 0.1, "learning_rate": 0.0009992339414762851, "loss": 3.6747, "step": 331 }, { "epoch": 0.1, "learning_rate": 0.0009992209058036455, "loss": 3.6194, "step": 332 }, { "epoch": 0.1, "learning_rate": 0.0009992077602401356, "loss": 3.6531, "step": 333 }, { "epoch": 0.1, "learning_rate": 0.00099919450478865, "loss": 3.6961, "step": 334 }, { "epoch": 0.1, "learning_rate": 0.0009991811394521055, "loss": 3.7749, "step": 335 }, { "epoch": 0.1, "learning_rate": 0.0009991676642334447, "loss": 3.8673, "step": 336 }, { "epoch": 0.1, "learning_rate": 0.0009991540791356342, "loss": 3.7134, "step": 337 }, { "epoch": 0.1, "learning_rate": 0.0009991403841616636, "loss": 3.7489, "step": 338 }, { "epoch": 0.1, "learning_rate": 0.0009991265793145479, "loss": 3.834, "step": 339 }, { "epoch": 0.1, "learning_rate": 0.0009991126645973259, "loss": 3.827, "step": 340 }, { "epoch": 0.1, "learning_rate": 0.0009990986400130607, "loss": 3.7506, "step": 341 }, { "epoch": 0.1, "learning_rate": 0.000999084505564839, "loss": 3.6117, "step": 342 }, { "epoch": 0.1, "learning_rate": 0.0009990702612557728, "loss": 3.7151, "step": 343 }, { "epoch": 0.1, "learning_rate": 0.0009990559070889968, "loss": 3.8299, "step": 344 }, { "epoch": 0.1, "learning_rate": 0.0009990414430676715, "loss": 3.7023, "step": 345 }, { "epoch": 0.1, "learning_rate": 0.0009990268691949804, "loss": 3.7124, "step": 346 }, { "epoch": 0.1, "learning_rate": 0.0009990121854741316, "loss": 3.6357, "step": 347 }, { "epoch": 0.1, "learning_rate": 0.0009989973919083575, "loss": 3.7341, "step": 348 }, { "epoch": 0.1, "learning_rate": 0.0009989824885009142, "loss": 3.7055, "step": 349 }, { "epoch": 0.1, "learning_rate": 0.0009989674752550826, "loss": 3.6428, "step": 350 }, { "epoch": 0.1, "learning_rate": 0.0009989523521741674, "loss": 3.7944, "step": 351 }, { "epoch": 0.1, "learning_rate": 0.0009989371192614977, "loss": 3.6954, "step": 352 }, { "epoch": 0.1, "learning_rate": 0.0009989217765204266, "loss": 3.7625, "step": 353 }, { "epoch": 0.1, "learning_rate": 0.000998906323954331, "loss": 3.6765, "step": 354 }, { "epoch": 0.1, "learning_rate": 0.0009988907615666128, "loss": 3.6974, "step": 355 }, { "epoch": 0.1, "learning_rate": 0.0009988750893606976, "loss": 3.6291, "step": 356 }, { "epoch": 0.1, "learning_rate": 0.0009988593073400353, "loss": 3.6986, "step": 357 }, { "epoch": 0.1, "learning_rate": 0.0009988434155080998, "loss": 3.7308, "step": 358 }, { "epoch": 0.1, "learning_rate": 0.0009988274138683895, "loss": 3.6717, "step": 359 }, { "epoch": 0.1, "learning_rate": 0.0009988113024244263, "loss": 3.6114, "step": 360 }, { "epoch": 0.1, "learning_rate": 0.0009987950811797572, "loss": 3.7942, "step": 361 }, { "epoch": 0.1, "learning_rate": 0.0009987787501379527, "loss": 3.7083, "step": 362 }, { "epoch": 0.11, "learning_rate": 0.0009987623093026074, "loss": 3.7808, "step": 363 }, { "epoch": 0.11, "learning_rate": 0.0009987457586773409, "loss": 3.5259, "step": 364 }, { "epoch": 0.11, "learning_rate": 0.000998729098265796, "loss": 3.6476, "step": 365 }, { "epoch": 0.11, "learning_rate": 0.0009987123280716403, "loss": 3.7561, "step": 366 }, { "epoch": 0.11, "learning_rate": 0.000998695448098565, "loss": 3.7506, "step": 367 }, { "epoch": 0.11, "learning_rate": 0.0009986784583502861, "loss": 3.616, "step": 368 }, { "epoch": 0.11, "learning_rate": 0.0009986613588305435, "loss": 3.629, "step": 369 }, { "epoch": 0.11, "learning_rate": 0.000998644149543101, "loss": 3.9004, "step": 370 }, { "epoch": 0.11, "learning_rate": 0.0009986268304917468, "loss": 4.0177, "step": 371 }, { "epoch": 0.11, "learning_rate": 0.0009986094016802935, "loss": 3.8866, "step": 372 }, { "epoch": 0.11, "learning_rate": 0.0009985918631125773, "loss": 4.063, "step": 373 }, { "epoch": 0.11, "learning_rate": 0.0009985742147924593, "loss": 4.1941, "step": 374 }, { "epoch": 0.11, "learning_rate": 0.0009985564567238237, "loss": 3.9636, "step": 375 }, { "epoch": 0.11, "learning_rate": 0.00099853858891058, "loss": 4.0097, "step": 376 }, { "epoch": 0.11, "learning_rate": 0.0009985206113566614, "loss": 3.8793, "step": 377 }, { "epoch": 0.11, "learning_rate": 0.0009985025240660248, "loss": 3.9372, "step": 378 }, { "epoch": 0.11, "learning_rate": 0.0009984843270426519, "loss": 3.8242, "step": 379 }, { "epoch": 0.11, "learning_rate": 0.0009984660202905483, "loss": 3.9139, "step": 380 }, { "epoch": 0.11, "learning_rate": 0.0009984476038137435, "loss": 3.8506, "step": 381 }, { "epoch": 0.11, "learning_rate": 0.0009984290776162922, "loss": 3.9393, "step": 382 }, { "epoch": 0.11, "learning_rate": 0.0009984104417022715, "loss": 3.9364, "step": 383 }, { "epoch": 0.11, "learning_rate": 0.0009983916960757842, "loss": 3.7773, "step": 384 }, { "epoch": 0.11, "learning_rate": 0.0009983728407409564, "loss": 3.8694, "step": 385 }, { "epoch": 0.11, "learning_rate": 0.0009983538757019391, "loss": 3.8089, "step": 386 }, { "epoch": 0.11, "learning_rate": 0.0009983348009629063, "loss": 4.0018, "step": 387 }, { "epoch": 0.11, "learning_rate": 0.0009983156165280574, "loss": 3.7097, "step": 388 }, { "epoch": 0.11, "learning_rate": 0.000998296322401615, "loss": 3.8345, "step": 389 }, { "epoch": 0.11, "learning_rate": 0.0009982769185878264, "loss": 3.773, "step": 390 }, { "epoch": 0.11, "learning_rate": 0.000998257405090963, "loss": 3.8152, "step": 391 }, { "epoch": 0.11, "learning_rate": 0.0009982377819153197, "loss": 3.8483, "step": 392 }, { "epoch": 0.11, "learning_rate": 0.0009982180490652165, "loss": 3.784, "step": 393 }, { "epoch": 0.11, "learning_rate": 0.0009981982065449969, "loss": 3.7975, "step": 394 }, { "epoch": 0.11, "learning_rate": 0.0009981782543590288, "loss": 3.7096, "step": 395 }, { "epoch": 0.11, "learning_rate": 0.0009981581925117044, "loss": 3.8472, "step": 396 }, { "epoch": 0.11, "learning_rate": 0.000998138021007439, "loss": 3.6477, "step": 397 }, { "epoch": 0.12, "learning_rate": 0.000998117739850674, "loss": 3.8306, "step": 398 }, { "epoch": 0.12, "learning_rate": 0.0009980973490458728, "loss": 3.6405, "step": 399 }, { "epoch": 0.12, "learning_rate": 0.0009980768485975244, "loss": 3.7399, "step": 400 }, { "epoch": 0.12, "learning_rate": 0.0009980562385101413, "loss": 3.7544, "step": 401 }, { "epoch": 0.12, "learning_rate": 0.0009980355187882606, "loss": 3.754, "step": 402 }, { "epoch": 0.12, "learning_rate": 0.0009980146894364428, "loss": 3.7543, "step": 403 }, { "epoch": 0.12, "learning_rate": 0.000997993750459273, "loss": 3.836, "step": 404 }, { "epoch": 0.12, "learning_rate": 0.0009979727018613607, "loss": 3.6722, "step": 405 }, { "epoch": 0.12, "learning_rate": 0.000997951543647339, "loss": 3.829, "step": 406 }, { "epoch": 0.12, "learning_rate": 0.0009979302758218653, "loss": 3.8155, "step": 407 }, { "epoch": 0.12, "learning_rate": 0.0009979088983896214, "loss": 3.7308, "step": 408 }, { "epoch": 0.12, "learning_rate": 0.0009978874113553127, "loss": 3.6827, "step": 409 }, { "epoch": 0.12, "learning_rate": 0.0009978658147236695, "loss": 3.7619, "step": 410 }, { "epoch": 0.12, "learning_rate": 0.0009978441084994452, "loss": 3.71, "step": 411 }, { "epoch": 0.12, "learning_rate": 0.0009978222926874181, "loss": 3.8948, "step": 412 }, { "epoch": 0.12, "learning_rate": 0.0009978003672923904, "loss": 3.7231, "step": 413 }, { "epoch": 0.12, "learning_rate": 0.0009977783323191884, "loss": 3.8278, "step": 414 }, { "epoch": 0.12, "learning_rate": 0.0009977561877726627, "loss": 3.6713, "step": 415 }, { "epoch": 0.12, "learning_rate": 0.0009977339336576876, "loss": 3.7997, "step": 416 }, { "epoch": 0.12, "learning_rate": 0.000997711569979162, "loss": 3.6536, "step": 417 }, { "epoch": 0.12, "learning_rate": 0.0009976890967420089, "loss": 3.7295, "step": 418 }, { "epoch": 0.12, "learning_rate": 0.0009976665139511745, "loss": 3.6741, "step": 419 }, { "epoch": 0.12, "learning_rate": 0.0009976438216116306, "loss": 3.5868, "step": 420 }, { "epoch": 0.12, "learning_rate": 0.000997621019728372, "loss": 3.8072, "step": 421 }, { "epoch": 0.12, "learning_rate": 0.0009975981083064176, "loss": 3.7895, "step": 422 }, { "epoch": 0.12, "learning_rate": 0.0009975750873508116, "loss": 3.7789, "step": 423 }, { "epoch": 0.12, "learning_rate": 0.0009975519568666206, "loss": 3.7323, "step": 424 }, { "epoch": 0.12, "learning_rate": 0.000997528716858937, "loss": 3.756, "step": 425 }, { "epoch": 0.12, "learning_rate": 0.0009975053673328758, "loss": 3.7363, "step": 426 }, { "epoch": 0.12, "learning_rate": 0.0009974819082935772, "loss": 3.7528, "step": 427 }, { "epoch": 0.12, "learning_rate": 0.0009974583397462052, "loss": 3.7039, "step": 428 }, { "epoch": 0.12, "learning_rate": 0.0009974346616959476, "loss": 3.6738, "step": 429 }, { "epoch": 0.12, "learning_rate": 0.0009974108741480166, "loss": 3.6975, "step": 430 }, { "epoch": 0.12, "learning_rate": 0.0009973869771076483, "loss": 3.6814, "step": 431 }, { "epoch": 0.13, "learning_rate": 0.0009973629705801034, "loss": 3.7683, "step": 432 }, { "epoch": 0.13, "learning_rate": 0.0009973388545706657, "loss": 3.7217, "step": 433 }, { "epoch": 0.13, "learning_rate": 0.0009973146290846444, "loss": 3.6759, "step": 434 }, { "epoch": 0.13, "learning_rate": 0.0009972902941273716, "loss": 3.6048, "step": 435 }, { "epoch": 0.13, "learning_rate": 0.0009972658497042044, "loss": 3.8603, "step": 436 }, { "epoch": 0.13, "learning_rate": 0.0009972412958205235, "loss": 3.8097, "step": 437 }, { "epoch": 0.13, "learning_rate": 0.0009972166324817339, "loss": 3.7029, "step": 438 }, { "epoch": 0.13, "learning_rate": 0.0009971918596932642, "loss": 3.662, "step": 439 }, { "epoch": 0.13, "learning_rate": 0.0009971669774605678, "loss": 3.5865, "step": 440 }, { "epoch": 0.13, "learning_rate": 0.0009971419857891222, "loss": 3.7332, "step": 441 }, { "epoch": 0.13, "learning_rate": 0.0009971168846844283, "loss": 3.6936, "step": 442 }, { "epoch": 0.13, "learning_rate": 0.0009970916741520114, "loss": 3.8466, "step": 443 }, { "epoch": 0.13, "learning_rate": 0.0009970663541974214, "loss": 3.7223, "step": 444 }, { "epoch": 0.13, "learning_rate": 0.0009970409248262315, "loss": 3.615, "step": 445 }, { "epoch": 0.13, "learning_rate": 0.0009970153860440394, "loss": 3.774, "step": 446 }, { "epoch": 0.13, "learning_rate": 0.0009969897378564667, "loss": 3.561, "step": 447 }, { "epoch": 0.13, "learning_rate": 0.0009969639802691593, "loss": 3.6403, "step": 448 }, { "epoch": 0.13, "learning_rate": 0.0009969381132877873, "loss": 3.8436, "step": 449 }, { "epoch": 0.13, "learning_rate": 0.0009969121369180444, "loss": 3.6757, "step": 450 }, { "epoch": 0.13, "learning_rate": 0.0009968860511656485, "loss": 3.6232, "step": 451 }, { "epoch": 0.13, "learning_rate": 0.0009968598560363422, "loss": 3.7082, "step": 452 }, { "epoch": 0.13, "learning_rate": 0.0009968335515358915, "loss": 3.6463, "step": 453 }, { "epoch": 0.13, "learning_rate": 0.0009968071376700864, "loss": 3.7536, "step": 454 }, { "epoch": 0.13, "learning_rate": 0.0009967806144447415, "loss": 3.8328, "step": 455 }, { "epoch": 0.13, "learning_rate": 0.0009967539818656953, "loss": 3.7297, "step": 456 }, { "epoch": 0.13, "learning_rate": 0.00099672723993881, "loss": 3.7524, "step": 457 }, { "epoch": 0.13, "learning_rate": 0.0009967003886699722, "loss": 3.5577, "step": 458 }, { "epoch": 0.13, "learning_rate": 0.000996673428065093, "loss": 3.7276, "step": 459 }, { "epoch": 0.13, "learning_rate": 0.0009966463581301062, "loss": 3.5709, "step": 460 }, { "epoch": 0.13, "learning_rate": 0.0009966191788709714, "loss": 3.6987, "step": 461 }, { "epoch": 0.13, "learning_rate": 0.0009965918902936713, "loss": 3.6349, "step": 462 }, { "epoch": 0.13, "learning_rate": 0.0009965644924042124, "loss": 3.5932, "step": 463 }, { "epoch": 0.13, "learning_rate": 0.000996536985208626, "loss": 3.716, "step": 464 }, { "epoch": 0.13, "learning_rate": 0.0009965093687129669, "loss": 3.8616, "step": 465 }, { "epoch": 0.13, "learning_rate": 0.0009964816429233142, "loss": 3.5844, "step": 466 }, { "epoch": 0.14, "learning_rate": 0.0009964538078457712, "loss": 3.7315, "step": 467 }, { "epoch": 0.14, "learning_rate": 0.0009964258634864648, "loss": 3.7489, "step": 468 }, { "epoch": 0.14, "learning_rate": 0.0009963978098515467, "loss": 3.6614, "step": 469 }, { "epoch": 0.14, "learning_rate": 0.0009963696469471918, "loss": 3.6779, "step": 470 }, { "epoch": 0.14, "learning_rate": 0.0009963413747795995, "loss": 3.7368, "step": 471 }, { "epoch": 0.14, "learning_rate": 0.0009963129933549932, "loss": 3.5822, "step": 472 }, { "epoch": 0.14, "learning_rate": 0.0009962845026796206, "loss": 3.7772, "step": 473 }, { "epoch": 0.14, "learning_rate": 0.0009962559027597532, "loss": 3.7422, "step": 474 }, { "epoch": 0.14, "learning_rate": 0.000996227193601686, "loss": 3.7444, "step": 475 }, { "epoch": 0.14, "learning_rate": 0.000996198375211739, "loss": 3.6498, "step": 476 }, { "epoch": 0.14, "learning_rate": 0.0009961694475962562, "loss": 3.6996, "step": 477 }, { "epoch": 0.14, "learning_rate": 0.0009961404107616048, "loss": 3.6961, "step": 478 }, { "epoch": 0.14, "learning_rate": 0.0009961112647141764, "loss": 3.6028, "step": 479 }, { "epoch": 0.14, "learning_rate": 0.0009960820094603872, "loss": 3.6861, "step": 480 }, { "epoch": 0.14, "learning_rate": 0.0009960526450066766, "loss": 3.7575, "step": 481 }, { "epoch": 0.14, "learning_rate": 0.0009960231713595088, "loss": 3.7073, "step": 482 }, { "epoch": 0.14, "learning_rate": 0.0009959935885253714, "loss": 3.6649, "step": 483 }, { "epoch": 0.14, "learning_rate": 0.0009959638965107767, "loss": 3.6906, "step": 484 }, { "epoch": 0.14, "learning_rate": 0.0009959340953222603, "loss": 3.6002, "step": 485 }, { "epoch": 0.14, "learning_rate": 0.000995904184966382, "loss": 3.6651, "step": 486 }, { "epoch": 0.14, "learning_rate": 0.0009958741654497263, "loss": 3.6374, "step": 487 }, { "epoch": 0.14, "learning_rate": 0.000995844036778901, "loss": 3.6679, "step": 488 }, { "epoch": 0.14, "learning_rate": 0.000995813798960538, "loss": 3.6765, "step": 489 }, { "epoch": 0.14, "learning_rate": 0.0009957834520012937, "loss": 3.6081, "step": 490 }, { "epoch": 0.14, "learning_rate": 0.0009957529959078479, "loss": 3.572, "step": 491 }, { "epoch": 0.14, "learning_rate": 0.0009957224306869053, "loss": 3.6963, "step": 492 }, { "epoch": 0.14, "learning_rate": 0.0009956917563451934, "loss": 3.7229, "step": 493 }, { "epoch": 0.14, "learning_rate": 0.0009956609728894647, "loss": 3.6748, "step": 494 }, { "epoch": 0.14, "learning_rate": 0.0009956300803264954, "loss": 3.7678, "step": 495 }, { "epoch": 0.14, "learning_rate": 0.0009955990786630858, "loss": 3.726, "step": 496 }, { "epoch": 0.14, "learning_rate": 0.00099556796790606, "loss": 3.6513, "step": 497 }, { "epoch": 0.14, "learning_rate": 0.000995536748062266, "loss": 3.5193, "step": 498 }, { "epoch": 0.14, "learning_rate": 0.0009955054191385763, "loss": 3.5712, "step": 499 }, { "epoch": 0.14, "learning_rate": 0.0009954739811418875, "loss": 3.585, "step": 500 }, { "epoch": 0.15, "learning_rate": 0.0009954424340791195, "loss": 3.6559, "step": 501 }, { "epoch": 0.15, "learning_rate": 0.0009954107779572166, "loss": 3.6704, "step": 502 }, { "epoch": 0.15, "learning_rate": 0.0009953790127831472, "loss": 3.6832, "step": 503 }, { "epoch": 0.15, "learning_rate": 0.0009953471385639036, "loss": 3.7979, "step": 504 }, { "epoch": 0.15, "learning_rate": 0.000995315155306502, "loss": 3.6549, "step": 505 }, { "epoch": 0.15, "learning_rate": 0.0009952830630179826, "loss": 3.5736, "step": 506 }, { "epoch": 0.15, "learning_rate": 0.00099525086170541, "loss": 3.5251, "step": 507 }, { "epoch": 0.15, "learning_rate": 0.0009952185513758724, "loss": 3.6032, "step": 508 }, { "epoch": 0.15, "learning_rate": 0.0009951861320364821, "loss": 3.5613, "step": 509 }, { "epoch": 0.15, "learning_rate": 0.0009951536036943754, "loss": 3.6615, "step": 510 }, { "epoch": 0.15, "learning_rate": 0.0009951209663567125, "loss": 3.6612, "step": 511 }, { "epoch": 0.15, "learning_rate": 0.0009950882200306778, "loss": 3.5431, "step": 512 }, { "epoch": 0.15, "learning_rate": 0.0009950553647234796, "loss": 3.6175, "step": 513 }, { "epoch": 0.15, "learning_rate": 0.00099502240044235, "loss": 3.6892, "step": 514 }, { "epoch": 0.15, "learning_rate": 0.0009949893271945455, "loss": 3.6055, "step": 515 }, { "epoch": 0.15, "learning_rate": 0.000994956144987346, "loss": 3.7621, "step": 516 }, { "epoch": 0.15, "learning_rate": 0.000994922853828056, "loss": 3.6264, "step": 517 }, { "epoch": 0.15, "learning_rate": 0.0009948894537240038, "loss": 3.6447, "step": 518 }, { "epoch": 0.15, "learning_rate": 0.0009948559446825412, "loss": 3.6252, "step": 519 }, { "epoch": 0.15, "learning_rate": 0.0009948223267110447, "loss": 3.5332, "step": 520 }, { "epoch": 0.15, "learning_rate": 0.0009947885998169142, "loss": 3.742, "step": 521 }, { "epoch": 0.15, "learning_rate": 0.000994754764007574, "loss": 3.6361, "step": 522 }, { "epoch": 0.15, "learning_rate": 0.0009947208192904722, "loss": 3.7173, "step": 523 }, { "epoch": 0.15, "learning_rate": 0.000994686765673081, "loss": 3.6875, "step": 524 }, { "epoch": 0.15, "learning_rate": 0.0009946526031628959, "loss": 3.6247, "step": 525 }, { "epoch": 0.15, "learning_rate": 0.0009946183317674376, "loss": 3.6669, "step": 526 }, { "epoch": 0.15, "learning_rate": 0.0009945839514942495, "loss": 3.7014, "step": 527 }, { "epoch": 0.15, "learning_rate": 0.0009945494623509001, "loss": 3.6868, "step": 528 }, { "epoch": 0.15, "learning_rate": 0.0009945148643449807, "loss": 3.7157, "step": 529 }, { "epoch": 0.15, "learning_rate": 0.0009944801574841078, "loss": 3.8294, "step": 530 }, { "epoch": 0.15, "learning_rate": 0.000994445341775921, "loss": 3.733, "step": 531 }, { "epoch": 0.15, "learning_rate": 0.0009944104172280837, "loss": 3.7909, "step": 532 }, { "epoch": 0.15, "learning_rate": 0.0009943753838482844, "loss": 3.6931, "step": 533 }, { "epoch": 0.15, "learning_rate": 0.000994340241644234, "loss": 3.6485, "step": 534 }, { "epoch": 0.15, "learning_rate": 0.0009943049906236687, "loss": 3.7484, "step": 535 }, { "epoch": 0.16, "learning_rate": 0.0009942696307943483, "loss": 3.6667, "step": 536 }, { "epoch": 0.16, "learning_rate": 0.0009942341621640557, "loss": 3.7458, "step": 537 }, { "epoch": 0.16, "learning_rate": 0.000994198584740599, "loss": 3.5692, "step": 538 }, { "epoch": 0.16, "learning_rate": 0.0009941628985318093, "loss": 3.5952, "step": 539 }, { "epoch": 0.16, "learning_rate": 0.0009941271035455424, "loss": 3.7567, "step": 540 }, { "epoch": 0.16, "learning_rate": 0.0009940911997896773, "loss": 3.8045, "step": 541 }, { "epoch": 0.16, "learning_rate": 0.0009940551872721175, "loss": 3.6537, "step": 542 }, { "epoch": 0.16, "learning_rate": 0.0009940190660007902, "loss": 3.6709, "step": 543 }, { "epoch": 0.16, "learning_rate": 0.0009939828359836467, "loss": 3.7905, "step": 544 }, { "epoch": 0.16, "learning_rate": 0.0009939464972286618, "loss": 3.5699, "step": 545 }, { "epoch": 0.16, "learning_rate": 0.000993910049743835, "loss": 3.7881, "step": 546 }, { "epoch": 0.16, "learning_rate": 0.000993873493537189, "loss": 3.6279, "step": 547 }, { "epoch": 0.16, "learning_rate": 0.0009938368286167706, "loss": 3.5849, "step": 548 }, { "epoch": 0.16, "learning_rate": 0.000993800054990651, "loss": 3.6961, "step": 549 }, { "epoch": 0.16, "learning_rate": 0.0009937631726669245, "loss": 3.7376, "step": 550 }, { "epoch": 0.16, "learning_rate": 0.0009937261816537106, "loss": 3.7351, "step": 551 }, { "epoch": 0.16, "learning_rate": 0.0009936890819591511, "loss": 3.6581, "step": 552 }, { "epoch": 0.16, "learning_rate": 0.0009936518735914133, "loss": 3.6157, "step": 553 }, { "epoch": 0.16, "learning_rate": 0.000993614556558687, "loss": 3.6744, "step": 554 }, { "epoch": 0.16, "learning_rate": 0.000993577130869187, "loss": 3.5829, "step": 555 }, { "epoch": 0.16, "learning_rate": 0.0009935395965311516, "loss": 3.6365, "step": 556 }, { "epoch": 0.16, "learning_rate": 0.0009935019535528432, "loss": 3.6775, "step": 557 }, { "epoch": 0.16, "learning_rate": 0.0009934642019425472, "loss": 3.7224, "step": 558 }, { "epoch": 0.16, "learning_rate": 0.0009934263417085746, "loss": 3.7002, "step": 559 }, { "epoch": 0.16, "learning_rate": 0.0009933883728592589, "loss": 3.7048, "step": 560 }, { "epoch": 0.16, "learning_rate": 0.000993350295402958, "loss": 3.4936, "step": 561 }, { "epoch": 0.16, "learning_rate": 0.0009933121093480537, "loss": 3.529, "step": 562 }, { "epoch": 0.16, "learning_rate": 0.0009932738147029516, "loss": 3.665, "step": 563 }, { "epoch": 0.16, "learning_rate": 0.0009932354114760818, "loss": 3.7, "step": 564 }, { "epoch": 0.16, "learning_rate": 0.0009931968996758972, "loss": 3.6423, "step": 565 }, { "epoch": 0.16, "learning_rate": 0.0009931582793108753, "loss": 3.5859, "step": 566 }, { "epoch": 0.16, "learning_rate": 0.0009931195503895178, "loss": 3.7281, "step": 567 }, { "epoch": 0.16, "learning_rate": 0.0009930807129203494, "loss": 3.6349, "step": 568 }, { "epoch": 0.16, "learning_rate": 0.0009930417669119194, "loss": 3.6389, "step": 569 }, { "epoch": 0.17, "learning_rate": 0.000993002712372801, "loss": 3.6347, "step": 570 }, { "epoch": 0.17, "learning_rate": 0.0009929635493115907, "loss": 3.6897, "step": 571 }, { "epoch": 0.17, "learning_rate": 0.0009929242777369095, "loss": 3.75, "step": 572 }, { "epoch": 0.17, "learning_rate": 0.0009928848976574018, "loss": 3.6016, "step": 573 }, { "epoch": 0.17, "learning_rate": 0.0009928454090817364, "loss": 3.6417, "step": 574 }, { "epoch": 0.17, "learning_rate": 0.0009928058120186058, "loss": 3.5364, "step": 575 }, { "epoch": 0.17, "learning_rate": 0.0009927661064767258, "loss": 3.7325, "step": 576 }, { "epoch": 0.17, "learning_rate": 0.000992726292464837, "loss": 3.6492, "step": 577 }, { "epoch": 0.17, "learning_rate": 0.0009926863699917033, "loss": 3.6142, "step": 578 }, { "epoch": 0.17, "learning_rate": 0.0009926463390661127, "loss": 3.7066, "step": 579 }, { "epoch": 0.17, "learning_rate": 0.000992606199696877, "loss": 3.4474, "step": 580 }, { "epoch": 0.17, "learning_rate": 0.0009925659518928314, "loss": 3.7053, "step": 581 }, { "epoch": 0.17, "learning_rate": 0.0009925255956628363, "loss": 3.6484, "step": 582 }, { "epoch": 0.17, "learning_rate": 0.0009924851310157744, "loss": 3.5367, "step": 583 }, { "epoch": 0.17, "learning_rate": 0.0009924445579605533, "loss": 3.6106, "step": 584 }, { "epoch": 0.17, "learning_rate": 0.000992403876506104, "loss": 3.5737, "step": 585 }, { "epoch": 0.17, "learning_rate": 0.0009923630866613816, "loss": 3.7044, "step": 586 }, { "epoch": 0.17, "learning_rate": 0.0009923221884353649, "loss": 3.5919, "step": 587 }, { "epoch": 0.17, "learning_rate": 0.0009922811818370567, "loss": 3.6284, "step": 588 }, { "epoch": 0.17, "learning_rate": 0.0009922400668754834, "loss": 3.6501, "step": 589 }, { "epoch": 0.17, "learning_rate": 0.0009921988435596953, "loss": 3.526, "step": 590 }, { "epoch": 0.17, "learning_rate": 0.000992157511898767, "loss": 3.737, "step": 591 }, { "epoch": 0.17, "learning_rate": 0.0009921160719017967, "loss": 3.5667, "step": 592 }, { "epoch": 0.17, "learning_rate": 0.0009920745235779058, "loss": 3.7342, "step": 593 }, { "epoch": 0.17, "learning_rate": 0.0009920328669362408, "loss": 3.6082, "step": 594 }, { "epoch": 0.17, "learning_rate": 0.000991991101985971, "loss": 3.5576, "step": 595 }, { "epoch": 0.17, "learning_rate": 0.0009919492287362897, "loss": 3.6667, "step": 596 }, { "epoch": 0.17, "learning_rate": 0.0009919072471964146, "loss": 3.5459, "step": 597 }, { "epoch": 0.17, "learning_rate": 0.0009918651573755865, "loss": 3.7746, "step": 598 }, { "epoch": 0.17, "learning_rate": 0.000991822959283071, "loss": 3.7523, "step": 599 }, { "epoch": 0.17, "learning_rate": 0.0009917806529281565, "loss": 3.6561, "step": 600 }, { "epoch": 0.17, "learning_rate": 0.0009917382383201555, "loss": 3.7697, "step": 601 }, { "epoch": 0.17, "learning_rate": 0.0009916957154684051, "loss": 3.649, "step": 602 }, { "epoch": 0.17, "learning_rate": 0.0009916530843822654, "loss": 3.606, "step": 603 }, { "epoch": 0.17, "learning_rate": 0.0009916103450711203, "loss": 3.6625, "step": 604 }, { "epoch": 0.18, "learning_rate": 0.0009915674975443777, "loss": 3.6538, "step": 605 }, { "epoch": 0.18, "learning_rate": 0.00099152454181147, "loss": 3.6074, "step": 606 }, { "epoch": 0.18, "learning_rate": 0.0009914814778818524, "loss": 3.5671, "step": 607 }, { "epoch": 0.18, "learning_rate": 0.0009914383057650043, "loss": 3.7064, "step": 608 }, { "epoch": 0.18, "learning_rate": 0.000991395025470429, "loss": 3.5435, "step": 609 }, { "epoch": 0.18, "learning_rate": 0.0009913516370076536, "loss": 3.644, "step": 610 }, { "epoch": 0.18, "learning_rate": 0.000991308140386229, "loss": 3.5077, "step": 611 }, { "epoch": 0.18, "learning_rate": 0.00099126453561573, "loss": 3.5789, "step": 612 }, { "epoch": 0.18, "learning_rate": 0.000991220822705755, "loss": 3.6847, "step": 613 }, { "epoch": 0.18, "learning_rate": 0.000991177001665926, "loss": 3.5346, "step": 614 }, { "epoch": 0.18, "learning_rate": 0.0009911330725058895, "loss": 3.6991, "step": 615 }, { "epoch": 0.18, "learning_rate": 0.0009910890352353154, "loss": 3.5078, "step": 616 }, { "epoch": 0.18, "learning_rate": 0.0009910448898638971, "loss": 3.6742, "step": 617 }, { "epoch": 0.18, "learning_rate": 0.000991000636401352, "loss": 3.5215, "step": 618 }, { "epoch": 0.18, "learning_rate": 0.0009909562748574218, "loss": 3.6943, "step": 619 }, { "epoch": 0.18, "learning_rate": 0.0009909118052418716, "loss": 3.5329, "step": 620 }, { "epoch": 0.18, "learning_rate": 0.0009908672275644897, "loss": 3.6148, "step": 621 }, { "epoch": 0.18, "learning_rate": 0.000990822541835089, "loss": 3.6845, "step": 622 }, { "epoch": 0.18, "learning_rate": 0.0009907777480635063, "loss": 3.6781, "step": 623 }, { "epoch": 0.18, "learning_rate": 0.0009907328462596015, "loss": 3.6402, "step": 624 }, { "epoch": 0.18, "learning_rate": 0.0009906878364332585, "loss": 3.7434, "step": 625 }, { "epoch": 0.18, "learning_rate": 0.0009906427185943853, "loss": 3.6494, "step": 626 }, { "epoch": 0.18, "learning_rate": 0.0009905974927529135, "loss": 3.4317, "step": 627 }, { "epoch": 0.18, "learning_rate": 0.000990552158918798, "loss": 3.5232, "step": 628 }, { "epoch": 0.18, "learning_rate": 0.0009905067171020185, "loss": 3.5147, "step": 629 }, { "epoch": 0.18, "learning_rate": 0.0009904611673125774, "loss": 3.5913, "step": 630 }, { "epoch": 0.18, "learning_rate": 0.0009904155095605013, "loss": 3.6906, "step": 631 }, { "epoch": 0.18, "learning_rate": 0.0009903697438558411, "loss": 3.6337, "step": 632 }, { "epoch": 0.18, "learning_rate": 0.0009903238702086705, "loss": 3.5287, "step": 633 }, { "epoch": 0.18, "learning_rate": 0.0009902778886290878, "loss": 3.5244, "step": 634 }, { "epoch": 0.18, "learning_rate": 0.000990231799127214, "loss": 3.744, "step": 635 }, { "epoch": 0.18, "learning_rate": 0.0009901856017131953, "loss": 3.5939, "step": 636 }, { "epoch": 0.18, "learning_rate": 0.0009901392963972007, "loss": 3.6093, "step": 637 }, { "epoch": 0.18, "learning_rate": 0.000990092883189423, "loss": 3.568, "step": 638 }, { "epoch": 0.19, "learning_rate": 0.0009900463621000787, "loss": 3.6674, "step": 639 }, { "epoch": 0.19, "learning_rate": 0.0009899997331394089, "loss": 3.6506, "step": 640 }, { "epoch": 0.19, "learning_rate": 0.000989952996317677, "loss": 3.7021, "step": 641 }, { "epoch": 0.19, "learning_rate": 0.0009899061516451715, "loss": 3.5932, "step": 642 }, { "epoch": 0.19, "learning_rate": 0.0009898591991322036, "loss": 3.5443, "step": 643 }, { "epoch": 0.19, "learning_rate": 0.0009898121387891092, "loss": 3.6242, "step": 644 }, { "epoch": 0.19, "learning_rate": 0.0009897649706262473, "loss": 3.6105, "step": 645 }, { "epoch": 0.19, "learning_rate": 0.0009897176946540005, "loss": 3.6434, "step": 646 }, { "epoch": 0.19, "learning_rate": 0.000989670310882776, "loss": 3.6683, "step": 647 }, { "epoch": 0.19, "learning_rate": 0.0009896228193230032, "loss": 3.6425, "step": 648 }, { "epoch": 0.19, "learning_rate": 0.000989575219985137, "loss": 3.7033, "step": 649 }, { "epoch": 0.19, "learning_rate": 0.000989527512879655, "loss": 3.5329, "step": 650 }, { "epoch": 0.19, "learning_rate": 0.0009894796980170584, "loss": 3.6693, "step": 651 }, { "epoch": 0.19, "learning_rate": 0.0009894317754078727, "loss": 3.6757, "step": 652 }, { "epoch": 0.19, "learning_rate": 0.0009893837450626471, "loss": 3.5769, "step": 653 }, { "epoch": 0.19, "learning_rate": 0.0009893356069919537, "loss": 3.5945, "step": 654 }, { "epoch": 0.19, "learning_rate": 0.0009892873612063894, "loss": 3.5734, "step": 655 }, { "epoch": 0.19, "learning_rate": 0.0009892390077165737, "loss": 3.4647, "step": 656 }, { "epoch": 0.19, "learning_rate": 0.000989190546533151, "loss": 3.5565, "step": 657 }, { "epoch": 0.19, "learning_rate": 0.0009891419776667885, "loss": 3.5983, "step": 658 }, { "epoch": 0.19, "learning_rate": 0.0009890933011281774, "loss": 3.6734, "step": 659 }, { "epoch": 0.19, "learning_rate": 0.0009890445169280328, "loss": 3.5085, "step": 660 }, { "epoch": 0.19, "learning_rate": 0.0009889956250770933, "loss": 3.5911, "step": 661 }, { "epoch": 0.19, "learning_rate": 0.0009889466255861208, "loss": 3.6021, "step": 662 }, { "epoch": 0.19, "learning_rate": 0.0009888975184659018, "loss": 3.4324, "step": 663 }, { "epoch": 0.19, "learning_rate": 0.0009888483037272455, "loss": 3.705, "step": 664 }, { "epoch": 0.19, "learning_rate": 0.000988798981380986, "loss": 3.5961, "step": 665 }, { "epoch": 0.19, "learning_rate": 0.0009887495514379796, "loss": 3.5768, "step": 666 }, { "epoch": 0.19, "learning_rate": 0.0009887000139091073, "loss": 3.6342, "step": 667 }, { "epoch": 0.19, "learning_rate": 0.0009886503688052739, "loss": 3.5585, "step": 668 }, { "epoch": 0.19, "learning_rate": 0.000988600616137407, "loss": 3.5758, "step": 669 }, { "epoch": 0.19, "learning_rate": 0.000988550755916459, "loss": 3.5678, "step": 670 }, { "epoch": 0.19, "learning_rate": 0.0009885007881534045, "loss": 3.5479, "step": 671 }, { "epoch": 0.19, "learning_rate": 0.0009884507128592435, "loss": 3.6072, "step": 672 }, { "epoch": 0.19, "learning_rate": 0.0009884005300449983, "loss": 3.6328, "step": 673 }, { "epoch": 0.2, "learning_rate": 0.0009883502397217156, "loss": 3.5589, "step": 674 }, { "epoch": 0.2, "learning_rate": 0.0009882998419004653, "loss": 3.5256, "step": 675 }, { "epoch": 0.2, "learning_rate": 0.0009882493365923415, "loss": 3.5706, "step": 676 }, { "epoch": 0.2, "learning_rate": 0.0009881987238084615, "loss": 3.4686, "step": 677 }, { "epoch": 0.2, "learning_rate": 0.0009881480035599667, "loss": 3.5833, "step": 678 }, { "epoch": 0.2, "learning_rate": 0.0009880971758580216, "loss": 3.5522, "step": 679 }, { "epoch": 0.2, "learning_rate": 0.0009880462407138144, "loss": 3.6313, "step": 680 }, { "epoch": 0.2, "learning_rate": 0.0009879951981385578, "loss": 3.4857, "step": 681 }, { "epoch": 0.2, "learning_rate": 0.000987944048143487, "loss": 3.7416, "step": 682 }, { "epoch": 0.2, "learning_rate": 0.0009878927907398616, "loss": 3.517, "step": 683 }, { "epoch": 0.2, "learning_rate": 0.0009878414259389646, "loss": 3.4619, "step": 684 }, { "epoch": 0.2, "learning_rate": 0.0009877899537521027, "loss": 3.6346, "step": 685 }, { "epoch": 0.2, "learning_rate": 0.0009877383741906062, "loss": 3.6255, "step": 686 }, { "epoch": 0.2, "learning_rate": 0.000987686687265829, "loss": 3.5642, "step": 687 }, { "epoch": 0.2, "learning_rate": 0.0009876348929891486, "loss": 3.474, "step": 688 }, { "epoch": 0.2, "learning_rate": 0.0009875829913719664, "loss": 3.5055, "step": 689 }, { "epoch": 0.2, "learning_rate": 0.0009875309824257071, "loss": 3.5864, "step": 690 }, { "epoch": 0.2, "learning_rate": 0.000987478866161819, "loss": 3.4751, "step": 691 }, { "epoch": 0.2, "learning_rate": 0.0009874266425917745, "loss": 3.6781, "step": 692 }, { "epoch": 0.2, "learning_rate": 0.0009873743117270691, "loss": 3.6705, "step": 693 }, { "epoch": 0.2, "learning_rate": 0.0009873218735792222, "loss": 3.5399, "step": 694 }, { "epoch": 0.2, "learning_rate": 0.0009872693281597767, "loss": 3.4929, "step": 695 }, { "epoch": 0.2, "learning_rate": 0.000987216675480299, "loss": 3.5202, "step": 696 }, { "epoch": 0.2, "learning_rate": 0.0009871639155523795, "loss": 3.7003, "step": 697 }, { "epoch": 0.2, "learning_rate": 0.000987111048387632, "loss": 3.5224, "step": 698 }, { "epoch": 0.2, "learning_rate": 0.0009870580739976935, "loss": 3.4661, "step": 699 }, { "epoch": 0.2, "learning_rate": 0.0009870049923942253, "loss": 3.5702, "step": 700 }, { "epoch": 0.2, "learning_rate": 0.000986951803588912, "loss": 3.6086, "step": 701 }, { "epoch": 0.2, "learning_rate": 0.0009868985075934616, "loss": 3.6399, "step": 702 }, { "epoch": 0.2, "learning_rate": 0.0009868451044196059, "loss": 3.6272, "step": 703 }, { "epoch": 0.2, "learning_rate": 0.0009867915940791, "loss": 3.6246, "step": 704 }, { "epoch": 0.2, "learning_rate": 0.0009867379765837235, "loss": 3.4941, "step": 705 }, { "epoch": 0.2, "learning_rate": 0.0009866842519452785, "loss": 3.6187, "step": 706 }, { "epoch": 0.2, "learning_rate": 0.0009866304201755912, "loss": 3.6571, "step": 707 }, { "epoch": 0.21, "learning_rate": 0.0009865764812865112, "loss": 3.467, "step": 708 }, { "epoch": 0.21, "learning_rate": 0.0009865224352899118, "loss": 3.5198, "step": 709 }, { "epoch": 0.21, "learning_rate": 0.0009864682821976902, "loss": 3.5924, "step": 710 }, { "epoch": 0.21, "learning_rate": 0.0009864140220217665, "loss": 3.6005, "step": 711 }, { "epoch": 0.21, "learning_rate": 0.0009863596547740846, "loss": 3.5977, "step": 712 }, { "epoch": 0.21, "learning_rate": 0.0009863051804666124, "loss": 3.4406, "step": 713 }, { "epoch": 0.21, "learning_rate": 0.0009862505991113408, "loss": 3.5955, "step": 714 }, { "epoch": 0.21, "learning_rate": 0.0009861959107202846, "loss": 3.7426, "step": 715 }, { "epoch": 0.21, "learning_rate": 0.0009861411153054822, "loss": 3.5773, "step": 716 }, { "epoch": 0.21, "learning_rate": 0.0009860862128789954, "loss": 3.5234, "step": 717 }, { "epoch": 0.21, "learning_rate": 0.0009860312034529093, "loss": 3.561, "step": 718 }, { "epoch": 0.21, "learning_rate": 0.0009859760870393332, "loss": 3.615, "step": 719 }, { "epoch": 0.21, "learning_rate": 0.000985920863650399, "loss": 3.5786, "step": 720 }, { "epoch": 0.21, "learning_rate": 0.0009858655332982632, "loss": 3.5686, "step": 721 }, { "epoch": 0.21, "learning_rate": 0.0009858100959951057, "loss": 3.6361, "step": 722 }, { "epoch": 0.21, "learning_rate": 0.000985754551753129, "loss": 3.6035, "step": 723 }, { "epoch": 0.21, "learning_rate": 0.00098569890058456, "loss": 3.4891, "step": 724 }, { "epoch": 0.21, "learning_rate": 0.000985643142501649, "loss": 3.5108, "step": 725 }, { "epoch": 0.21, "learning_rate": 0.0009855872775166696, "loss": 3.5662, "step": 726 }, { "epoch": 0.21, "learning_rate": 0.000985531305641919, "loss": 3.5337, "step": 727 }, { "epoch": 0.21, "learning_rate": 0.0009854752268897181, "loss": 3.5486, "step": 728 }, { "epoch": 0.21, "learning_rate": 0.0009854190412724112, "loss": 3.5992, "step": 729 }, { "epoch": 0.21, "learning_rate": 0.0009853627488023663, "loss": 3.5737, "step": 730 }, { "epoch": 0.21, "learning_rate": 0.0009853063494919745, "loss": 3.6321, "step": 731 }, { "epoch": 0.21, "learning_rate": 0.000985249843353651, "loss": 3.6426, "step": 732 }, { "epoch": 0.21, "learning_rate": 0.0009851932303998336, "loss": 3.61, "step": 733 }, { "epoch": 0.21, "learning_rate": 0.0009851365106429849, "loss": 3.6078, "step": 734 }, { "epoch": 0.21, "learning_rate": 0.00098507968409559, "loss": 3.5538, "step": 735 }, { "epoch": 0.21, "learning_rate": 0.000985022750770158, "loss": 3.4704, "step": 736 }, { "epoch": 0.21, "learning_rate": 0.000984965710679221, "loss": 3.5393, "step": 737 }, { "epoch": 0.21, "learning_rate": 0.000984908563835335, "loss": 3.6422, "step": 738 }, { "epoch": 0.21, "learning_rate": 0.00098485131025108, "loss": 3.4752, "step": 739 }, { "epoch": 0.21, "learning_rate": 0.000984793949939058, "loss": 3.53, "step": 740 }, { "epoch": 0.21, "learning_rate": 0.0009847364829118962, "loss": 3.5809, "step": 741 }, { "epoch": 0.21, "learning_rate": 0.0009846789091822441, "loss": 3.6176, "step": 742 }, { "epoch": 0.22, "learning_rate": 0.0009846212287627754, "loss": 3.5731, "step": 743 }, { "epoch": 0.22, "learning_rate": 0.0009845634416661866, "loss": 3.6697, "step": 744 }, { "epoch": 0.22, "learning_rate": 0.0009845055479051985, "loss": 3.6432, "step": 745 }, { "epoch": 0.22, "learning_rate": 0.0009844475474925548, "loss": 3.4783, "step": 746 }, { "epoch": 0.22, "learning_rate": 0.0009843894404410224, "loss": 3.5522, "step": 747 }, { "epoch": 0.22, "learning_rate": 0.0009843312267633928, "loss": 3.6019, "step": 748 }, { "epoch": 0.22, "learning_rate": 0.00098427290647248, "loss": 3.5303, "step": 749 }, { "epoch": 0.22, "learning_rate": 0.0009842144795811215, "loss": 3.6123, "step": 750 }, { "epoch": 0.22, "learning_rate": 0.0009841559461021786, "loss": 3.5729, "step": 751 }, { "epoch": 0.22, "learning_rate": 0.000984097306048536, "loss": 3.4976, "step": 752 }, { "epoch": 0.22, "learning_rate": 0.000984038559433102, "loss": 3.5393, "step": 753 }, { "epoch": 0.22, "learning_rate": 0.0009839797062688083, "loss": 3.6539, "step": 754 }, { "epoch": 0.22, "learning_rate": 0.0009839207465686093, "loss": 3.5819, "step": 755 }, { "epoch": 0.22, "learning_rate": 0.000983861680345484, "loss": 3.5266, "step": 756 }, { "epoch": 0.22, "learning_rate": 0.0009838025076124345, "loss": 3.6981, "step": 757 }, { "epoch": 0.22, "learning_rate": 0.0009837432283824855, "loss": 3.6821, "step": 758 }, { "epoch": 0.22, "learning_rate": 0.0009836838426686863, "loss": 3.4628, "step": 759 }, { "epoch": 0.22, "learning_rate": 0.0009836243504841092, "loss": 3.6034, "step": 760 }, { "epoch": 0.22, "learning_rate": 0.0009835647518418498, "loss": 3.6074, "step": 761 }, { "epoch": 0.22, "learning_rate": 0.0009835050467550273, "loss": 3.6137, "step": 762 }, { "epoch": 0.22, "learning_rate": 0.000983445235236784, "loss": 3.4411, "step": 763 }, { "epoch": 0.22, "learning_rate": 0.0009833853173002861, "loss": 3.5851, "step": 764 }, { "epoch": 0.22, "learning_rate": 0.000983325292958723, "loss": 3.4749, "step": 765 }, { "epoch": 0.22, "learning_rate": 0.0009832651622253077, "loss": 3.5695, "step": 766 }, { "epoch": 0.22, "learning_rate": 0.0009832049251132762, "loss": 3.6013, "step": 767 }, { "epoch": 0.22, "learning_rate": 0.0009831445816358884, "loss": 3.5717, "step": 768 }, { "epoch": 0.22, "learning_rate": 0.0009830841318064273, "loss": 3.6076, "step": 769 }, { "epoch": 0.22, "learning_rate": 0.0009830235756381992, "loss": 3.499, "step": 770 }, { "epoch": 0.22, "learning_rate": 0.0009829629131445341, "loss": 3.4922, "step": 771 }, { "epoch": 0.22, "learning_rate": 0.0009829021443387856, "loss": 3.6222, "step": 772 }, { "epoch": 0.22, "learning_rate": 0.0009828412692343303, "loss": 3.3502, "step": 773 }, { "epoch": 0.22, "learning_rate": 0.000982780287844568, "loss": 3.351, "step": 774 }, { "epoch": 0.22, "learning_rate": 0.0009827192001829227, "loss": 3.5911, "step": 775 }, { "epoch": 0.22, "learning_rate": 0.000982658006262841, "loss": 3.5716, "step": 776 }, { "epoch": 0.23, "learning_rate": 0.0009825967060977932, "loss": 3.5324, "step": 777 }, { "epoch": 0.23, "learning_rate": 0.000982535299701273, "loss": 3.6293, "step": 778 }, { "epoch": 0.23, "learning_rate": 0.0009824737870867976, "loss": 3.5073, "step": 779 }, { "epoch": 0.23, "learning_rate": 0.0009824121682679072, "loss": 3.5074, "step": 780 }, { "epoch": 0.23, "learning_rate": 0.000982350443258166, "loss": 3.5036, "step": 781 }, { "epoch": 0.23, "learning_rate": 0.000982288612071161, "loss": 3.5905, "step": 782 }, { "epoch": 0.23, "learning_rate": 0.0009822266747205024, "loss": 3.5023, "step": 783 }, { "epoch": 0.23, "learning_rate": 0.0009821646312198249, "loss": 3.5743, "step": 784 }, { "epoch": 0.23, "learning_rate": 0.0009821024815827853, "loss": 3.5681, "step": 785 }, { "epoch": 0.23, "learning_rate": 0.0009820402258230642, "loss": 3.6316, "step": 786 }, { "epoch": 0.23, "learning_rate": 0.000981977863954366, "loss": 3.5863, "step": 787 }, { "epoch": 0.23, "learning_rate": 0.0009819153959904178, "loss": 3.6011, "step": 788 }, { "epoch": 0.23, "learning_rate": 0.0009818528219449705, "loss": 3.5443, "step": 789 }, { "epoch": 0.23, "learning_rate": 0.000981790141831798, "loss": 3.5701, "step": 790 }, { "epoch": 0.23, "learning_rate": 0.0009817273556646982, "loss": 3.5753, "step": 791 }, { "epoch": 0.23, "learning_rate": 0.0009816644634574913, "loss": 3.549, "step": 792 }, { "epoch": 0.23, "learning_rate": 0.0009816014652240218, "loss": 3.5605, "step": 793 }, { "epoch": 0.23, "learning_rate": 0.000981538360978157, "loss": 3.601, "step": 794 }, { "epoch": 0.23, "learning_rate": 0.0009814751507337877, "loss": 3.5687, "step": 795 }, { "epoch": 0.23, "learning_rate": 0.0009814118345048285, "loss": 3.6279, "step": 796 }, { "epoch": 0.23, "learning_rate": 0.0009813484123052162, "loss": 3.5233, "step": 797 }, { "epoch": 0.23, "learning_rate": 0.0009812848841489119, "loss": 3.4506, "step": 798 }, { "epoch": 0.23, "learning_rate": 0.0009812212500498996, "loss": 3.6081, "step": 799 }, { "epoch": 0.23, "learning_rate": 0.000981157510022187, "loss": 3.6848, "step": 800 }, { "epoch": 0.23, "learning_rate": 0.0009810936640798045, "loss": 3.4994, "step": 801 }, { "epoch": 0.23, "learning_rate": 0.0009810297122368067, "loss": 3.6364, "step": 802 }, { "epoch": 0.23, "learning_rate": 0.0009809656545072703, "loss": 3.4953, "step": 803 }, { "epoch": 0.23, "learning_rate": 0.0009809014909052966, "loss": 3.5484, "step": 804 }, { "epoch": 0.23, "learning_rate": 0.0009808372214450093, "loss": 3.5691, "step": 805 }, { "epoch": 0.23, "learning_rate": 0.0009807728461405556, "loss": 3.5722, "step": 806 }, { "epoch": 0.23, "learning_rate": 0.0009807083650061063, "loss": 3.5811, "step": 807 }, { "epoch": 0.23, "learning_rate": 0.0009806437780558552, "loss": 3.6131, "step": 808 }, { "epoch": 0.23, "learning_rate": 0.0009805790853040196, "loss": 3.5346, "step": 809 }, { "epoch": 0.23, "learning_rate": 0.0009805142867648398, "loss": 3.6232, "step": 810 }, { "epoch": 0.23, "learning_rate": 0.0009804493824525796, "loss": 3.5237, "step": 811 }, { "epoch": 0.24, "learning_rate": 0.0009803843723815262, "loss": 3.6583, "step": 812 }, { "epoch": 0.24, "learning_rate": 0.0009803192565659897, "loss": 3.5203, "step": 813 }, { "epoch": 0.24, "learning_rate": 0.0009802540350203038, "loss": 3.5106, "step": 814 }, { "epoch": 0.24, "learning_rate": 0.0009801887077588254, "loss": 3.6142, "step": 815 }, { "epoch": 0.24, "learning_rate": 0.0009801232747959348, "loss": 3.4449, "step": 816 }, { "epoch": 0.24, "learning_rate": 0.000980057736146035, "loss": 3.4883, "step": 817 }, { "epoch": 0.24, "learning_rate": 0.0009799920918235532, "loss": 3.4582, "step": 818 }, { "epoch": 0.24, "learning_rate": 0.000979926341842939, "loss": 3.638, "step": 819 }, { "epoch": 0.24, "learning_rate": 0.0009798604862186655, "loss": 3.4349, "step": 820 }, { "epoch": 0.24, "learning_rate": 0.0009797945249652295, "loss": 3.5413, "step": 821 }, { "epoch": 0.24, "learning_rate": 0.0009797284580971503, "loss": 3.5257, "step": 822 }, { "epoch": 0.24, "learning_rate": 0.0009796622856289713, "loss": 3.4987, "step": 823 }, { "epoch": 0.24, "learning_rate": 0.0009795960075752585, "loss": 3.6352, "step": 824 }, { "epoch": 0.24, "learning_rate": 0.000979529623950601, "loss": 3.5039, "step": 825 }, { "epoch": 0.24, "learning_rate": 0.0009794631347696122, "loss": 3.3518, "step": 826 }, { "epoch": 0.24, "learning_rate": 0.0009793965400469273, "loss": 3.6784, "step": 827 }, { "epoch": 0.24, "learning_rate": 0.0009793298397972057, "loss": 3.5734, "step": 828 }, { "epoch": 0.24, "learning_rate": 0.0009792630340351301, "loss": 3.5829, "step": 829 }, { "epoch": 0.24, "learning_rate": 0.0009791961227754056, "loss": 3.5014, "step": 830 }, { "epoch": 0.24, "learning_rate": 0.0009791291060327612, "loss": 3.5623, "step": 831 }, { "epoch": 0.24, "learning_rate": 0.000979061983821949, "loss": 3.6146, "step": 832 }, { "epoch": 0.24, "learning_rate": 0.0009789947561577445, "loss": 3.5619, "step": 833 }, { "epoch": 0.24, "learning_rate": 0.0009789274230549456, "loss": 3.4901, "step": 834 }, { "epoch": 0.24, "learning_rate": 0.0009788599845283745, "loss": 3.5296, "step": 835 }, { "epoch": 0.24, "learning_rate": 0.0009787924405928758, "loss": 3.4983, "step": 836 }, { "epoch": 0.24, "learning_rate": 0.0009787247912633177, "loss": 3.4982, "step": 837 }, { "epoch": 0.24, "learning_rate": 0.0009786570365545916, "loss": 3.5671, "step": 838 }, { "epoch": 0.24, "learning_rate": 0.0009785891764816116, "loss": 3.4727, "step": 839 }, { "epoch": 0.24, "learning_rate": 0.0009785212110593158, "loss": 3.4206, "step": 840 }, { "epoch": 0.24, "learning_rate": 0.0009784531403026649, "loss": 3.5059, "step": 841 }, { "epoch": 0.24, "learning_rate": 0.000978384964226643, "loss": 3.6528, "step": 842 }, { "epoch": 0.24, "learning_rate": 0.0009783166828462573, "loss": 3.56, "step": 843 }, { "epoch": 0.24, "learning_rate": 0.0009782482961765383, "loss": 3.4675, "step": 844 }, { "epoch": 0.24, "learning_rate": 0.000978179804232539, "loss": 3.5227, "step": 845 }, { "epoch": 0.25, "learning_rate": 0.0009781112070293373, "loss": 3.5736, "step": 846 }, { "epoch": 0.25, "learning_rate": 0.0009780425045820323, "loss": 3.658, "step": 847 }, { "epoch": 0.25, "learning_rate": 0.0009779736969057473, "loss": 3.6315, "step": 848 }, { "epoch": 0.25, "learning_rate": 0.0009779047840156288, "loss": 3.5286, "step": 849 }, { "epoch": 0.25, "learning_rate": 0.000977835765926846, "loss": 3.5504, "step": 850 }, { "epoch": 0.25, "learning_rate": 0.0009777666426545916, "loss": 3.5619, "step": 851 }, { "epoch": 0.25, "learning_rate": 0.000977697414214081, "loss": 3.4639, "step": 852 }, { "epoch": 0.25, "learning_rate": 0.0009776280806205536, "loss": 3.585, "step": 853 }, { "epoch": 0.25, "learning_rate": 0.000977558641889271, "loss": 3.6519, "step": 854 }, { "epoch": 0.25, "learning_rate": 0.0009774890980355187, "loss": 3.4163, "step": 855 }, { "epoch": 0.25, "learning_rate": 0.000977419449074605, "loss": 3.5057, "step": 856 }, { "epoch": 0.25, "learning_rate": 0.0009773496950218612, "loss": 3.5909, "step": 857 }, { "epoch": 0.25, "learning_rate": 0.000977279835892642, "loss": 3.6187, "step": 858 }, { "epoch": 0.25, "learning_rate": 0.000977209871702325, "loss": 3.5689, "step": 859 }, { "epoch": 0.25, "learning_rate": 0.0009771398024663112, "loss": 3.5449, "step": 860 }, { "epoch": 0.25, "learning_rate": 0.0009770696282000244, "loss": 3.5003, "step": 861 }, { "epoch": 0.25, "learning_rate": 0.0009769993489189119, "loss": 3.5458, "step": 862 }, { "epoch": 0.25, "learning_rate": 0.0009769289646384437, "loss": 3.4318, "step": 863 }, { "epoch": 0.25, "learning_rate": 0.0009768584753741135, "loss": 3.5175, "step": 864 }, { "epoch": 0.25, "learning_rate": 0.0009767878811414372, "loss": 3.5169, "step": 865 }, { "epoch": 0.25, "learning_rate": 0.0009767171819559549, "loss": 3.5585, "step": 866 }, { "epoch": 0.25, "learning_rate": 0.0009766463778332285, "loss": 3.4212, "step": 867 }, { "epoch": 0.25, "learning_rate": 0.0009765754687888445, "loss": 3.5118, "step": 868 }, { "epoch": 0.25, "learning_rate": 0.0009765044548384113, "loss": 3.5994, "step": 869 }, { "epoch": 0.25, "learning_rate": 0.000976433335997561, "loss": 3.4767, "step": 870 }, { "epoch": 0.25, "learning_rate": 0.0009763621122819484, "loss": 3.5178, "step": 871 }, { "epoch": 0.25, "learning_rate": 0.0009762907837072519, "loss": 3.5422, "step": 872 }, { "epoch": 0.25, "learning_rate": 0.0009762193502891725, "loss": 3.4347, "step": 873 }, { "epoch": 0.25, "learning_rate": 0.0009761478120434345, "loss": 3.4119, "step": 874 }, { "epoch": 0.25, "learning_rate": 0.0009760761689857852, "loss": 3.6649, "step": 875 }, { "epoch": 0.25, "learning_rate": 0.0009760044211319952, "loss": 3.6256, "step": 876 }, { "epoch": 0.25, "learning_rate": 0.0009759325684978576, "loss": 3.4753, "step": 877 }, { "epoch": 0.25, "learning_rate": 0.0009758606110991892, "loss": 3.3769, "step": 878 }, { "epoch": 0.25, "learning_rate": 0.0009757885489518296, "loss": 3.5747, "step": 879 }, { "epoch": 0.25, "learning_rate": 0.0009757163820716416, "loss": 3.5021, "step": 880 }, { "epoch": 0.26, "learning_rate": 0.0009756441104745106, "loss": 3.6369, "step": 881 }, { "epoch": 0.26, "learning_rate": 0.0009755717341763455, "loss": 3.5371, "step": 882 }, { "epoch": 0.26, "learning_rate": 0.0009754992531930782, "loss": 3.5898, "step": 883 }, { "epoch": 0.26, "learning_rate": 0.0009754266675406634, "loss": 3.59, "step": 884 }, { "epoch": 0.26, "learning_rate": 0.0009753539772350791, "loss": 3.4418, "step": 885 }, { "epoch": 0.26, "learning_rate": 0.0009752811822923262, "loss": 3.5543, "step": 886 }, { "epoch": 0.26, "learning_rate": 0.0009752082827284288, "loss": 3.6001, "step": 887 }, { "epoch": 0.26, "learning_rate": 0.0009751352785594337, "loss": 3.503, "step": 888 }, { "epoch": 0.26, "learning_rate": 0.0009750621698014111, "loss": 3.4875, "step": 889 }, { "epoch": 0.26, "learning_rate": 0.0009749889564704537, "loss": 3.5998, "step": 890 }, { "epoch": 0.26, "learning_rate": 0.0009749156385826781, "loss": 3.4877, "step": 891 }, { "epoch": 0.26, "learning_rate": 0.0009748422161542228, "loss": 3.4938, "step": 892 }, { "epoch": 0.26, "learning_rate": 0.0009747686892012504, "loss": 3.4152, "step": 893 }, { "epoch": 0.26, "learning_rate": 0.0009746950577399458, "loss": 3.5186, "step": 894 }, { "epoch": 0.26, "learning_rate": 0.000974621321786517, "loss": 3.5221, "step": 895 }, { "epoch": 0.26, "learning_rate": 0.0009745474813571953, "loss": 3.5841, "step": 896 }, { "epoch": 0.26, "learning_rate": 0.0009744735364682345, "loss": 3.7238, "step": 897 }, { "epoch": 0.26, "learning_rate": 0.000974399487135912, "loss": 3.6349, "step": 898 }, { "epoch": 0.26, "learning_rate": 0.0009743253333765279, "loss": 3.5319, "step": 899 }, { "epoch": 0.26, "learning_rate": 0.0009742510752064051, "loss": 3.4815, "step": 900 }, { "epoch": 0.26, "learning_rate": 0.0009741767126418898, "loss": 3.452, "step": 901 }, { "epoch": 0.26, "learning_rate": 0.0009741022456993509, "loss": 3.6086, "step": 902 }, { "epoch": 0.26, "learning_rate": 0.0009740276743951806, "loss": 3.43, "step": 903 }, { "epoch": 0.26, "learning_rate": 0.0009739529987457936, "loss": 3.5281, "step": 904 }, { "epoch": 0.26, "learning_rate": 0.0009738782187676281, "loss": 3.3784, "step": 905 }, { "epoch": 0.26, "learning_rate": 0.000973803334477145, "loss": 3.445, "step": 906 }, { "epoch": 0.26, "learning_rate": 0.0009737283458908281, "loss": 3.6006, "step": 907 }, { "epoch": 0.26, "learning_rate": 0.0009736532530251842, "loss": 3.525, "step": 908 }, { "epoch": 0.26, "learning_rate": 0.0009735780558967434, "loss": 3.5538, "step": 909 }, { "epoch": 0.26, "learning_rate": 0.000973502754522058, "loss": 3.4658, "step": 910 }, { "epoch": 0.26, "learning_rate": 0.0009734273489177041, "loss": 3.6239, "step": 911 }, { "epoch": 0.26, "learning_rate": 0.0009733518391002803, "loss": 3.3687, "step": 912 }, { "epoch": 0.26, "learning_rate": 0.0009732762250864078, "loss": 3.5682, "step": 913 }, { "epoch": 0.26, "learning_rate": 0.0009732005068927314, "loss": 3.526, "step": 914 }, { "epoch": 0.26, "learning_rate": 0.0009731246845359185, "loss": 3.6061, "step": 915 }, { "epoch": 0.27, "learning_rate": 0.0009730487580326594, "loss": 3.4868, "step": 916 }, { "epoch": 0.27, "learning_rate": 0.0009729727273996675, "loss": 3.4171, "step": 917 }, { "epoch": 0.27, "learning_rate": 0.0009728965926536793, "loss": 3.4232, "step": 918 }, { "epoch": 0.27, "learning_rate": 0.0009728203538114533, "loss": 3.288, "step": 919 }, { "epoch": 0.27, "learning_rate": 0.000972744010889772, "loss": 3.5914, "step": 920 }, { "epoch": 0.27, "learning_rate": 0.0009726675639054403, "loss": 3.5839, "step": 921 }, { "epoch": 0.27, "learning_rate": 0.0009725910128752861, "loss": 3.5542, "step": 922 }, { "epoch": 0.27, "learning_rate": 0.00097251435781616, "loss": 3.4124, "step": 923 }, { "epoch": 0.27, "learning_rate": 0.0009724375987449358, "loss": 3.383, "step": 924 }, { "epoch": 0.27, "learning_rate": 0.0009723607356785102, "loss": 3.4318, "step": 925 }, { "epoch": 0.27, "learning_rate": 0.0009722837686338024, "loss": 3.5786, "step": 926 }, { "epoch": 0.27, "learning_rate": 0.0009722066976277548, "loss": 3.371, "step": 927 }, { "epoch": 0.27, "learning_rate": 0.0009721295226773329, "loss": 3.5054, "step": 928 }, { "epoch": 0.27, "learning_rate": 0.0009720522437995245, "loss": 3.5444, "step": 929 }, { "epoch": 0.27, "learning_rate": 0.0009719748610113408, "loss": 3.4402, "step": 930 }, { "epoch": 0.27, "learning_rate": 0.0009718973743298154, "loss": 3.4786, "step": 931 }, { "epoch": 0.27, "learning_rate": 0.0009718197837720053, "loss": 3.5061, "step": 932 }, { "epoch": 0.27, "learning_rate": 0.0009717420893549903, "loss": 3.627, "step": 933 }, { "epoch": 0.27, "learning_rate": 0.0009716642910958721, "loss": 3.6156, "step": 934 }, { "epoch": 0.27, "learning_rate": 0.0009715863890117769, "loss": 3.5177, "step": 935 }, { "epoch": 0.27, "learning_rate": 0.0009715083831198521, "loss": 3.5053, "step": 936 }, { "epoch": 0.27, "learning_rate": 0.0009714302734372694, "loss": 3.4878, "step": 937 }, { "epoch": 0.27, "learning_rate": 0.0009713520599812222, "loss": 3.4091, "step": 938 }, { "epoch": 0.27, "learning_rate": 0.0009712737427689276, "loss": 3.497, "step": 939 }, { "epoch": 0.27, "learning_rate": 0.0009711953218176249, "loss": 3.4804, "step": 940 }, { "epoch": 0.27, "learning_rate": 0.0009711167971445764, "loss": 3.4217, "step": 941 }, { "epoch": 0.27, "learning_rate": 0.0009710381687670675, "loss": 3.5062, "step": 942 }, { "epoch": 0.27, "learning_rate": 0.0009709594367024062, "loss": 3.5981, "step": 943 }, { "epoch": 0.27, "learning_rate": 0.0009708806009679235, "loss": 3.6186, "step": 944 }, { "epoch": 0.27, "learning_rate": 0.0009708016615809729, "loss": 3.4613, "step": 945 }, { "epoch": 0.27, "learning_rate": 0.0009707226185589311, "loss": 3.5123, "step": 946 }, { "epoch": 0.27, "learning_rate": 0.0009706434719191972, "loss": 3.5741, "step": 947 }, { "epoch": 0.27, "learning_rate": 0.0009705642216791935, "loss": 3.4768, "step": 948 }, { "epoch": 0.27, "learning_rate": 0.0009704848678563648, "loss": 3.5631, "step": 949 }, { "epoch": 0.28, "learning_rate": 0.000970405410468179, "loss": 3.5149, "step": 950 }, { "epoch": 0.28, "learning_rate": 0.0009703258495321266, "loss": 3.5053, "step": 951 }, { "epoch": 0.28, "learning_rate": 0.0009702461850657208, "loss": 3.606, "step": 952 }, { "epoch": 0.28, "learning_rate": 0.0009701664170864979, "loss": 3.5679, "step": 953 }, { "epoch": 0.28, "learning_rate": 0.0009700865456120167, "loss": 3.5625, "step": 954 }, { "epoch": 0.28, "learning_rate": 0.0009700065706598588, "loss": 3.5396, "step": 955 }, { "epoch": 0.28, "learning_rate": 0.0009699264922476289, "loss": 3.5066, "step": 956 }, { "epoch": 0.28, "learning_rate": 0.0009698463103929542, "loss": 3.4495, "step": 957 }, { "epoch": 0.28, "learning_rate": 0.0009697660251134846, "loss": 3.5757, "step": 958 }, { "epoch": 0.28, "learning_rate": 0.0009696856364268928, "loss": 3.4091, "step": 959 }, { "epoch": 0.28, "learning_rate": 0.0009696051443508745, "loss": 3.3879, "step": 960 }, { "epoch": 0.28, "learning_rate": 0.0009695245489031478, "loss": 3.5407, "step": 961 }, { "epoch": 0.28, "learning_rate": 0.0009694438501014539, "loss": 3.5848, "step": 962 }, { "epoch": 0.28, "learning_rate": 0.0009693630479635566, "loss": 3.4625, "step": 963 }, { "epoch": 0.28, "learning_rate": 0.0009692821425072427, "loss": 3.4761, "step": 964 }, { "epoch": 0.28, "learning_rate": 0.0009692011337503211, "loss": 3.5363, "step": 965 }, { "epoch": 0.28, "learning_rate": 0.0009691200217106237, "loss": 3.4911, "step": 966 }, { "epoch": 0.28, "learning_rate": 0.0009690388064060059, "loss": 3.5524, "step": 967 }, { "epoch": 0.28, "learning_rate": 0.0009689574878543446, "loss": 3.5366, "step": 968 }, { "epoch": 0.28, "learning_rate": 0.0009688760660735403, "loss": 3.5017, "step": 969 }, { "epoch": 0.28, "learning_rate": 0.0009687945410815158, "loss": 3.4439, "step": 970 }, { "epoch": 0.28, "learning_rate": 0.000968712912896217, "loss": 3.6677, "step": 971 }, { "epoch": 0.28, "learning_rate": 0.0009686311815356121, "loss": 3.5207, "step": 972 }, { "epoch": 0.28, "learning_rate": 0.0009685493470176922, "loss": 3.6109, "step": 973 }, { "epoch": 0.28, "learning_rate": 0.0009684674093604713, "loss": 3.4524, "step": 974 }, { "epoch": 0.28, "learning_rate": 0.0009683853685819856, "loss": 3.4903, "step": 975 }, { "epoch": 0.28, "learning_rate": 0.0009683032247002945, "loss": 3.5888, "step": 976 }, { "epoch": 0.28, "learning_rate": 0.0009682209777334798, "loss": 3.5445, "step": 977 }, { "epoch": 0.28, "learning_rate": 0.0009681386276996462, "loss": 3.5005, "step": 978 }, { "epoch": 0.28, "learning_rate": 0.000968056174616921, "loss": 3.4943, "step": 979 }, { "epoch": 0.28, "learning_rate": 0.0009679736185034539, "loss": 3.5839, "step": 980 }, { "epoch": 0.28, "learning_rate": 0.0009678909593774179, "loss": 3.5211, "step": 981 }, { "epoch": 0.28, "learning_rate": 0.000967808197257008, "loss": 3.3511, "step": 982 }, { "epoch": 0.28, "learning_rate": 0.0009677253321604425, "loss": 3.5873, "step": 983 }, { "epoch": 0.28, "learning_rate": 0.0009676423641059617, "loss": 3.4395, "step": 984 }, { "epoch": 0.29, "learning_rate": 0.0009675592931118293, "loss": 3.6192, "step": 985 }, { "epoch": 0.29, "learning_rate": 0.0009674761191963311, "loss": 3.5369, "step": 986 }, { "epoch": 0.29, "learning_rate": 0.0009673928423777756, "loss": 3.5729, "step": 987 }, { "epoch": 0.29, "learning_rate": 0.0009673094626744943, "loss": 3.5083, "step": 988 }, { "epoch": 0.29, "learning_rate": 0.000967225980104841, "loss": 3.5692, "step": 989 }, { "epoch": 0.29, "learning_rate": 0.0009671423946871924, "loss": 3.5435, "step": 990 }, { "epoch": 0.29, "learning_rate": 0.0009670587064399476, "loss": 3.4407, "step": 991 }, { "epoch": 0.29, "learning_rate": 0.0009669749153815285, "loss": 3.4946, "step": 992 }, { "epoch": 0.29, "learning_rate": 0.0009668910215303797, "loss": 3.4737, "step": 993 }, { "epoch": 0.29, "learning_rate": 0.000966807024904968, "loss": 3.4256, "step": 994 }, { "epoch": 0.29, "learning_rate": 0.0009667229255237835, "loss": 3.4798, "step": 995 }, { "epoch": 0.29, "learning_rate": 0.0009666387234053385, "loss": 3.4783, "step": 996 }, { "epoch": 0.29, "learning_rate": 0.0009665544185681677, "loss": 3.4432, "step": 997 }, { "epoch": 0.29, "learning_rate": 0.0009664700110308287, "loss": 3.4962, "step": 998 }, { "epoch": 0.29, "learning_rate": 0.000966385500811902, "loss": 3.3048, "step": 999 }, { "epoch": 0.29, "learning_rate": 0.0009663008879299902, "loss": 3.5994, "step": 1000 }, { "epoch": 0.29, "learning_rate": 0.0009662161724037187, "loss": 3.5069, "step": 1001 }, { "epoch": 0.29, "learning_rate": 0.0009661313542517354, "loss": 3.5633, "step": 1002 }, { "epoch": 0.29, "learning_rate": 0.0009660464334927112, "loss": 3.5597, "step": 1003 }, { "epoch": 0.29, "learning_rate": 0.0009659614101453389, "loss": 3.4308, "step": 1004 }, { "epoch": 0.29, "learning_rate": 0.0009658762842283342, "loss": 3.5643, "step": 1005 }, { "epoch": 0.29, "learning_rate": 0.0009657910557604358, "loss": 3.45, "step": 1006 }, { "epoch": 0.29, "learning_rate": 0.0009657057247604042, "loss": 3.5006, "step": 1007 }, { "epoch": 0.29, "learning_rate": 0.0009656202912470232, "loss": 3.5166, "step": 1008 }, { "epoch": 0.29, "learning_rate": 0.0009655347552390986, "loss": 3.5072, "step": 1009 }, { "epoch": 0.29, "learning_rate": 0.0009654491167554591, "loss": 3.4849, "step": 1010 }, { "epoch": 0.29, "learning_rate": 0.0009653633758149558, "loss": 3.4952, "step": 1011 }, { "epoch": 0.29, "learning_rate": 0.0009652775324364624, "loss": 3.4285, "step": 1012 }, { "epoch": 0.29, "learning_rate": 0.0009651915866388752, "loss": 3.617, "step": 1013 }, { "epoch": 0.29, "learning_rate": 0.0009651055384411128, "loss": 3.4667, "step": 1014 }, { "epoch": 0.29, "learning_rate": 0.0009650193878621169, "loss": 3.423, "step": 1015 }, { "epoch": 0.29, "learning_rate": 0.000964933134920851, "loss": 3.5756, "step": 1016 }, { "epoch": 0.29, "learning_rate": 0.0009648467796363017, "loss": 3.5222, "step": 1017 }, { "epoch": 0.29, "learning_rate": 0.0009647603220274781, "loss": 3.5495, "step": 1018 }, { "epoch": 0.3, "learning_rate": 0.0009646737621134112, "loss": 3.4985, "step": 1019 }, { "epoch": 0.3, "learning_rate": 0.0009645870999131554, "loss": 3.5168, "step": 1020 }, { "epoch": 0.3, "learning_rate": 0.0009645003354457871, "loss": 3.4323, "step": 1021 }, { "epoch": 0.3, "learning_rate": 0.0009644134687304052, "loss": 3.4265, "step": 1022 }, { "epoch": 0.3, "learning_rate": 0.0009643264997861312, "loss": 3.584, "step": 1023 }, { "epoch": 0.3, "learning_rate": 0.0009642394286321091, "loss": 3.4916, "step": 1024 }, { "epoch": 0.3, "learning_rate": 0.0009641522552875055, "loss": 3.3122, "step": 1025 }, { "epoch": 0.3, "learning_rate": 0.0009640649797715095, "loss": 3.5789, "step": 1026 }, { "epoch": 0.3, "learning_rate": 0.0009639776021033326, "loss": 3.4873, "step": 1027 }, { "epoch": 0.3, "learning_rate": 0.0009638901223022085, "loss": 3.5818, "step": 1028 }, { "epoch": 0.3, "learning_rate": 0.0009638025403873939, "loss": 3.401, "step": 1029 }, { "epoch": 0.3, "learning_rate": 0.0009637148563781677, "loss": 3.6326, "step": 1030 }, { "epoch": 0.3, "learning_rate": 0.0009636270702938313, "loss": 3.641, "step": 1031 }, { "epoch": 0.3, "learning_rate": 0.0009635391821537087, "loss": 3.5108, "step": 1032 }, { "epoch": 0.3, "learning_rate": 0.000963451191977146, "loss": 3.4928, "step": 1033 }, { "epoch": 0.3, "learning_rate": 0.0009633630997835124, "loss": 3.4518, "step": 1034 }, { "epoch": 0.3, "learning_rate": 0.0009632749055921987, "loss": 3.5628, "step": 1035 }, { "epoch": 0.3, "learning_rate": 0.0009631866094226189, "loss": 3.4975, "step": 1036 }, { "epoch": 0.3, "learning_rate": 0.0009630982112942092, "loss": 3.557, "step": 1037 }, { "epoch": 0.3, "learning_rate": 0.0009630097112264283, "loss": 3.4622, "step": 1038 }, { "epoch": 0.3, "learning_rate": 0.0009629211092387569, "loss": 3.6069, "step": 1039 }, { "epoch": 0.3, "learning_rate": 0.0009628324053506988, "loss": 3.43, "step": 1040 }, { "epoch": 0.3, "learning_rate": 0.0009627435995817798, "loss": 3.4051, "step": 1041 }, { "epoch": 0.3, "learning_rate": 0.0009626546919515482, "loss": 3.4932, "step": 1042 }, { "epoch": 0.3, "learning_rate": 0.000962565682479575, "loss": 3.3411, "step": 1043 }, { "epoch": 0.3, "learning_rate": 0.0009624765711854531, "loss": 3.6247, "step": 1044 }, { "epoch": 0.3, "learning_rate": 0.0009623873580887981, "loss": 3.4938, "step": 1045 }, { "epoch": 0.3, "learning_rate": 0.0009622980432092483, "loss": 3.5675, "step": 1046 }, { "epoch": 0.3, "learning_rate": 0.000962208626566464, "loss": 3.4404, "step": 1047 }, { "epoch": 0.3, "learning_rate": 0.0009621191081801277, "loss": 3.4603, "step": 1048 }, { "epoch": 0.3, "learning_rate": 0.0009620294880699449, "loss": 3.5026, "step": 1049 }, { "epoch": 0.3, "learning_rate": 0.0009619397662556434, "loss": 3.4933, "step": 1050 }, { "epoch": 0.3, "learning_rate": 0.0009618499427569729, "loss": 3.423, "step": 1051 }, { "epoch": 0.3, "learning_rate": 0.0009617600175937057, "loss": 3.5696, "step": 1052 }, { "epoch": 0.3, "learning_rate": 0.0009616699907856368, "loss": 3.4475, "step": 1053 }, { "epoch": 0.31, "learning_rate": 0.0009615798623525831, "loss": 3.3406, "step": 1054 }, { "epoch": 0.31, "learning_rate": 0.0009614896323143844, "loss": 3.5829, "step": 1055 }, { "epoch": 0.31, "learning_rate": 0.0009613993006909022, "loss": 3.5474, "step": 1056 }, { "epoch": 0.31, "learning_rate": 0.000961308867502021, "loss": 3.4575, "step": 1057 }, { "epoch": 0.31, "learning_rate": 0.0009612183327676472, "loss": 3.3231, "step": 1058 }, { "epoch": 0.31, "learning_rate": 0.0009611276965077098, "loss": 3.631, "step": 1059 }, { "epoch": 0.31, "learning_rate": 0.0009610369587421603, "loss": 3.4946, "step": 1060 }, { "epoch": 0.31, "learning_rate": 0.0009609461194909719, "loss": 3.4722, "step": 1061 }, { "epoch": 0.31, "learning_rate": 0.0009608551787741408, "loss": 3.3482, "step": 1062 }, { "epoch": 0.31, "learning_rate": 0.0009607641366116855, "loss": 3.4333, "step": 1063 }, { "epoch": 0.31, "learning_rate": 0.0009606729930236463, "loss": 3.3754, "step": 1064 }, { "epoch": 0.31, "learning_rate": 0.0009605817480300863, "loss": 3.3337, "step": 1065 }, { "epoch": 0.31, "learning_rate": 0.0009604904016510907, "loss": 3.4779, "step": 1066 }, { "epoch": 0.31, "learning_rate": 0.0009603989539067673, "loss": 3.5222, "step": 1067 }, { "epoch": 0.31, "learning_rate": 0.0009603074048172458, "loss": 3.6245, "step": 1068 }, { "epoch": 0.31, "learning_rate": 0.0009602157544026784, "loss": 3.5671, "step": 1069 }, { "epoch": 0.31, "learning_rate": 0.0009601240026832398, "loss": 3.457, "step": 1070 }, { "epoch": 0.31, "learning_rate": 0.0009600321496791268, "loss": 3.5599, "step": 1071 }, { "epoch": 0.31, "learning_rate": 0.0009599401954105583, "loss": 3.5857, "step": 1072 }, { "epoch": 0.31, "learning_rate": 0.000959848139897776, "loss": 3.3973, "step": 1073 }, { "epoch": 0.31, "learning_rate": 0.0009597559831610434, "loss": 3.495, "step": 1074 }, { "epoch": 0.31, "learning_rate": 0.0009596637252206466, "loss": 3.508, "step": 1075 }, { "epoch": 0.31, "learning_rate": 0.0009595713660968937, "loss": 3.4039, "step": 1076 }, { "epoch": 0.31, "learning_rate": 0.0009594789058101153, "loss": 3.543, "step": 1077 }, { "epoch": 0.31, "learning_rate": 0.0009593863443806643, "loss": 3.4228, "step": 1078 }, { "epoch": 0.31, "learning_rate": 0.0009592936818289158, "loss": 3.4424, "step": 1079 }, { "epoch": 0.31, "learning_rate": 0.0009592009181752667, "loss": 3.5694, "step": 1080 }, { "epoch": 0.31, "learning_rate": 0.000959108053440137, "loss": 3.4883, "step": 1081 }, { "epoch": 0.31, "learning_rate": 0.0009590150876439685, "loss": 3.4609, "step": 1082 }, { "epoch": 0.31, "learning_rate": 0.000958922020807225, "loss": 3.5559, "step": 1083 }, { "epoch": 0.31, "learning_rate": 0.000958828852950393, "loss": 3.4133, "step": 1084 }, { "epoch": 0.31, "learning_rate": 0.0009587355840939812, "loss": 3.4119, "step": 1085 }, { "epoch": 0.31, "learning_rate": 0.00095864221425852, "loss": 3.5892, "step": 1086 }, { "epoch": 0.31, "learning_rate": 0.0009585487434645627, "loss": 3.477, "step": 1087 }, { "epoch": 0.32, "learning_rate": 0.0009584551717326846, "loss": 3.4365, "step": 1088 }, { "epoch": 0.32, "learning_rate": 0.0009583614990834828, "loss": 3.5156, "step": 1089 }, { "epoch": 0.32, "learning_rate": 0.0009582677255375773, "loss": 3.5673, "step": 1090 }, { "epoch": 0.32, "learning_rate": 0.0009581738511156099, "loss": 3.5105, "step": 1091 }, { "epoch": 0.32, "learning_rate": 0.0009580798758382445, "loss": 3.4694, "step": 1092 }, { "epoch": 0.32, "learning_rate": 0.0009579857997261676, "loss": 3.5583, "step": 1093 }, { "epoch": 0.32, "learning_rate": 0.0009578916228000875, "loss": 3.5915, "step": 1094 }, { "epoch": 0.32, "learning_rate": 0.0009577973450807352, "loss": 3.4807, "step": 1095 }, { "epoch": 0.32, "learning_rate": 0.0009577029665888631, "loss": 3.5021, "step": 1096 }, { "epoch": 0.32, "learning_rate": 0.0009576084873452465, "loss": 3.487, "step": 1097 }, { "epoch": 0.32, "learning_rate": 0.0009575139073706827, "loss": 3.5604, "step": 1098 }, { "epoch": 0.32, "learning_rate": 0.0009574192266859909, "loss": 3.4013, "step": 1099 }, { "epoch": 0.32, "learning_rate": 0.0009573244453120127, "loss": 3.3565, "step": 1100 }, { "epoch": 0.32, "learning_rate": 0.0009572295632696119, "loss": 3.5115, "step": 1101 }, { "epoch": 0.32, "learning_rate": 0.0009571345805796744, "loss": 3.3657, "step": 1102 }, { "epoch": 0.32, "learning_rate": 0.0009570394972631081, "loss": 3.5658, "step": 1103 }, { "epoch": 0.32, "learning_rate": 0.0009569443133408433, "loss": 3.4801, "step": 1104 }, { "epoch": 0.32, "learning_rate": 0.0009568490288338324, "loss": 3.4802, "step": 1105 }, { "epoch": 0.32, "learning_rate": 0.0009567536437630498, "loss": 3.5203, "step": 1106 }, { "epoch": 0.32, "learning_rate": 0.000956658158149492, "loss": 3.3927, "step": 1107 }, { "epoch": 0.32, "learning_rate": 0.0009565625720141779, "loss": 3.5339, "step": 1108 }, { "epoch": 0.32, "learning_rate": 0.0009564668853781482, "loss": 3.4401, "step": 1109 }, { "epoch": 0.32, "learning_rate": 0.0009563710982624661, "loss": 3.3405, "step": 1110 }, { "epoch": 0.32, "learning_rate": 0.0009562752106882167, "loss": 3.5112, "step": 1111 }, { "epoch": 0.32, "learning_rate": 0.0009561792226765072, "loss": 3.4346, "step": 1112 }, { "epoch": 0.32, "learning_rate": 0.0009560831342484667, "loss": 3.542, "step": 1113 }, { "epoch": 0.32, "learning_rate": 0.000955986945425247, "loss": 3.4283, "step": 1114 }, { "epoch": 0.32, "learning_rate": 0.0009558906562280214, "loss": 3.4636, "step": 1115 }, { "epoch": 0.32, "learning_rate": 0.0009557942666779855, "loss": 3.4943, "step": 1116 }, { "epoch": 0.32, "learning_rate": 0.0009556977767963573, "loss": 3.3695, "step": 1117 }, { "epoch": 0.32, "learning_rate": 0.0009556011866043763, "loss": 3.298, "step": 1118 }, { "epoch": 0.32, "learning_rate": 0.0009555044961233047, "loss": 3.4682, "step": 1119 }, { "epoch": 0.32, "learning_rate": 0.0009554077053744262, "loss": 3.6107, "step": 1120 }, { "epoch": 0.32, "learning_rate": 0.000955310814379047, "loss": 3.4354, "step": 1121 }, { "epoch": 0.32, "learning_rate": 0.000955213823158495, "loss": 3.4328, "step": 1122 }, { "epoch": 0.33, "learning_rate": 0.0009551167317341208, "loss": 3.4753, "step": 1123 }, { "epoch": 0.33, "learning_rate": 0.000955019540127296, "loss": 3.3754, "step": 1124 }, { "epoch": 0.33, "learning_rate": 0.0009549222483594154, "loss": 3.5392, "step": 1125 }, { "epoch": 0.33, "learning_rate": 0.000954824856451895, "loss": 3.3951, "step": 1126 }, { "epoch": 0.33, "learning_rate": 0.0009547273644261733, "loss": 3.4474, "step": 1127 }, { "epoch": 0.33, "learning_rate": 0.0009546297723037108, "loss": 3.585, "step": 1128 }, { "epoch": 0.33, "learning_rate": 0.0009545320801059899, "loss": 3.4031, "step": 1129 }, { "epoch": 0.33, "learning_rate": 0.000954434287854515, "loss": 3.4119, "step": 1130 }, { "epoch": 0.33, "learning_rate": 0.0009543363955708124, "loss": 3.4096, "step": 1131 }, { "epoch": 0.33, "learning_rate": 0.0009542384032764311, "loss": 3.3013, "step": 1132 }, { "epoch": 0.33, "learning_rate": 0.0009541403109929412, "loss": 3.3416, "step": 1133 }, { "epoch": 0.33, "learning_rate": 0.0009540421187419354, "loss": 3.3955, "step": 1134 }, { "epoch": 0.33, "learning_rate": 0.0009539438265450282, "loss": 3.4547, "step": 1135 }, { "epoch": 0.33, "learning_rate": 0.0009538454344238563, "loss": 3.5079, "step": 1136 }, { "epoch": 0.33, "learning_rate": 0.000953746942400078, "loss": 3.4254, "step": 1137 }, { "epoch": 0.33, "learning_rate": 0.000953648350495374, "loss": 3.3355, "step": 1138 }, { "epoch": 0.33, "learning_rate": 0.0009535496587314467, "loss": 3.5066, "step": 1139 }, { "epoch": 0.33, "learning_rate": 0.0009534508671300208, "loss": 3.5104, "step": 1140 }, { "epoch": 0.33, "learning_rate": 0.0009533519757128426, "loss": 3.5015, "step": 1141 }, { "epoch": 0.33, "learning_rate": 0.0009532529845016804, "loss": 3.5309, "step": 1142 }, { "epoch": 0.33, "learning_rate": 0.0009531538935183251, "loss": 3.5318, "step": 1143 }, { "epoch": 0.33, "learning_rate": 0.0009530547027845885, "loss": 3.5455, "step": 1144 }, { "epoch": 0.33, "learning_rate": 0.0009529554123223053, "loss": 3.5305, "step": 1145 }, { "epoch": 0.33, "learning_rate": 0.0009528560221533318, "loss": 3.4937, "step": 1146 }, { "epoch": 0.33, "learning_rate": 0.0009527565322995459, "loss": 3.5614, "step": 1147 }, { "epoch": 0.33, "learning_rate": 0.0009526569427828482, "loss": 3.3454, "step": 1148 }, { "epoch": 0.33, "learning_rate": 0.0009525572536251606, "loss": 3.6448, "step": 1149 }, { "epoch": 0.33, "learning_rate": 0.0009524574648484272, "loss": 3.6655, "step": 1150 }, { "epoch": 0.33, "learning_rate": 0.0009523575764746138, "loss": 3.4615, "step": 1151 }, { "epoch": 0.33, "learning_rate": 0.0009522575885257085, "loss": 3.4586, "step": 1152 }, { "epoch": 0.33, "learning_rate": 0.0009521575010237213, "loss": 3.4774, "step": 1153 }, { "epoch": 0.33, "learning_rate": 0.0009520573139906835, "loss": 3.3822, "step": 1154 }, { "epoch": 0.33, "learning_rate": 0.000951957027448649, "loss": 3.3402, "step": 1155 }, { "epoch": 0.33, "learning_rate": 0.0009518566414196933, "loss": 3.5972, "step": 1156 }, { "epoch": 0.34, "learning_rate": 0.0009517561559259138, "loss": 3.4594, "step": 1157 }, { "epoch": 0.34, "learning_rate": 0.0009516555709894299, "loss": 3.3494, "step": 1158 }, { "epoch": 0.34, "learning_rate": 0.0009515548866323829, "loss": 3.4584, "step": 1159 }, { "epoch": 0.34, "learning_rate": 0.0009514541028769359, "loss": 3.4955, "step": 1160 }, { "epoch": 0.34, "learning_rate": 0.0009513532197452737, "loss": 3.442, "step": 1161 }, { "epoch": 0.34, "learning_rate": 0.0009512522372596033, "loss": 3.4369, "step": 1162 }, { "epoch": 0.34, "learning_rate": 0.0009511511554421536, "loss": 3.3733, "step": 1163 }, { "epoch": 0.34, "learning_rate": 0.000951049974315175, "loss": 3.5298, "step": 1164 }, { "epoch": 0.34, "learning_rate": 0.0009509486939009399, "loss": 3.406, "step": 1165 }, { "epoch": 0.34, "learning_rate": 0.0009508473142217429, "loss": 3.4008, "step": 1166 }, { "epoch": 0.34, "learning_rate": 0.0009507458352999001, "loss": 3.5708, "step": 1167 }, { "epoch": 0.34, "learning_rate": 0.0009506442571577494, "loss": 3.401, "step": 1168 }, { "epoch": 0.34, "learning_rate": 0.0009505425798176506, "loss": 3.5604, "step": 1169 }, { "epoch": 0.34, "learning_rate": 0.0009504408033019856, "loss": 3.4011, "step": 1170 }, { "epoch": 0.34, "learning_rate": 0.0009503389276331578, "loss": 3.4842, "step": 1171 }, { "epoch": 0.34, "learning_rate": 0.0009502369528335925, "loss": 3.4707, "step": 1172 }, { "epoch": 0.34, "learning_rate": 0.0009501348789257373, "loss": 3.426, "step": 1173 }, { "epoch": 0.34, "learning_rate": 0.0009500327059320606, "loss": 3.4284, "step": 1174 }, { "epoch": 0.34, "learning_rate": 0.0009499304338750535, "loss": 3.4284, "step": 1175 }, { "epoch": 0.34, "learning_rate": 0.0009498280627772286, "loss": 3.3262, "step": 1176 }, { "epoch": 0.34, "learning_rate": 0.0009497255926611204, "loss": 3.4692, "step": 1177 }, { "epoch": 0.34, "learning_rate": 0.0009496230235492848, "loss": 3.6114, "step": 1178 }, { "epoch": 0.34, "learning_rate": 0.0009495203554643002, "loss": 3.506, "step": 1179 }, { "epoch": 0.34, "learning_rate": 0.0009494175884287659, "loss": 3.4448, "step": 1180 }, { "epoch": 0.34, "learning_rate": 0.000949314722465304, "loss": 3.4518, "step": 1181 }, { "epoch": 0.34, "learning_rate": 0.0009492117575965576, "loss": 3.452, "step": 1182 }, { "epoch": 0.34, "learning_rate": 0.0009491086938451916, "loss": 3.4753, "step": 1183 }, { "epoch": 0.34, "learning_rate": 0.0009490055312338932, "loss": 3.4168, "step": 1184 }, { "epoch": 0.34, "learning_rate": 0.0009489022697853709, "loss": 3.4701, "step": 1185 }, { "epoch": 0.34, "learning_rate": 0.000948798909522355, "loss": 3.3463, "step": 1186 }, { "epoch": 0.34, "learning_rate": 0.000948695450467598, "loss": 3.4065, "step": 1187 }, { "epoch": 0.34, "learning_rate": 0.0009485918926438735, "loss": 3.648, "step": 1188 }, { "epoch": 0.34, "learning_rate": 0.0009484882360739771, "loss": 3.337, "step": 1189 }, { "epoch": 0.34, "learning_rate": 0.0009483844807807265, "loss": 3.5844, "step": 1190 }, { "epoch": 0.34, "learning_rate": 0.0009482806267869604, "loss": 3.5108, "step": 1191 }, { "epoch": 0.35, "learning_rate": 0.00094817667411554, "loss": 3.5523, "step": 1192 }, { "epoch": 0.35, "learning_rate": 0.0009480726227893474, "loss": 3.4023, "step": 1193 }, { "epoch": 0.35, "learning_rate": 0.0009479684728312873, "loss": 3.4591, "step": 1194 }, { "epoch": 0.35, "learning_rate": 0.0009478642242642856, "loss": 3.4727, "step": 1195 }, { "epoch": 0.35, "learning_rate": 0.00094775987711129, "loss": 3.5113, "step": 1196 }, { "epoch": 0.35, "learning_rate": 0.0009476554313952697, "loss": 3.527, "step": 1197 }, { "epoch": 0.35, "learning_rate": 0.0009475508871392157, "loss": 3.4104, "step": 1198 }, { "epoch": 0.35, "learning_rate": 0.0009474462443661414, "loss": 3.5622, "step": 1199 }, { "epoch": 0.35, "learning_rate": 0.0009473415030990805, "loss": 3.4857, "step": 1200 }, { "epoch": 0.35, "learning_rate": 0.0009472366633610896, "loss": 3.3896, "step": 1201 }, { "epoch": 0.35, "learning_rate": 0.0009471317251752464, "loss": 3.4251, "step": 1202 }, { "epoch": 0.35, "learning_rate": 0.0009470266885646504, "loss": 3.5522, "step": 1203 }, { "epoch": 0.35, "learning_rate": 0.0009469215535524227, "loss": 3.3946, "step": 1204 }, { "epoch": 0.35, "learning_rate": 0.0009468163201617061, "loss": 3.3417, "step": 1205 }, { "epoch": 0.35, "learning_rate": 0.0009467109884156653, "loss": 3.5147, "step": 1206 }, { "epoch": 0.35, "learning_rate": 0.0009466055583374862, "loss": 3.3857, "step": 1207 }, { "epoch": 0.35, "learning_rate": 0.0009465000299503766, "loss": 3.4718, "step": 1208 }, { "epoch": 0.35, "learning_rate": 0.0009463944032775659, "loss": 3.3237, "step": 1209 }, { "epoch": 0.35, "learning_rate": 0.0009462886783423052, "loss": 3.5157, "step": 1210 }, { "epoch": 0.35, "learning_rate": 0.000946182855167867, "loss": 3.4439, "step": 1211 }, { "epoch": 0.35, "learning_rate": 0.000946076933777546, "loss": 3.3567, "step": 1212 }, { "epoch": 0.35, "learning_rate": 0.0009459709141946578, "loss": 3.3312, "step": 1213 }, { "epoch": 0.35, "learning_rate": 0.00094586479644254, "loss": 3.3966, "step": 1214 }, { "epoch": 0.35, "learning_rate": 0.0009457585805445517, "loss": 3.3479, "step": 1215 }, { "epoch": 0.35, "learning_rate": 0.0009456522665240737, "loss": 3.5239, "step": 1216 }, { "epoch": 0.35, "learning_rate": 0.0009455458544045084, "loss": 3.4368, "step": 1217 }, { "epoch": 0.35, "learning_rate": 0.0009454393442092795, "loss": 3.4658, "step": 1218 }, { "epoch": 0.35, "learning_rate": 0.0009453327359618329, "loss": 3.4498, "step": 1219 }, { "epoch": 0.35, "learning_rate": 0.0009452260296856354, "loss": 3.5219, "step": 1220 }, { "epoch": 0.35, "learning_rate": 0.0009451192254041758, "loss": 3.3854, "step": 1221 }, { "epoch": 0.35, "learning_rate": 0.0009450123231409645, "loss": 3.5392, "step": 1222 }, { "epoch": 0.35, "learning_rate": 0.000944905322919533, "loss": 3.4415, "step": 1223 }, { "epoch": 0.35, "learning_rate": 0.000944798224763435, "loss": 3.4649, "step": 1224 }, { "epoch": 0.35, "learning_rate": 0.0009446910286962454, "loss": 3.4086, "step": 1225 }, { "epoch": 0.36, "learning_rate": 0.0009445837347415607, "loss": 3.5799, "step": 1226 }, { "epoch": 0.36, "learning_rate": 0.0009444763429229988, "loss": 3.4461, "step": 1227 }, { "epoch": 0.36, "learning_rate": 0.0009443688532641994, "loss": 3.4643, "step": 1228 }, { "epoch": 0.36, "learning_rate": 0.0009442612657888236, "loss": 3.4334, "step": 1229 }, { "epoch": 0.36, "learning_rate": 0.0009441535805205541, "loss": 3.4699, "step": 1230 }, { "epoch": 0.36, "learning_rate": 0.0009440457974830952, "loss": 3.4442, "step": 1231 }, { "epoch": 0.36, "learning_rate": 0.0009439379167001725, "loss": 3.4856, "step": 1232 }, { "epoch": 0.36, "learning_rate": 0.0009438299381955332, "loss": 3.5447, "step": 1233 }, { "epoch": 0.36, "learning_rate": 0.0009437218619929461, "loss": 3.5375, "step": 1234 }, { "epoch": 0.36, "learning_rate": 0.0009436136881162014, "loss": 3.4859, "step": 1235 }, { "epoch": 0.36, "learning_rate": 0.0009435054165891108, "loss": 3.5139, "step": 1236 }, { "epoch": 0.36, "learning_rate": 0.0009433970474355078, "loss": 3.431, "step": 1237 }, { "epoch": 0.36, "learning_rate": 0.0009432885806792468, "loss": 3.5373, "step": 1238 }, { "epoch": 0.36, "learning_rate": 0.0009431800163442042, "loss": 3.5694, "step": 1239 }, { "epoch": 0.36, "learning_rate": 0.0009430713544542775, "loss": 3.5188, "step": 1240 }, { "epoch": 0.36, "learning_rate": 0.0009429625950333863, "loss": 3.4209, "step": 1241 }, { "epoch": 0.36, "learning_rate": 0.0009428537381054707, "loss": 3.5914, "step": 1242 }, { "epoch": 0.36, "learning_rate": 0.0009427447836944931, "loss": 3.4998, "step": 1243 }, { "epoch": 0.36, "learning_rate": 0.0009426357318244371, "loss": 3.375, "step": 1244 }, { "epoch": 0.36, "learning_rate": 0.0009425265825193077, "loss": 3.5494, "step": 1245 }, { "epoch": 0.36, "learning_rate": 0.0009424173358031313, "loss": 3.4496, "step": 1246 }, { "epoch": 0.36, "learning_rate": 0.0009423079916999557, "loss": 3.4364, "step": 1247 }, { "epoch": 0.36, "learning_rate": 0.0009421985502338503, "loss": 3.3708, "step": 1248 }, { "epoch": 0.36, "learning_rate": 0.000942089011428906, "loss": 3.4589, "step": 1249 }, { "epoch": 0.36, "learning_rate": 0.000941979375309235, "loss": 3.5386, "step": 1250 }, { "epoch": 0.36, "learning_rate": 0.0009418696418989707, "loss": 3.3466, "step": 1251 }, { "epoch": 0.36, "learning_rate": 0.0009417598112222684, "loss": 3.4504, "step": 1252 }, { "epoch": 0.36, "learning_rate": 0.0009416498833033043, "loss": 3.4788, "step": 1253 }, { "epoch": 0.36, "learning_rate": 0.0009415398581662766, "loss": 3.5419, "step": 1254 }, { "epoch": 0.36, "learning_rate": 0.0009414297358354041, "loss": 3.5052, "step": 1255 }, { "epoch": 0.36, "learning_rate": 0.0009413195163349278, "loss": 3.5594, "step": 1256 }, { "epoch": 0.36, "learning_rate": 0.0009412091996891096, "loss": 3.4582, "step": 1257 }, { "epoch": 0.36, "learning_rate": 0.0009410987859222328, "loss": 3.5344, "step": 1258 }, { "epoch": 0.36, "learning_rate": 0.0009409882750586027, "loss": 3.4305, "step": 1259 }, { "epoch": 0.36, "learning_rate": 0.0009408776671225448, "loss": 3.3858, "step": 1260 }, { "epoch": 0.37, "learning_rate": 0.0009407669621384071, "loss": 3.4205, "step": 1261 }, { "epoch": 0.37, "learning_rate": 0.0009406561601305583, "loss": 3.3421, "step": 1262 }, { "epoch": 0.37, "learning_rate": 0.0009405452611233888, "loss": 3.4428, "step": 1263 }, { "epoch": 0.37, "learning_rate": 0.0009404342651413102, "loss": 3.4048, "step": 1264 }, { "epoch": 0.37, "learning_rate": 0.0009403231722087552, "loss": 3.4089, "step": 1265 }, { "epoch": 0.37, "learning_rate": 0.0009402119823501786, "loss": 3.533, "step": 1266 }, { "epoch": 0.37, "learning_rate": 0.0009401006955900555, "loss": 3.4595, "step": 1267 }, { "epoch": 0.37, "learning_rate": 0.0009399893119528832, "loss": 3.5276, "step": 1268 }, { "epoch": 0.37, "learning_rate": 0.0009398778314631801, "loss": 3.4688, "step": 1269 }, { "epoch": 0.37, "learning_rate": 0.0009397662541454854, "loss": 3.4219, "step": 1270 }, { "epoch": 0.37, "learning_rate": 0.0009396545800243603, "loss": 3.515, "step": 1271 }, { "epoch": 0.37, "learning_rate": 0.000939542809124387, "loss": 3.5304, "step": 1272 }, { "epoch": 0.37, "learning_rate": 0.0009394309414701691, "loss": 3.4831, "step": 1273 }, { "epoch": 0.37, "learning_rate": 0.0009393189770863313, "loss": 3.3917, "step": 1274 }, { "epoch": 0.37, "learning_rate": 0.0009392069159975199, "loss": 3.384, "step": 1275 }, { "epoch": 0.37, "learning_rate": 0.000939094758228402, "loss": 3.4211, "step": 1276 }, { "epoch": 0.37, "learning_rate": 0.0009389825038036665, "loss": 3.3417, "step": 1277 }, { "epoch": 0.37, "learning_rate": 0.0009388701527480237, "loss": 3.3621, "step": 1278 }, { "epoch": 0.37, "learning_rate": 0.0009387577050862043, "loss": 3.521, "step": 1279 }, { "epoch": 0.37, "learning_rate": 0.000938645160842961, "loss": 3.4802, "step": 1280 }, { "epoch": 0.37, "learning_rate": 0.0009385325200430678, "loss": 3.3666, "step": 1281 }, { "epoch": 0.37, "learning_rate": 0.0009384197827113195, "loss": 3.4192, "step": 1282 }, { "epoch": 0.37, "learning_rate": 0.0009383069488725323, "loss": 3.373, "step": 1283 }, { "epoch": 0.37, "learning_rate": 0.0009381940185515439, "loss": 3.4617, "step": 1284 }, { "epoch": 0.37, "learning_rate": 0.0009380809917732131, "loss": 3.4004, "step": 1285 }, { "epoch": 0.37, "learning_rate": 0.0009379678685624196, "loss": 3.3708, "step": 1286 }, { "epoch": 0.37, "learning_rate": 0.000937854648944065, "loss": 3.3608, "step": 1287 }, { "epoch": 0.37, "learning_rate": 0.0009377413329430715, "loss": 3.2966, "step": 1288 }, { "epoch": 0.37, "learning_rate": 0.0009376279205843827, "loss": 3.3409, "step": 1289 }, { "epoch": 0.37, "learning_rate": 0.0009375144118929637, "loss": 3.532, "step": 1290 }, { "epoch": 0.37, "learning_rate": 0.0009374008068938003, "loss": 3.3268, "step": 1291 }, { "epoch": 0.37, "learning_rate": 0.0009372871056118998, "loss": 3.4235, "step": 1292 }, { "epoch": 0.37, "learning_rate": 0.0009371733080722911, "loss": 3.2505, "step": 1293 }, { "epoch": 0.37, "learning_rate": 0.0009370594143000233, "loss": 3.3253, "step": 1294 }, { "epoch": 0.38, "learning_rate": 0.0009369454243201676, "loss": 3.4299, "step": 1295 }, { "epoch": 0.38, "learning_rate": 0.0009368313381578157, "loss": 3.4081, "step": 1296 }, { "epoch": 0.38, "learning_rate": 0.0009367171558380812, "loss": 3.4095, "step": 1297 }, { "epoch": 0.38, "learning_rate": 0.0009366028773860979, "loss": 3.4344, "step": 1298 }, { "epoch": 0.38, "learning_rate": 0.0009364885028270219, "loss": 3.4723, "step": 1299 }, { "epoch": 0.38, "learning_rate": 0.0009363740321860294, "loss": 3.5192, "step": 1300 }, { "epoch": 0.38, "learning_rate": 0.0009362594654883185, "loss": 3.4867, "step": 1301 }, { "epoch": 0.38, "learning_rate": 0.000936144802759108, "loss": 3.5584, "step": 1302 }, { "epoch": 0.38, "learning_rate": 0.0009360300440236383, "loss": 3.4309, "step": 1303 }, { "epoch": 0.38, "learning_rate": 0.0009359151893071701, "loss": 3.4978, "step": 1304 }, { "epoch": 0.38, "learning_rate": 0.0009358002386349862, "loss": 3.2717, "step": 1305 }, { "epoch": 0.38, "learning_rate": 0.0009356851920323901, "loss": 3.3344, "step": 1306 }, { "epoch": 0.38, "learning_rate": 0.0009355700495247061, "loss": 3.5188, "step": 1307 }, { "epoch": 0.38, "learning_rate": 0.0009354548111372802, "loss": 3.4985, "step": 1308 }, { "epoch": 0.38, "learning_rate": 0.000935339476895479, "loss": 3.4764, "step": 1309 }, { "epoch": 0.38, "learning_rate": 0.0009352240468246907, "loss": 3.29, "step": 1310 }, { "epoch": 0.38, "learning_rate": 0.000935108520950324, "loss": 3.3709, "step": 1311 }, { "epoch": 0.38, "learning_rate": 0.0009349928992978093, "loss": 3.4172, "step": 1312 }, { "epoch": 0.38, "learning_rate": 0.0009348771818925974, "loss": 3.3744, "step": 1313 }, { "epoch": 0.38, "learning_rate": 0.0009347613687601611, "loss": 3.4147, "step": 1314 }, { "epoch": 0.38, "learning_rate": 0.0009346454599259931, "loss": 3.421, "step": 1315 }, { "epoch": 0.38, "learning_rate": 0.0009345294554156084, "loss": 3.4694, "step": 1316 }, { "epoch": 0.38, "learning_rate": 0.000934413355254542, "loss": 3.4997, "step": 1317 }, { "epoch": 0.38, "learning_rate": 0.0009342971594683506, "loss": 3.3398, "step": 1318 }, { "epoch": 0.38, "learning_rate": 0.0009341808680826118, "loss": 3.4477, "step": 1319 }, { "epoch": 0.38, "learning_rate": 0.0009340644811229242, "loss": 3.5037, "step": 1320 }, { "epoch": 0.38, "learning_rate": 0.0009339479986149075, "loss": 3.5472, "step": 1321 }, { "epoch": 0.38, "learning_rate": 0.0009338314205842022, "loss": 3.469, "step": 1322 }, { "epoch": 0.38, "learning_rate": 0.0009337147470564702, "loss": 3.3621, "step": 1323 }, { "epoch": 0.38, "learning_rate": 0.0009335979780573941, "loss": 3.3787, "step": 1324 }, { "epoch": 0.38, "learning_rate": 0.0009334811136126778, "loss": 3.4651, "step": 1325 }, { "epoch": 0.38, "learning_rate": 0.0009333641537480459, "loss": 3.4207, "step": 1326 }, { "epoch": 0.38, "learning_rate": 0.000933247098489244, "loss": 3.3333, "step": 1327 }, { "epoch": 0.38, "learning_rate": 0.0009331299478620394, "loss": 3.427, "step": 1328 }, { "epoch": 0.38, "learning_rate": 0.0009330127018922195, "loss": 3.5332, "step": 1329 }, { "epoch": 0.39, "learning_rate": 0.0009328953606055928, "loss": 3.4831, "step": 1330 }, { "epoch": 0.39, "learning_rate": 0.0009327779240279894, "loss": 3.3182, "step": 1331 }, { "epoch": 0.39, "learning_rate": 0.0009326603921852599, "loss": 3.4366, "step": 1332 }, { "epoch": 0.39, "learning_rate": 0.0009325427651032758, "loss": 3.4651, "step": 1333 }, { "epoch": 0.39, "learning_rate": 0.0009324250428079301, "loss": 3.4332, "step": 1334 }, { "epoch": 0.39, "learning_rate": 0.000932307225325136, "loss": 3.3028, "step": 1335 }, { "epoch": 0.39, "learning_rate": 0.0009321893126808283, "loss": 3.4168, "step": 1336 }, { "epoch": 0.39, "learning_rate": 0.0009320713049009624, "loss": 3.4024, "step": 1337 }, { "epoch": 0.39, "learning_rate": 0.0009319532020115146, "loss": 3.2594, "step": 1338 }, { "epoch": 0.39, "learning_rate": 0.0009318350040384824, "loss": 3.3419, "step": 1339 }, { "epoch": 0.39, "learning_rate": 0.0009317167110078842, "loss": 3.3499, "step": 1340 }, { "epoch": 0.39, "learning_rate": 0.000931598322945759, "loss": 3.4765, "step": 1341 }, { "epoch": 0.39, "learning_rate": 0.000931479839878167, "loss": 3.4306, "step": 1342 }, { "epoch": 0.39, "learning_rate": 0.0009313612618311896, "loss": 3.5119, "step": 1343 }, { "epoch": 0.39, "learning_rate": 0.0009312425888309281, "loss": 3.4875, "step": 1344 }, { "epoch": 0.39, "learning_rate": 0.000931123820903506, "loss": 3.5151, "step": 1345 }, { "epoch": 0.39, "learning_rate": 0.0009310049580750666, "loss": 3.4533, "step": 1346 }, { "epoch": 0.39, "learning_rate": 0.0009308860003717749, "loss": 3.4276, "step": 1347 }, { "epoch": 0.39, "learning_rate": 0.0009307669478198162, "loss": 3.444, "step": 1348 }, { "epoch": 0.39, "learning_rate": 0.0009306478004453969, "loss": 3.4755, "step": 1349 }, { "epoch": 0.39, "learning_rate": 0.0009305285582747444, "loss": 3.4241, "step": 1350 }, { "epoch": 0.39, "learning_rate": 0.0009304092213341068, "loss": 3.3772, "step": 1351 }, { "epoch": 0.39, "learning_rate": 0.0009302897896497532, "loss": 3.3596, "step": 1352 }, { "epoch": 0.39, "learning_rate": 0.0009301702632479734, "loss": 3.5062, "step": 1353 }, { "epoch": 0.39, "learning_rate": 0.0009300506421550781, "loss": 3.3361, "step": 1354 }, { "epoch": 0.39, "learning_rate": 0.0009299309263973987, "loss": 3.5178, "step": 1355 }, { "epoch": 0.39, "learning_rate": 0.000929811116001288, "loss": 3.346, "step": 1356 }, { "epoch": 0.39, "learning_rate": 0.0009296912109931188, "loss": 3.4259, "step": 1357 }, { "epoch": 0.39, "learning_rate": 0.0009295712113992856, "loss": 3.5577, "step": 1358 }, { "epoch": 0.39, "learning_rate": 0.000929451117246203, "loss": 3.2834, "step": 1359 }, { "epoch": 0.39, "learning_rate": 0.0009293309285603067, "loss": 3.4307, "step": 1360 }, { "epoch": 0.39, "learning_rate": 0.0009292106453680531, "loss": 3.2813, "step": 1361 }, { "epoch": 0.39, "learning_rate": 0.0009290902676959197, "loss": 3.2926, "step": 1362 }, { "epoch": 0.39, "learning_rate": 0.0009289697955704047, "loss": 3.3531, "step": 1363 }, { "epoch": 0.4, "learning_rate": 0.0009288492290180266, "loss": 3.4795, "step": 1364 }, { "epoch": 0.4, "learning_rate": 0.0009287285680653254, "loss": 3.399, "step": 1365 }, { "epoch": 0.4, "learning_rate": 0.0009286078127388617, "loss": 3.3518, "step": 1366 }, { "epoch": 0.4, "learning_rate": 0.0009284869630652164, "loss": 3.4574, "step": 1367 }, { "epoch": 0.4, "learning_rate": 0.0009283660190709914, "loss": 3.5092, "step": 1368 }, { "epoch": 0.4, "learning_rate": 0.00092824498078281, "loss": 3.5326, "step": 1369 }, { "epoch": 0.4, "learning_rate": 0.0009281238482273153, "loss": 3.3758, "step": 1370 }, { "epoch": 0.4, "learning_rate": 0.0009280026214311716, "loss": 3.402, "step": 1371 }, { "epoch": 0.4, "learning_rate": 0.000927881300421064, "loss": 3.3841, "step": 1372 }, { "epoch": 0.4, "learning_rate": 0.0009277598852236984, "loss": 3.4685, "step": 1373 }, { "epoch": 0.4, "learning_rate": 0.000927638375865801, "loss": 3.3931, "step": 1374 }, { "epoch": 0.4, "learning_rate": 0.0009275167723741193, "loss": 3.4629, "step": 1375 }, { "epoch": 0.4, "learning_rate": 0.0009273950747754209, "loss": 3.5442, "step": 1376 }, { "epoch": 0.4, "learning_rate": 0.0009272732830964948, "loss": 3.3796, "step": 1377 }, { "epoch": 0.4, "learning_rate": 0.0009271513973641501, "loss": 3.4243, "step": 1378 }, { "epoch": 0.4, "learning_rate": 0.000927029417605217, "loss": 3.333, "step": 1379 }, { "epoch": 0.4, "learning_rate": 0.0009269073438465462, "loss": 3.3831, "step": 1380 }, { "epoch": 0.4, "learning_rate": 0.0009267851761150093, "loss": 3.371, "step": 1381 }, { "epoch": 0.4, "learning_rate": 0.000926662914437498, "loss": 3.3735, "step": 1382 }, { "epoch": 0.4, "learning_rate": 0.0009265405588409257, "loss": 3.5322, "step": 1383 }, { "epoch": 0.4, "learning_rate": 0.0009264181093522257, "loss": 3.4954, "step": 1384 }, { "epoch": 0.4, "learning_rate": 0.0009262955659983521, "loss": 3.3605, "step": 1385 }, { "epoch": 0.4, "learning_rate": 0.0009261729288062795, "loss": 3.4737, "step": 1386 }, { "epoch": 0.4, "learning_rate": 0.0009260501978030038, "loss": 3.3139, "step": 1387 }, { "epoch": 0.4, "learning_rate": 0.0009259273730155409, "loss": 3.4509, "step": 1388 }, { "epoch": 0.4, "learning_rate": 0.0009258044544709274, "loss": 3.4176, "step": 1389 }, { "epoch": 0.4, "learning_rate": 0.0009256814421962212, "loss": 3.4684, "step": 1390 }, { "epoch": 0.4, "learning_rate": 0.0009255583362184998, "loss": 3.4119, "step": 1391 }, { "epoch": 0.4, "learning_rate": 0.0009254351365648623, "loss": 3.4409, "step": 1392 }, { "epoch": 0.4, "learning_rate": 0.0009253118432624279, "loss": 3.4422, "step": 1393 }, { "epoch": 0.4, "learning_rate": 0.0009251884563383362, "loss": 3.4088, "step": 1394 }, { "epoch": 0.4, "learning_rate": 0.0009250649758197481, "loss": 3.4316, "step": 1395 }, { "epoch": 0.4, "learning_rate": 0.0009249414017338444, "loss": 3.5577, "step": 1396 }, { "epoch": 0.4, "learning_rate": 0.0009248177341078272, "loss": 3.3473, "step": 1397 }, { "epoch": 0.4, "learning_rate": 0.0009246939729689185, "loss": 3.3608, "step": 1398 }, { "epoch": 0.41, "learning_rate": 0.0009245701183443612, "loss": 3.4354, "step": 1399 }, { "epoch": 0.41, "learning_rate": 0.0009244461702614189, "loss": 3.4208, "step": 1400 }, { "epoch": 0.41, "learning_rate": 0.0009243221287473755, "loss": 3.3903, "step": 1401 }, { "epoch": 0.41, "learning_rate": 0.0009241979938295358, "loss": 3.5084, "step": 1402 }, { "epoch": 0.41, "learning_rate": 0.0009240737655352247, "loss": 3.4489, "step": 1403 }, { "epoch": 0.41, "learning_rate": 0.000923949443891788, "loss": 3.4788, "step": 1404 }, { "epoch": 0.41, "learning_rate": 0.0009238250289265921, "loss": 3.4446, "step": 1405 }, { "epoch": 0.41, "learning_rate": 0.0009237005206670238, "loss": 3.2476, "step": 1406 }, { "epoch": 0.41, "learning_rate": 0.0009235759191404904, "loss": 3.4619, "step": 1407 }, { "epoch": 0.41, "learning_rate": 0.0009234512243744197, "loss": 3.3443, "step": 1408 }, { "epoch": 0.41, "learning_rate": 0.0009233264363962601, "loss": 3.458, "step": 1409 }, { "epoch": 0.41, "learning_rate": 0.0009232015552334807, "loss": 3.4374, "step": 1410 }, { "epoch": 0.41, "learning_rate": 0.0009230765809135708, "loss": 3.4712, "step": 1411 }, { "epoch": 0.41, "learning_rate": 0.0009229515134640405, "loss": 3.4655, "step": 1412 }, { "epoch": 0.41, "learning_rate": 0.0009228263529124198, "loss": 3.3235, "step": 1413 }, { "epoch": 0.41, "learning_rate": 0.0009227010992862603, "loss": 3.4861, "step": 1414 }, { "epoch": 0.41, "learning_rate": 0.0009225757526131328, "loss": 3.4776, "step": 1415 }, { "epoch": 0.41, "learning_rate": 0.0009224503129206294, "loss": 3.4416, "step": 1416 }, { "epoch": 0.41, "learning_rate": 0.0009223247802363628, "loss": 3.414, "step": 1417 }, { "epoch": 0.41, "learning_rate": 0.0009221991545879653, "loss": 3.3897, "step": 1418 }, { "epoch": 0.41, "learning_rate": 0.0009220734360030907, "loss": 3.4098, "step": 1419 }, { "epoch": 0.41, "learning_rate": 0.0009219476245094125, "loss": 3.3968, "step": 1420 }, { "epoch": 0.41, "learning_rate": 0.0009218217201346251, "loss": 3.4156, "step": 1421 }, { "epoch": 0.41, "learning_rate": 0.0009216957229064429, "loss": 3.4565, "step": 1422 }, { "epoch": 0.41, "learning_rate": 0.0009215696328526012, "loss": 3.3153, "step": 1423 }, { "epoch": 0.41, "learning_rate": 0.0009214434500008555, "loss": 3.4571, "step": 1424 }, { "epoch": 0.41, "learning_rate": 0.0009213171743789819, "loss": 3.3284, "step": 1425 }, { "epoch": 0.41, "learning_rate": 0.0009211908060147764, "loss": 3.4729, "step": 1426 }, { "epoch": 0.41, "learning_rate": 0.0009210643449360563, "loss": 3.4779, "step": 1427 }, { "epoch": 0.41, "learning_rate": 0.0009209377911706584, "loss": 3.3601, "step": 1428 }, { "epoch": 0.41, "learning_rate": 0.0009208111447464406, "loss": 3.5096, "step": 1429 }, { "epoch": 0.41, "learning_rate": 0.0009206844056912809, "loss": 3.4168, "step": 1430 }, { "epoch": 0.41, "learning_rate": 0.0009205575740330774, "loss": 3.5034, "step": 1431 }, { "epoch": 0.41, "learning_rate": 0.0009204306497997492, "loss": 3.4343, "step": 1432 }, { "epoch": 0.42, "learning_rate": 0.0009203036330192354, "loss": 3.3579, "step": 1433 }, { "epoch": 0.42, "learning_rate": 0.0009201765237194954, "loss": 3.3178, "step": 1434 }, { "epoch": 0.42, "learning_rate": 0.0009200493219285091, "loss": 3.5167, "step": 1435 }, { "epoch": 0.42, "learning_rate": 0.000919922027674277, "loss": 3.3586, "step": 1436 }, { "epoch": 0.42, "learning_rate": 0.0009197946409848195, "loss": 3.3971, "step": 1437 }, { "epoch": 0.42, "learning_rate": 0.0009196671618881775, "loss": 3.4838, "step": 1438 }, { "epoch": 0.42, "learning_rate": 0.0009195395904124127, "loss": 3.3883, "step": 1439 }, { "epoch": 0.42, "learning_rate": 0.0009194119265856063, "loss": 3.3505, "step": 1440 }, { "epoch": 0.42, "learning_rate": 0.0009192841704358603, "loss": 3.4158, "step": 1441 }, { "epoch": 0.42, "learning_rate": 0.0009191563219912973, "loss": 3.3792, "step": 1442 }, { "epoch": 0.42, "learning_rate": 0.0009190283812800596, "loss": 3.3501, "step": 1443 }, { "epoch": 0.42, "learning_rate": 0.0009189003483303103, "loss": 3.4569, "step": 1444 }, { "epoch": 0.42, "learning_rate": 0.0009187722231702326, "loss": 3.3888, "step": 1445 }, { "epoch": 0.42, "learning_rate": 0.0009186440058280298, "loss": 3.3838, "step": 1446 }, { "epoch": 0.42, "learning_rate": 0.0009185156963319261, "loss": 3.4199, "step": 1447 }, { "epoch": 0.42, "learning_rate": 0.0009183872947101652, "loss": 3.4749, "step": 1448 }, { "epoch": 0.42, "learning_rate": 0.0009182588009910118, "loss": 3.3924, "step": 1449 }, { "epoch": 0.42, "learning_rate": 0.0009181302152027503, "loss": 3.4622, "step": 1450 }, { "epoch": 0.42, "learning_rate": 0.0009180015373736856, "loss": 3.3308, "step": 1451 }, { "epoch": 0.42, "learning_rate": 0.0009178727675321432, "loss": 3.4488, "step": 1452 }, { "epoch": 0.42, "learning_rate": 0.0009177439057064682, "loss": 3.3675, "step": 1453 }, { "epoch": 0.42, "learning_rate": 0.0009176149519250264, "loss": 3.437, "step": 1454 }, { "epoch": 0.42, "learning_rate": 0.0009174859062162038, "loss": 3.4502, "step": 1455 }, { "epoch": 0.42, "learning_rate": 0.0009173567686084063, "loss": 3.4229, "step": 1456 }, { "epoch": 0.42, "learning_rate": 0.0009172275391300604, "loss": 3.5385, "step": 1457 }, { "epoch": 0.42, "learning_rate": 0.0009170982178096128, "loss": 3.4238, "step": 1458 }, { "epoch": 0.42, "learning_rate": 0.0009169688046755302, "loss": 3.3664, "step": 1459 }, { "epoch": 0.42, "learning_rate": 0.0009168392997562999, "loss": 3.5207, "step": 1460 }, { "epoch": 0.42, "learning_rate": 0.0009167097030804288, "loss": 3.3588, "step": 1461 }, { "epoch": 0.42, "learning_rate": 0.0009165800146764445, "loss": 3.5378, "step": 1462 }, { "epoch": 0.42, "learning_rate": 0.0009164502345728945, "loss": 3.3924, "step": 1463 }, { "epoch": 0.42, "learning_rate": 0.0009163203627983468, "loss": 3.472, "step": 1464 }, { "epoch": 0.42, "learning_rate": 0.0009161903993813892, "loss": 3.4022, "step": 1465 }, { "epoch": 0.42, "learning_rate": 0.0009160603443506299, "loss": 3.404, "step": 1466 }, { "epoch": 0.42, "learning_rate": 0.0009159301977346975, "loss": 3.314, "step": 1467 }, { "epoch": 0.43, "learning_rate": 0.0009157999595622399, "loss": 3.2916, "step": 1468 }, { "epoch": 0.43, "learning_rate": 0.0009156696298619264, "loss": 3.3927, "step": 1469 }, { "epoch": 0.43, "learning_rate": 0.0009155392086624454, "loss": 3.3874, "step": 1470 }, { "epoch": 0.43, "learning_rate": 0.0009154086959925061, "loss": 3.5102, "step": 1471 }, { "epoch": 0.43, "learning_rate": 0.0009152780918808372, "loss": 3.4116, "step": 1472 }, { "epoch": 0.43, "learning_rate": 0.0009151473963561883, "loss": 3.5719, "step": 1473 }, { "epoch": 0.43, "learning_rate": 0.0009150166094473283, "loss": 3.4166, "step": 1474 }, { "epoch": 0.43, "learning_rate": 0.000914885731183047, "loss": 3.4619, "step": 1475 }, { "epoch": 0.43, "learning_rate": 0.0009147547615921536, "loss": 3.4612, "step": 1476 }, { "epoch": 0.43, "learning_rate": 0.0009146237007034781, "loss": 3.5615, "step": 1477 }, { "epoch": 0.43, "learning_rate": 0.0009144925485458701, "loss": 3.4768, "step": 1478 }, { "epoch": 0.43, "learning_rate": 0.0009143613051481994, "loss": 3.3689, "step": 1479 }, { "epoch": 0.43, "learning_rate": 0.0009142299705393559, "loss": 3.4848, "step": 1480 }, { "epoch": 0.43, "learning_rate": 0.0009140985447482497, "loss": 3.479, "step": 1481 }, { "epoch": 0.43, "learning_rate": 0.0009139670278038108, "loss": 3.27, "step": 1482 }, { "epoch": 0.43, "learning_rate": 0.0009138354197349892, "loss": 3.5017, "step": 1483 }, { "epoch": 0.43, "learning_rate": 0.0009137037205707553, "loss": 3.4301, "step": 1484 }, { "epoch": 0.43, "learning_rate": 0.0009135719303400993, "loss": 3.51, "step": 1485 }, { "epoch": 0.43, "learning_rate": 0.0009134400490720313, "loss": 3.493, "step": 1486 }, { "epoch": 0.43, "learning_rate": 0.000913308076795582, "loss": 3.3665, "step": 1487 }, { "epoch": 0.43, "learning_rate": 0.0009131760135398015, "loss": 3.2569, "step": 1488 }, { "epoch": 0.43, "learning_rate": 0.0009130438593337602, "loss": 3.4917, "step": 1489 }, { "epoch": 0.43, "learning_rate": 0.0009129116142065486, "loss": 3.3882, "step": 1490 }, { "epoch": 0.43, "learning_rate": 0.0009127792781872769, "loss": 3.4547, "step": 1491 }, { "epoch": 0.43, "learning_rate": 0.0009126468513050758, "loss": 3.5479, "step": 1492 }, { "epoch": 0.43, "learning_rate": 0.0009125143335890957, "loss": 3.326, "step": 1493 }, { "epoch": 0.43, "learning_rate": 0.000912381725068507, "loss": 3.3763, "step": 1494 }, { "epoch": 0.43, "learning_rate": 0.0009122490257724999, "loss": 3.4186, "step": 1495 }, { "epoch": 0.43, "learning_rate": 0.000912116235730285, "loss": 3.3406, "step": 1496 }, { "epoch": 0.43, "learning_rate": 0.0009119833549710928, "loss": 3.4783, "step": 1497 }, { "epoch": 0.43, "learning_rate": 0.0009118503835241734, "loss": 3.3811, "step": 1498 }, { "epoch": 0.43, "learning_rate": 0.0009117173214187971, "loss": 3.4911, "step": 1499 }, { "epoch": 0.43, "learning_rate": 0.0009115841686842544, "loss": 3.4621, "step": 1500 }, { "epoch": 0.43, "learning_rate": 0.0009114509253498553, "loss": 3.4161, "step": 1501 }, { "epoch": 0.43, "learning_rate": 0.0009113175914449301, "loss": 3.4388, "step": 1502 }, { "epoch": 0.44, "learning_rate": 0.0009111841669988288, "loss": 3.4246, "step": 1503 }, { "epoch": 0.44, "learning_rate": 0.0009110506520409213, "loss": 3.3159, "step": 1504 }, { "epoch": 0.44, "learning_rate": 0.0009109170466005979, "loss": 3.5645, "step": 1505 }, { "epoch": 0.44, "learning_rate": 0.0009107833507072679, "loss": 3.3976, "step": 1506 }, { "epoch": 0.44, "learning_rate": 0.0009106495643903616, "loss": 3.3902, "step": 1507 }, { "epoch": 0.44, "learning_rate": 0.0009105156876793286, "loss": 3.2418, "step": 1508 }, { "epoch": 0.44, "learning_rate": 0.0009103817206036382, "loss": 3.4133, "step": 1509 }, { "epoch": 0.44, "learning_rate": 0.0009102476631927801, "loss": 3.3553, "step": 1510 }, { "epoch": 0.44, "learning_rate": 0.0009101135154762635, "loss": 3.4636, "step": 1511 }, { "epoch": 0.44, "learning_rate": 0.0009099792774836178, "loss": 3.3586, "step": 1512 }, { "epoch": 0.44, "learning_rate": 0.0009098449492443921, "loss": 3.3726, "step": 1513 }, { "epoch": 0.44, "learning_rate": 0.0009097105307881552, "loss": 3.3504, "step": 1514 }, { "epoch": 0.44, "learning_rate": 0.0009095760221444959, "loss": 3.3367, "step": 1515 }, { "epoch": 0.44, "learning_rate": 0.000909441423343023, "loss": 3.4277, "step": 1516 }, { "epoch": 0.44, "learning_rate": 0.0009093067344133652, "loss": 3.4782, "step": 1517 }, { "epoch": 0.44, "learning_rate": 0.0009091719553851706, "loss": 3.3788, "step": 1518 }, { "epoch": 0.44, "learning_rate": 0.0009090370862881074, "loss": 3.4783, "step": 1519 }, { "epoch": 0.44, "learning_rate": 0.0009089021271518639, "loss": 3.2593, "step": 1520 }, { "epoch": 0.44, "learning_rate": 0.0009087670780061475, "loss": 3.4685, "step": 1521 }, { "epoch": 0.44, "learning_rate": 0.0009086319388806863, "loss": 3.448, "step": 1522 }, { "epoch": 0.44, "learning_rate": 0.0009084967098052275, "loss": 3.3183, "step": 1523 }, { "epoch": 0.44, "learning_rate": 0.0009083613908095385, "loss": 3.4089, "step": 1524 }, { "epoch": 0.44, "learning_rate": 0.0009082259819234062, "loss": 3.3306, "step": 1525 }, { "epoch": 0.44, "learning_rate": 0.0009080904831766375, "loss": 3.485, "step": 1526 }, { "epoch": 0.44, "learning_rate": 0.0009079548945990592, "loss": 3.442, "step": 1527 }, { "epoch": 0.44, "learning_rate": 0.0009078192162205174, "loss": 3.4291, "step": 1528 }, { "epoch": 0.44, "learning_rate": 0.0009076834480708786, "loss": 3.4991, "step": 1529 }, { "epoch": 0.44, "learning_rate": 0.0009075475901800285, "loss": 3.3433, "step": 1530 }, { "epoch": 0.44, "learning_rate": 0.0009074116425778729, "loss": 3.4155, "step": 1531 }, { "epoch": 0.44, "learning_rate": 0.000907275605294337, "loss": 3.4234, "step": 1532 }, { "epoch": 0.44, "learning_rate": 0.0009071394783593663, "loss": 3.4906, "step": 1533 }, { "epoch": 0.44, "learning_rate": 0.0009070032618029255, "loss": 3.4373, "step": 1534 }, { "epoch": 0.44, "learning_rate": 0.0009068669556549993, "loss": 3.3876, "step": 1535 }, { "epoch": 0.44, "learning_rate": 0.000906730559945592, "loss": 3.3738, "step": 1536 }, { "epoch": 0.45, "learning_rate": 0.0009065940747047277, "loss": 3.4782, "step": 1537 }, { "epoch": 0.45, "learning_rate": 0.0009064574999624502, "loss": 3.4552, "step": 1538 }, { "epoch": 0.45, "learning_rate": 0.000906320835748823, "loss": 3.5532, "step": 1539 }, { "epoch": 0.45, "learning_rate": 0.0009061840820939291, "loss": 3.4128, "step": 1540 }, { "epoch": 0.45, "learning_rate": 0.0009060472390278717, "loss": 3.4418, "step": 1541 }, { "epoch": 0.45, "learning_rate": 0.000905910306580773, "loss": 3.4463, "step": 1542 }, { "epoch": 0.45, "learning_rate": 0.0009057732847827754, "loss": 3.5065, "step": 1543 }, { "epoch": 0.45, "learning_rate": 0.0009056361736640407, "loss": 3.3871, "step": 1544 }, { "epoch": 0.45, "learning_rate": 0.0009054989732547506, "loss": 3.3726, "step": 1545 }, { "epoch": 0.45, "learning_rate": 0.0009053616835851062, "loss": 3.3427, "step": 1546 }, { "epoch": 0.45, "learning_rate": 0.0009052243046853283, "loss": 3.3802, "step": 1547 }, { "epoch": 0.45, "learning_rate": 0.0009050868365856575, "loss": 3.4166, "step": 1548 }, { "epoch": 0.45, "learning_rate": 0.0009049492793163538, "loss": 3.3535, "step": 1549 }, { "epoch": 0.45, "learning_rate": 0.000904811632907697, "loss": 3.3599, "step": 1550 }, { "epoch": 0.45, "learning_rate": 0.0009046738973899866, "loss": 3.5453, "step": 1551 }, { "epoch": 0.45, "learning_rate": 0.0009045360727935414, "loss": 3.3058, "step": 1552 }, { "epoch": 0.45, "learning_rate": 0.0009043981591487002, "loss": 3.3802, "step": 1553 }, { "epoch": 0.45, "learning_rate": 0.0009042601564858213, "loss": 3.5476, "step": 1554 }, { "epoch": 0.45, "learning_rate": 0.0009041220648352819, "loss": 3.4963, "step": 1555 }, { "epoch": 0.45, "learning_rate": 0.0009039838842274801, "loss": 3.3665, "step": 1556 }, { "epoch": 0.45, "learning_rate": 0.0009038456146928325, "loss": 3.3512, "step": 1557 }, { "epoch": 0.45, "learning_rate": 0.0009037072562617757, "loss": 3.4731, "step": 1558 }, { "epoch": 0.45, "learning_rate": 0.0009035688089647659, "loss": 3.386, "step": 1559 }, { "epoch": 0.45, "learning_rate": 0.0009034302728322787, "loss": 3.2112, "step": 1560 }, { "epoch": 0.45, "learning_rate": 0.0009032916478948092, "loss": 3.4905, "step": 1561 }, { "epoch": 0.45, "learning_rate": 0.0009031529341828724, "loss": 3.3176, "step": 1562 }, { "epoch": 0.45, "learning_rate": 0.0009030141317270026, "loss": 3.3519, "step": 1563 }, { "epoch": 0.45, "learning_rate": 0.0009028752405577535, "loss": 3.4219, "step": 1564 }, { "epoch": 0.45, "learning_rate": 0.0009027362607056986, "loss": 3.3804, "step": 1565 }, { "epoch": 0.45, "learning_rate": 0.0009025971922014307, "loss": 3.3938, "step": 1566 }, { "epoch": 0.45, "learning_rate": 0.0009024580350755623, "loss": 3.3151, "step": 1567 }, { "epoch": 0.45, "learning_rate": 0.0009023187893587255, "loss": 3.5547, "step": 1568 }, { "epoch": 0.45, "learning_rate": 0.0009021794550815713, "loss": 3.4192, "step": 1569 }, { "epoch": 0.45, "learning_rate": 0.000902040032274771, "loss": 3.392, "step": 1570 }, { "epoch": 0.45, "learning_rate": 0.0009019005209690147, "loss": 3.4099, "step": 1571 }, { "epoch": 0.46, "learning_rate": 0.0009017609211950127, "loss": 3.2996, "step": 1572 }, { "epoch": 0.46, "learning_rate": 0.000901621232983494, "loss": 3.4289, "step": 1573 }, { "epoch": 0.46, "learning_rate": 0.0009014814563652077, "loss": 3.3132, "step": 1574 }, { "epoch": 0.46, "learning_rate": 0.0009013415913709217, "loss": 3.3513, "step": 1575 }, { "epoch": 0.46, "learning_rate": 0.000901201638031424, "loss": 3.5028, "step": 1576 }, { "epoch": 0.46, "learning_rate": 0.0009010615963775219, "loss": 3.3635, "step": 1577 }, { "epoch": 0.46, "learning_rate": 0.0009009214664400419, "loss": 3.3513, "step": 1578 }, { "epoch": 0.46, "learning_rate": 0.0009007812482498301, "loss": 3.4214, "step": 1579 }, { "epoch": 0.46, "learning_rate": 0.000900640941837752, "loss": 3.3444, "step": 1580 }, { "epoch": 0.46, "learning_rate": 0.0009005005472346923, "loss": 3.3484, "step": 1581 }, { "epoch": 0.46, "learning_rate": 0.0009003600644715557, "loss": 3.5227, "step": 1582 }, { "epoch": 0.46, "learning_rate": 0.0009002194935792656, "loss": 3.3583, "step": 1583 }, { "epoch": 0.46, "learning_rate": 0.0009000788345887654, "loss": 3.2395, "step": 1584 }, { "epoch": 0.46, "learning_rate": 0.0008999380875310175, "loss": 3.434, "step": 1585 }, { "epoch": 0.46, "learning_rate": 0.0008997972524370037, "loss": 3.3589, "step": 1586 }, { "epoch": 0.46, "learning_rate": 0.0008996563293377254, "loss": 3.3679, "step": 1587 }, { "epoch": 0.46, "learning_rate": 0.0008995153182642032, "loss": 3.3344, "step": 1588 }, { "epoch": 0.46, "learning_rate": 0.0008993742192474771, "loss": 3.3964, "step": 1589 }, { "epoch": 0.46, "learning_rate": 0.0008992330323186068, "loss": 3.4193, "step": 1590 }, { "epoch": 0.46, "learning_rate": 0.0008990917575086708, "loss": 3.4544, "step": 1591 }, { "epoch": 0.46, "learning_rate": 0.000898950394848767, "loss": 3.4655, "step": 1592 }, { "epoch": 0.46, "learning_rate": 0.0008988089443700131, "loss": 3.4745, "step": 1593 }, { "epoch": 0.46, "learning_rate": 0.0008986674061035456, "loss": 3.4333, "step": 1594 }, { "epoch": 0.46, "learning_rate": 0.0008985257800805209, "loss": 3.3412, "step": 1595 }, { "epoch": 0.46, "learning_rate": 0.0008983840663321141, "loss": 3.3424, "step": 1596 }, { "epoch": 0.46, "learning_rate": 0.00089824226488952, "loss": 3.32, "step": 1597 }, { "epoch": 0.46, "learning_rate": 0.0008981003757839527, "loss": 3.4338, "step": 1598 }, { "epoch": 0.46, "learning_rate": 0.0008979583990466454, "loss": 3.2909, "step": 1599 }, { "epoch": 0.46, "learning_rate": 0.0008978163347088507, "loss": 3.3318, "step": 1600 }, { "epoch": 0.46, "learning_rate": 0.0008976741828018402, "loss": 3.3918, "step": 1601 }, { "epoch": 0.46, "learning_rate": 0.0008975319433569055, "loss": 3.3622, "step": 1602 }, { "epoch": 0.46, "learning_rate": 0.0008973896164053569, "loss": 3.4138, "step": 1603 }, { "epoch": 0.46, "learning_rate": 0.0008972472019785243, "loss": 3.3027, "step": 1604 }, { "epoch": 0.46, "learning_rate": 0.000897104700107756, "loss": 3.4346, "step": 1605 }, { "epoch": 0.47, "learning_rate": 0.0008969621108244208, "loss": 3.4899, "step": 1606 }, { "epoch": 0.47, "learning_rate": 0.0008968194341599056, "loss": 3.3582, "step": 1607 }, { "epoch": 0.47, "learning_rate": 0.0008966766701456176, "loss": 3.2857, "step": 1608 }, { "epoch": 0.47, "learning_rate": 0.0008965338188129824, "loss": 3.4243, "step": 1609 }, { "epoch": 0.47, "learning_rate": 0.0008963908801934451, "loss": 3.3265, "step": 1610 }, { "epoch": 0.47, "learning_rate": 0.00089624785431847, "loss": 3.3795, "step": 1611 }, { "epoch": 0.47, "learning_rate": 0.0008961047412195409, "loss": 3.3041, "step": 1612 }, { "epoch": 0.47, "learning_rate": 0.0008959615409281603, "loss": 3.3915, "step": 1613 }, { "epoch": 0.47, "learning_rate": 0.0008958182534758501, "loss": 3.4096, "step": 1614 }, { "epoch": 0.47, "learning_rate": 0.0008956748788941514, "loss": 3.441, "step": 1615 }, { "epoch": 0.47, "learning_rate": 0.0008955314172146247, "loss": 3.4681, "step": 1616 }, { "epoch": 0.47, "learning_rate": 0.0008953878684688492, "loss": 3.4491, "step": 1617 }, { "epoch": 0.47, "learning_rate": 0.0008952442326884239, "loss": 3.3392, "step": 1618 }, { "epoch": 0.47, "learning_rate": 0.0008951005099049663, "loss": 3.4295, "step": 1619 }, { "epoch": 0.47, "learning_rate": 0.0008949567001501134, "loss": 3.3247, "step": 1620 }, { "epoch": 0.47, "learning_rate": 0.0008948128034555211, "loss": 3.3867, "step": 1621 }, { "epoch": 0.47, "learning_rate": 0.000894668819852865, "loss": 3.3997, "step": 1622 }, { "epoch": 0.47, "learning_rate": 0.0008945247493738391, "loss": 3.3472, "step": 1623 }, { "epoch": 0.47, "learning_rate": 0.0008943805920501571, "loss": 3.3823, "step": 1624 }, { "epoch": 0.47, "learning_rate": 0.0008942363479135516, "loss": 3.453, "step": 1625 }, { "epoch": 0.47, "learning_rate": 0.000894092016995774, "loss": 3.4222, "step": 1626 }, { "epoch": 0.47, "learning_rate": 0.0008939475993285956, "loss": 3.2948, "step": 1627 }, { "epoch": 0.47, "learning_rate": 0.0008938030949438059, "loss": 3.3695, "step": 1628 }, { "epoch": 0.47, "learning_rate": 0.0008936585038732142, "loss": 3.2836, "step": 1629 }, { "epoch": 0.47, "learning_rate": 0.0008935138261486482, "loss": 3.4491, "step": 1630 }, { "epoch": 0.47, "learning_rate": 0.0008933690618019552, "loss": 3.3722, "step": 1631 }, { "epoch": 0.47, "learning_rate": 0.0008932242108650015, "loss": 3.4862, "step": 1632 }, { "epoch": 0.47, "learning_rate": 0.0008930792733696725, "loss": 3.3029, "step": 1633 }, { "epoch": 0.47, "learning_rate": 0.0008929342493478721, "loss": 3.3983, "step": 1634 }, { "epoch": 0.47, "learning_rate": 0.0008927891388315241, "loss": 3.296, "step": 1635 }, { "epoch": 0.47, "learning_rate": 0.0008926439418525707, "loss": 3.3845, "step": 1636 }, { "epoch": 0.47, "learning_rate": 0.0008924986584429732, "loss": 3.4169, "step": 1637 }, { "epoch": 0.47, "learning_rate": 0.0008923532886347125, "loss": 3.4701, "step": 1638 }, { "epoch": 0.47, "learning_rate": 0.0008922078324597878, "loss": 3.3467, "step": 1639 }, { "epoch": 0.47, "learning_rate": 0.0008920622899502178, "loss": 3.4706, "step": 1640 }, { "epoch": 0.48, "learning_rate": 0.0008919166611380396, "loss": 3.3655, "step": 1641 }, { "epoch": 0.48, "learning_rate": 0.0008917709460553101, "loss": 3.3584, "step": 1642 }, { "epoch": 0.48, "learning_rate": 0.0008916251447341046, "loss": 3.516, "step": 1643 }, { "epoch": 0.48, "learning_rate": 0.0008914792572065178, "loss": 3.4284, "step": 1644 }, { "epoch": 0.48, "learning_rate": 0.0008913332835046628, "loss": 3.3717, "step": 1645 }, { "epoch": 0.48, "learning_rate": 0.0008911872236606723, "loss": 3.3326, "step": 1646 }, { "epoch": 0.48, "learning_rate": 0.0008910410777066975, "loss": 3.276, "step": 1647 }, { "epoch": 0.48, "learning_rate": 0.0008908948456749089, "loss": 3.4644, "step": 1648 }, { "epoch": 0.48, "learning_rate": 0.0008907485275974958, "loss": 3.4554, "step": 1649 }, { "epoch": 0.48, "learning_rate": 0.0008906021235066664, "loss": 3.4571, "step": 1650 }, { "epoch": 0.48, "learning_rate": 0.0008904556334346477, "loss": 3.3694, "step": 1651 }, { "epoch": 0.48, "learning_rate": 0.0008903090574136858, "loss": 3.3984, "step": 1652 }, { "epoch": 0.48, "learning_rate": 0.0008901623954760459, "loss": 3.3359, "step": 1653 }, { "epoch": 0.48, "learning_rate": 0.0008900156476540119, "loss": 3.2889, "step": 1654 }, { "epoch": 0.48, "learning_rate": 0.0008898688139798865, "loss": 3.3656, "step": 1655 }, { "epoch": 0.48, "learning_rate": 0.0008897218944859915, "loss": 3.2288, "step": 1656 }, { "epoch": 0.48, "learning_rate": 0.0008895748892046674, "loss": 3.3871, "step": 1657 }, { "epoch": 0.48, "learning_rate": 0.0008894277981682739, "loss": 3.3536, "step": 1658 }, { "epoch": 0.48, "learning_rate": 0.0008892806214091893, "loss": 3.5329, "step": 1659 }, { "epoch": 0.48, "learning_rate": 0.0008891333589598107, "loss": 3.4646, "step": 1660 }, { "epoch": 0.48, "learning_rate": 0.0008889860108525543, "loss": 3.3202, "step": 1661 }, { "epoch": 0.48, "learning_rate": 0.0008888385771198552, "loss": 3.4292, "step": 1662 }, { "epoch": 0.48, "learning_rate": 0.0008886910577941672, "loss": 3.3812, "step": 1663 }, { "epoch": 0.48, "learning_rate": 0.0008885434529079625, "loss": 3.3298, "step": 1664 }, { "epoch": 0.48, "learning_rate": 0.0008883957624937332, "loss": 3.3088, "step": 1665 }, { "epoch": 0.48, "learning_rate": 0.0008882479865839893, "loss": 3.4496, "step": 1666 }, { "epoch": 0.48, "learning_rate": 0.0008881001252112599, "loss": 3.2832, "step": 1667 }, { "epoch": 0.48, "learning_rate": 0.0008879521784080932, "loss": 3.4611, "step": 1668 }, { "epoch": 0.48, "learning_rate": 0.0008878041462070556, "loss": 3.3892, "step": 1669 }, { "epoch": 0.48, "learning_rate": 0.0008876560286407329, "loss": 3.3901, "step": 1670 }, { "epoch": 0.48, "learning_rate": 0.0008875078257417294, "loss": 3.4996, "step": 1671 }, { "epoch": 0.48, "learning_rate": 0.0008873595375426681, "loss": 3.4846, "step": 1672 }, { "epoch": 0.48, "learning_rate": 0.0008872111640761911, "loss": 3.3232, "step": 1673 }, { "epoch": 0.48, "learning_rate": 0.000887062705374959, "loss": 3.4312, "step": 1674 }, { "epoch": 0.49, "learning_rate": 0.0008869141614716511, "loss": 3.3542, "step": 1675 }, { "epoch": 0.49, "learning_rate": 0.0008867655323989656, "loss": 3.3957, "step": 1676 }, { "epoch": 0.49, "learning_rate": 0.0008866168181896197, "loss": 3.4165, "step": 1677 }, { "epoch": 0.49, "learning_rate": 0.0008864680188763488, "loss": 3.4353, "step": 1678 }, { "epoch": 0.49, "learning_rate": 0.0008863191344919074, "loss": 3.4652, "step": 1679 }, { "epoch": 0.49, "learning_rate": 0.0008861701650690686, "loss": 3.424, "step": 1680 }, { "epoch": 0.49, "learning_rate": 0.0008860211106406244, "loss": 3.3843, "step": 1681 }, { "epoch": 0.49, "learning_rate": 0.0008858719712393851, "loss": 3.3123, "step": 1682 }, { "epoch": 0.49, "learning_rate": 0.0008857227468981804, "loss": 3.4461, "step": 1683 }, { "epoch": 0.49, "learning_rate": 0.0008855734376498576, "loss": 3.4767, "step": 1684 }, { "epoch": 0.49, "learning_rate": 0.0008854240435272841, "loss": 3.3446, "step": 1685 }, { "epoch": 0.49, "learning_rate": 0.0008852745645633448, "loss": 3.4318, "step": 1686 }, { "epoch": 0.49, "learning_rate": 0.0008851250007909439, "loss": 3.2335, "step": 1687 }, { "epoch": 0.49, "learning_rate": 0.0008849753522430037, "loss": 3.3963, "step": 1688 }, { "epoch": 0.49, "learning_rate": 0.0008848256189524662, "loss": 3.4311, "step": 1689 }, { "epoch": 0.49, "learning_rate": 0.0008846758009522908, "loss": 3.4011, "step": 1690 }, { "epoch": 0.49, "learning_rate": 0.0008845258982754566, "loss": 3.4417, "step": 1691 }, { "epoch": 0.49, "learning_rate": 0.0008843759109549606, "loss": 3.4263, "step": 1692 }, { "epoch": 0.49, "learning_rate": 0.0008842258390238188, "loss": 3.3227, "step": 1693 }, { "epoch": 0.49, "learning_rate": 0.0008840756825150657, "loss": 3.4103, "step": 1694 }, { "epoch": 0.49, "learning_rate": 0.0008839254414617546, "loss": 3.331, "step": 1695 }, { "epoch": 0.49, "learning_rate": 0.0008837751158969571, "loss": 3.417, "step": 1696 }, { "epoch": 0.49, "learning_rate": 0.0008836247058537638, "loss": 3.3434, "step": 1697 }, { "epoch": 0.49, "learning_rate": 0.0008834742113652834, "loss": 3.3571, "step": 1698 }, { "epoch": 0.49, "learning_rate": 0.0008833236324646437, "loss": 3.3045, "step": 1699 }, { "epoch": 0.49, "learning_rate": 0.0008831729691849906, "loss": 3.4677, "step": 1700 }, { "epoch": 0.49, "learning_rate": 0.000883022221559489, "loss": 3.288, "step": 1701 }, { "epoch": 0.49, "learning_rate": 0.0008828713896213222, "loss": 3.4299, "step": 1702 }, { "epoch": 0.49, "learning_rate": 0.0008827204734036919, "loss": 3.3239, "step": 1703 }, { "epoch": 0.49, "learning_rate": 0.0008825694729398186, "loss": 3.5415, "step": 1704 }, { "epoch": 0.49, "learning_rate": 0.000882418388262941, "loss": 3.4223, "step": 1705 }, { "epoch": 0.49, "learning_rate": 0.0008822672194063171, "loss": 3.378, "step": 1706 }, { "epoch": 0.49, "learning_rate": 0.0008821159664032224, "loss": 3.4145, "step": 1707 }, { "epoch": 0.49, "learning_rate": 0.0008819646292869515, "loss": 3.3313, "step": 1708 }, { "epoch": 0.49, "learning_rate": 0.0008818132080908176, "loss": 3.4405, "step": 1709 }, { "epoch": 0.5, "learning_rate": 0.0008816617028481523, "loss": 3.3754, "step": 1710 }, { "epoch": 0.5, "learning_rate": 0.0008815101135923055, "loss": 3.4556, "step": 1711 }, { "epoch": 0.5, "learning_rate": 0.000881358440356646, "loss": 3.376, "step": 1712 }, { "epoch": 0.5, "learning_rate": 0.0008812066831745602, "loss": 3.5598, "step": 1713 }, { "epoch": 0.5, "learning_rate": 0.0008810548420794543, "loss": 3.3608, "step": 1714 }, { "epoch": 0.5, "learning_rate": 0.000880902917104752, "loss": 3.3658, "step": 1715 }, { "epoch": 0.5, "learning_rate": 0.0008807509082838957, "loss": 3.3063, "step": 1716 }, { "epoch": 0.5, "learning_rate": 0.0008805988156503464, "loss": 3.3708, "step": 1717 }, { "epoch": 0.5, "learning_rate": 0.0008804466392375833, "loss": 3.3033, "step": 1718 }, { "epoch": 0.5, "learning_rate": 0.0008802943790791045, "loss": 3.3869, "step": 1719 }, { "epoch": 0.5, "learning_rate": 0.0008801420352084259, "loss": 3.4691, "step": 1720 }, { "epoch": 0.5, "learning_rate": 0.0008799896076590823, "loss": 3.4929, "step": 1721 }, { "epoch": 0.5, "learning_rate": 0.0008798370964646268, "loss": 3.3101, "step": 1722 }, { "epoch": 0.5, "learning_rate": 0.0008796845016586308, "loss": 3.3632, "step": 1723 }, { "epoch": 0.5, "learning_rate": 0.0008795318232746842, "loss": 3.4584, "step": 1724 }, { "epoch": 0.5, "learning_rate": 0.0008793790613463954, "loss": 3.519, "step": 1725 }, { "epoch": 0.5, "learning_rate": 0.0008792262159073911, "loss": 3.4006, "step": 1726 }, { "epoch": 0.5, "learning_rate": 0.0008790732869913161, "loss": 3.3995, "step": 1727 }, { "epoch": 0.5, "learning_rate": 0.0008789202746318341, "loss": 3.3605, "step": 1728 }, { "epoch": 0.5, "learning_rate": 0.0008787671788626268, "loss": 3.3151, "step": 1729 }, { "epoch": 0.5, "learning_rate": 0.0008786139997173944, "loss": 3.3407, "step": 1730 }, { "epoch": 0.5, "learning_rate": 0.0008784607372298552, "loss": 3.4603, "step": 1731 }, { "epoch": 0.5, "learning_rate": 0.0008783073914337465, "loss": 3.3278, "step": 1732 }, { "epoch": 0.5, "learning_rate": 0.0008781539623628232, "loss": 3.3586, "step": 1733 }, { "epoch": 0.5, "learning_rate": 0.0008780004500508588, "loss": 3.3956, "step": 1734 }, { "epoch": 0.5, "learning_rate": 0.0008778468545316454, "loss": 3.4038, "step": 1735 }, { "epoch": 0.5, "learning_rate": 0.0008776931758389928, "loss": 3.3723, "step": 1736 }, { "epoch": 0.5, "learning_rate": 0.0008775394140067299, "loss": 3.352, "step": 1737 }, { "epoch": 0.5, "learning_rate": 0.0008773855690687031, "loss": 3.5563, "step": 1738 }, { "epoch": 0.5, "learning_rate": 0.0008772316410587779, "loss": 3.359, "step": 1739 }, { "epoch": 0.5, "learning_rate": 0.0008770776300108373, "loss": 3.4804, "step": 1740 }, { "epoch": 0.5, "learning_rate": 0.000876923535958783, "loss": 3.3439, "step": 1741 }, { "epoch": 0.5, "learning_rate": 0.0008767693589365352, "loss": 3.4438, "step": 1742 }, { "epoch": 0.5, "learning_rate": 0.0008766150989780317, "loss": 3.3398, "step": 1743 }, { "epoch": 0.51, "learning_rate": 0.0008764607561172292, "loss": 3.3372, "step": 1744 }, { "epoch": 0.51, "learning_rate": 0.0008763063303881022, "loss": 3.4553, "step": 1745 }, { "epoch": 0.51, "learning_rate": 0.0008761518218246436, "loss": 3.4436, "step": 1746 }, { "epoch": 0.51, "learning_rate": 0.0008759972304608648, "loss": 3.4692, "step": 1747 }, { "epoch": 0.51, "learning_rate": 0.0008758425563307951, "loss": 3.2915, "step": 1748 }, { "epoch": 0.51, "learning_rate": 0.0008756877994684818, "loss": 3.4361, "step": 1749 }, { "epoch": 0.51, "learning_rate": 0.0008755329599079911, "loss": 3.2118, "step": 1750 }, { "epoch": 0.51, "learning_rate": 0.0008753780376834069, "loss": 3.6013, "step": 1751 }, { "epoch": 0.51, "learning_rate": 0.0008752230328288313, "loss": 3.4064, "step": 1752 }, { "epoch": 0.51, "learning_rate": 0.0008750679453783847, "loss": 3.3342, "step": 1753 }, { "epoch": 0.51, "learning_rate": 0.0008749127753662059, "loss": 3.2794, "step": 1754 }, { "epoch": 0.51, "learning_rate": 0.0008747575228264515, "loss": 3.39, "step": 1755 }, { "epoch": 0.51, "learning_rate": 0.0008746021877932965, "loss": 3.4412, "step": 1756 }, { "epoch": 0.51, "learning_rate": 0.0008744467703009339, "loss": 3.4493, "step": 1757 }, { "epoch": 0.51, "learning_rate": 0.0008742912703835749, "loss": 3.307, "step": 1758 }, { "epoch": 0.51, "learning_rate": 0.000874135688075449, "loss": 3.3551, "step": 1759 }, { "epoch": 0.51, "learning_rate": 0.0008739800234108036, "loss": 3.4491, "step": 1760 }, { "epoch": 0.51, "learning_rate": 0.0008738242764239046, "loss": 3.4067, "step": 1761 }, { "epoch": 0.51, "learning_rate": 0.0008736684471490352, "loss": 3.2745, "step": 1762 }, { "epoch": 0.51, "learning_rate": 0.000873512535620498, "loss": 3.3658, "step": 1763 }, { "epoch": 0.51, "learning_rate": 0.0008733565418726125, "loss": 3.4044, "step": 1764 }, { "epoch": 0.51, "learning_rate": 0.0008732004659397169, "loss": 3.3933, "step": 1765 }, { "epoch": 0.51, "learning_rate": 0.0008730443078561674, "loss": 3.3054, "step": 1766 }, { "epoch": 0.51, "learning_rate": 0.0008728880676563383, "loss": 3.4028, "step": 1767 }, { "epoch": 0.51, "learning_rate": 0.0008727317453746218, "loss": 3.4266, "step": 1768 }, { "epoch": 0.51, "learning_rate": 0.0008725753410454284, "loss": 3.4602, "step": 1769 }, { "epoch": 0.51, "learning_rate": 0.0008724188547031866, "loss": 3.2936, "step": 1770 }, { "epoch": 0.51, "learning_rate": 0.0008722622863823427, "loss": 3.3524, "step": 1771 }, { "epoch": 0.51, "learning_rate": 0.0008721056361173616, "loss": 3.3057, "step": 1772 }, { "epoch": 0.51, "learning_rate": 0.0008719489039427256, "loss": 3.3193, "step": 1773 }, { "epoch": 0.51, "learning_rate": 0.0008717920898929355, "loss": 3.3594, "step": 1774 }, { "epoch": 0.51, "learning_rate": 0.0008716351940025097, "loss": 3.3979, "step": 1775 }, { "epoch": 0.51, "learning_rate": 0.0008714782163059852, "loss": 3.3464, "step": 1776 }, { "epoch": 0.51, "learning_rate": 0.0008713211568379166, "loss": 3.3244, "step": 1777 }, { "epoch": 0.51, "learning_rate": 0.0008711640156328764, "loss": 3.2849, "step": 1778 }, { "epoch": 0.52, "learning_rate": 0.0008710067927254555, "loss": 3.3964, "step": 1779 }, { "epoch": 0.52, "learning_rate": 0.0008708494881502622, "loss": 3.3725, "step": 1780 }, { "epoch": 0.52, "learning_rate": 0.0008706921019419236, "loss": 3.4193, "step": 1781 }, { "epoch": 0.52, "learning_rate": 0.0008705346341350839, "loss": 3.5048, "step": 1782 }, { "epoch": 0.52, "learning_rate": 0.0008703770847644058, "loss": 3.3834, "step": 1783 }, { "epoch": 0.52, "learning_rate": 0.0008702194538645698, "loss": 3.3296, "step": 1784 }, { "epoch": 0.52, "learning_rate": 0.0008700617414702745, "loss": 3.5018, "step": 1785 }, { "epoch": 0.52, "learning_rate": 0.000869903947616236, "loss": 3.3917, "step": 1786 }, { "epoch": 0.52, "learning_rate": 0.0008697460723371891, "loss": 3.4325, "step": 1787 }, { "epoch": 0.52, "learning_rate": 0.0008695881156678856, "loss": 3.2443, "step": 1788 }, { "epoch": 0.52, "learning_rate": 0.0008694300776430958, "loss": 3.4001, "step": 1789 }, { "epoch": 0.52, "learning_rate": 0.0008692719582976078, "loss": 3.5383, "step": 1790 }, { "epoch": 0.52, "learning_rate": 0.0008691137576662275, "loss": 3.4524, "step": 1791 }, { "epoch": 0.52, "learning_rate": 0.0008689554757837792, "loss": 3.3572, "step": 1792 }, { "epoch": 0.52, "learning_rate": 0.0008687971126851041, "loss": 3.4432, "step": 1793 }, { "epoch": 0.52, "learning_rate": 0.000868638668405062, "loss": 3.5079, "step": 1794 }, { "epoch": 0.52, "learning_rate": 0.0008684801429785305, "loss": 3.3333, "step": 1795 }, { "epoch": 0.52, "learning_rate": 0.0008683215364404051, "loss": 3.291, "step": 1796 }, { "epoch": 0.52, "learning_rate": 0.0008681628488255986, "loss": 3.4844, "step": 1797 }, { "epoch": 0.52, "learning_rate": 0.0008680040801690425, "loss": 3.4243, "step": 1798 }, { "epoch": 0.52, "learning_rate": 0.0008678452305056854, "loss": 3.236, "step": 1799 }, { "epoch": 0.52, "learning_rate": 0.0008676862998704944, "loss": 3.2824, "step": 1800 }, { "epoch": 0.52, "learning_rate": 0.0008675272882984536, "loss": 3.4751, "step": 1801 }, { "epoch": 0.52, "learning_rate": 0.0008673681958245657, "loss": 3.3383, "step": 1802 }, { "epoch": 0.52, "learning_rate": 0.0008672090224838508, "loss": 3.3328, "step": 1803 }, { "epoch": 0.52, "learning_rate": 0.000867049768311347, "loss": 3.4494, "step": 1804 }, { "epoch": 0.52, "learning_rate": 0.0008668904333421097, "loss": 3.3132, "step": 1805 }, { "epoch": 0.52, "learning_rate": 0.000866731017611213, "loss": 3.292, "step": 1806 }, { "epoch": 0.52, "learning_rate": 0.000866571521153748, "loss": 3.278, "step": 1807 }, { "epoch": 0.52, "learning_rate": 0.0008664119440048236, "loss": 3.3565, "step": 1808 }, { "epoch": 0.52, "learning_rate": 0.000866252286199567, "loss": 3.3865, "step": 1809 }, { "epoch": 0.52, "learning_rate": 0.0008660925477731226, "loss": 3.2417, "step": 1810 }, { "epoch": 0.52, "learning_rate": 0.0008659327287606531, "loss": 3.4384, "step": 1811 }, { "epoch": 0.52, "learning_rate": 0.0008657728291973383, "loss": 3.5091, "step": 1812 }, { "epoch": 0.53, "learning_rate": 0.0008656128491183762, "loss": 3.323, "step": 1813 }, { "epoch": 0.53, "learning_rate": 0.0008654527885589823, "loss": 3.395, "step": 1814 }, { "epoch": 0.53, "learning_rate": 0.0008652926475543898, "loss": 3.2598, "step": 1815 }, { "epoch": 0.53, "learning_rate": 0.00086513242613985, "loss": 3.5224, "step": 1816 }, { "epoch": 0.53, "learning_rate": 0.0008649721243506313, "loss": 3.4678, "step": 1817 }, { "epoch": 0.53, "learning_rate": 0.0008648117422220203, "loss": 3.3788, "step": 1818 }, { "epoch": 0.53, "learning_rate": 0.0008646512797893209, "loss": 3.3157, "step": 1819 }, { "epoch": 0.53, "learning_rate": 0.0008644907370878549, "loss": 3.3316, "step": 1820 }, { "epoch": 0.53, "learning_rate": 0.0008643301141529619, "loss": 3.5024, "step": 1821 }, { "epoch": 0.53, "learning_rate": 0.0008641694110199987, "loss": 3.4628, "step": 1822 }, { "epoch": 0.53, "learning_rate": 0.0008640086277243402, "loss": 3.403, "step": 1823 }, { "epoch": 0.53, "learning_rate": 0.0008638477643013787, "loss": 3.4095, "step": 1824 }, { "epoch": 0.53, "learning_rate": 0.0008636868207865244, "loss": 3.3465, "step": 1825 }, { "epoch": 0.53, "learning_rate": 0.0008635257972152046, "loss": 3.3619, "step": 1826 }, { "epoch": 0.53, "learning_rate": 0.0008633646936228649, "loss": 3.289, "step": 1827 }, { "epoch": 0.53, "learning_rate": 0.0008632035100449682, "loss": 3.1592, "step": 1828 }, { "epoch": 0.53, "learning_rate": 0.0008630422465169947, "loss": 3.439, "step": 1829 }, { "epoch": 0.53, "learning_rate": 0.0008628809030744427, "loss": 3.4329, "step": 1830 }, { "epoch": 0.53, "learning_rate": 0.0008627194797528277, "loss": 3.3465, "step": 1831 }, { "epoch": 0.53, "learning_rate": 0.0008625579765876832, "loss": 3.4276, "step": 1832 }, { "epoch": 0.53, "learning_rate": 0.00086239639361456, "loss": 3.4684, "step": 1833 }, { "epoch": 0.53, "learning_rate": 0.0008622347308690263, "loss": 3.3125, "step": 1834 }, { "epoch": 0.53, "learning_rate": 0.0008620729883866685, "loss": 3.5461, "step": 1835 }, { "epoch": 0.53, "learning_rate": 0.0008619111662030896, "loss": 3.3789, "step": 1836 }, { "epoch": 0.53, "learning_rate": 0.0008617492643539109, "loss": 3.4293, "step": 1837 }, { "epoch": 0.53, "learning_rate": 0.0008615872828747711, "loss": 3.2324, "step": 1838 }, { "epoch": 0.53, "learning_rate": 0.0008614252218013263, "loss": 3.4566, "step": 1839 }, { "epoch": 0.53, "learning_rate": 0.00086126308116925, "loss": 3.2924, "step": 1840 }, { "epoch": 0.53, "learning_rate": 0.0008611008610142335, "loss": 3.1984, "step": 1841 }, { "epoch": 0.53, "learning_rate": 0.0008609385613719854, "loss": 3.3983, "step": 1842 }, { "epoch": 0.53, "learning_rate": 0.0008607761822782317, "loss": 3.458, "step": 1843 }, { "epoch": 0.53, "learning_rate": 0.0008606137237687165, "loss": 3.295, "step": 1844 }, { "epoch": 0.53, "learning_rate": 0.0008604511858792005, "loss": 3.2751, "step": 1845 }, { "epoch": 0.53, "learning_rate": 0.0008602885686454627, "loss": 3.2126, "step": 1846 }, { "epoch": 0.53, "learning_rate": 0.0008601258721032988, "loss": 3.4474, "step": 1847 }, { "epoch": 0.54, "learning_rate": 0.0008599630962885226, "loss": 3.4366, "step": 1848 }, { "epoch": 0.54, "learning_rate": 0.0008598002412369647, "loss": 3.335, "step": 1849 }, { "epoch": 0.54, "learning_rate": 0.0008596373069844741, "loss": 3.3877, "step": 1850 }, { "epoch": 0.54, "learning_rate": 0.0008594742935669164, "loss": 3.3503, "step": 1851 }, { "epoch": 0.54, "learning_rate": 0.0008593112010201747, "loss": 3.3764, "step": 1852 }, { "epoch": 0.54, "learning_rate": 0.0008591480293801499, "loss": 3.3217, "step": 1853 }, { "epoch": 0.54, "learning_rate": 0.00085898477868276, "loss": 3.3967, "step": 1854 }, { "epoch": 0.54, "learning_rate": 0.0008588214489639407, "loss": 3.3666, "step": 1855 }, { "epoch": 0.54, "learning_rate": 0.0008586580402596447, "loss": 3.4239, "step": 1856 }, { "epoch": 0.54, "learning_rate": 0.0008584945526058425, "loss": 3.3969, "step": 1857 }, { "epoch": 0.54, "learning_rate": 0.0008583309860385216, "loss": 3.3483, "step": 1858 }, { "epoch": 0.54, "learning_rate": 0.0008581673405936872, "loss": 3.3936, "step": 1859 }, { "epoch": 0.54, "learning_rate": 0.0008580036163073615, "loss": 3.4937, "step": 1860 }, { "epoch": 0.54, "learning_rate": 0.0008578398132155845, "loss": 3.2547, "step": 1861 }, { "epoch": 0.54, "learning_rate": 0.0008576759313544131, "loss": 3.3604, "step": 1862 }, { "epoch": 0.54, "learning_rate": 0.0008575119707599219, "loss": 3.2823, "step": 1863 }, { "epoch": 0.54, "learning_rate": 0.0008573479314682026, "loss": 3.4998, "step": 1864 }, { "epoch": 0.54, "learning_rate": 0.0008571838135153644, "loss": 3.1938, "step": 1865 }, { "epoch": 0.54, "learning_rate": 0.0008570196169375337, "loss": 3.282, "step": 1866 }, { "epoch": 0.54, "learning_rate": 0.0008568553417708542, "loss": 3.3254, "step": 1867 }, { "epoch": 0.54, "learning_rate": 0.000856690988051487, "loss": 3.3979, "step": 1868 }, { "epoch": 0.54, "learning_rate": 0.0008565265558156101, "loss": 3.2725, "step": 1869 }, { "epoch": 0.54, "learning_rate": 0.0008563620450994196, "loss": 3.313, "step": 1870 }, { "epoch": 0.54, "learning_rate": 0.0008561974559391279, "loss": 3.2979, "step": 1871 }, { "epoch": 0.54, "learning_rate": 0.0008560327883709656, "loss": 3.4767, "step": 1872 }, { "epoch": 0.54, "learning_rate": 0.0008558680424311796, "loss": 3.3036, "step": 1873 }, { "epoch": 0.54, "learning_rate": 0.0008557032181560351, "loss": 3.4406, "step": 1874 }, { "epoch": 0.54, "learning_rate": 0.0008555383155818135, "loss": 3.4227, "step": 1875 }, { "epoch": 0.54, "learning_rate": 0.0008553733347448143, "loss": 3.3977, "step": 1876 }, { "epoch": 0.54, "learning_rate": 0.0008552082756813537, "loss": 3.3401, "step": 1877 }, { "epoch": 0.54, "learning_rate": 0.0008550431384277652, "loss": 3.2511, "step": 1878 }, { "epoch": 0.54, "learning_rate": 0.0008548779230204, "loss": 3.4007, "step": 1879 }, { "epoch": 0.54, "learning_rate": 0.0008547126294956255, "loss": 3.3354, "step": 1880 }, { "epoch": 0.54, "learning_rate": 0.0008545472578898274, "loss": 3.4504, "step": 1881 }, { "epoch": 0.55, "learning_rate": 0.0008543818082394077, "loss": 3.3961, "step": 1882 }, { "epoch": 0.55, "learning_rate": 0.0008542162805807862, "loss": 3.2174, "step": 1883 }, { "epoch": 0.55, "learning_rate": 0.0008540506749503996, "loss": 3.2711, "step": 1884 }, { "epoch": 0.55, "learning_rate": 0.0008538849913847017, "loss": 3.345, "step": 1885 }, { "epoch": 0.55, "learning_rate": 0.0008537192299201637, "loss": 3.2601, "step": 1886 }, { "epoch": 0.55, "learning_rate": 0.0008535533905932737, "loss": 3.4235, "step": 1887 }, { "epoch": 0.55, "learning_rate": 0.0008533874734405371, "loss": 3.4112, "step": 1888 }, { "epoch": 0.55, "learning_rate": 0.0008532214784984762, "loss": 3.3361, "step": 1889 }, { "epoch": 0.55, "learning_rate": 0.0008530554058036308, "loss": 3.3305, "step": 1890 }, { "epoch": 0.55, "learning_rate": 0.0008528892553925576, "loss": 3.4485, "step": 1891 }, { "epoch": 0.55, "learning_rate": 0.0008527230273018303, "loss": 3.3027, "step": 1892 }, { "epoch": 0.55, "learning_rate": 0.0008525567215680398, "loss": 3.2698, "step": 1893 }, { "epoch": 0.55, "learning_rate": 0.0008523903382277941, "loss": 3.4226, "step": 1894 }, { "epoch": 0.55, "learning_rate": 0.0008522238773177184, "loss": 3.3046, "step": 1895 }, { "epoch": 0.55, "learning_rate": 0.0008520573388744548, "loss": 3.3348, "step": 1896 }, { "epoch": 0.55, "learning_rate": 0.0008518907229346626, "loss": 3.3518, "step": 1897 }, { "epoch": 0.55, "learning_rate": 0.000851724029535018, "loss": 3.4021, "step": 1898 }, { "epoch": 0.55, "learning_rate": 0.0008515572587122143, "loss": 3.3515, "step": 1899 }, { "epoch": 0.55, "learning_rate": 0.000851390410502962, "loss": 3.3212, "step": 1900 }, { "epoch": 0.55, "learning_rate": 0.0008512234849439886, "loss": 3.4275, "step": 1901 }, { "epoch": 0.55, "learning_rate": 0.0008510564820720383, "loss": 3.2734, "step": 1902 }, { "epoch": 0.55, "learning_rate": 0.0008508894019238726, "loss": 3.5278, "step": 1903 }, { "epoch": 0.55, "learning_rate": 0.0008507222445362701, "loss": 3.5169, "step": 1904 }, { "epoch": 0.55, "learning_rate": 0.0008505550099460264, "loss": 3.468, "step": 1905 }, { "epoch": 0.55, "learning_rate": 0.0008503876981899536, "loss": 3.3539, "step": 1906 }, { "epoch": 0.55, "learning_rate": 0.0008502203093048815, "loss": 3.2993, "step": 1907 }, { "epoch": 0.55, "learning_rate": 0.0008500528433276562, "loss": 3.3819, "step": 1908 }, { "epoch": 0.55, "learning_rate": 0.0008498853002951414, "loss": 3.3609, "step": 1909 }, { "epoch": 0.55, "learning_rate": 0.0008497176802442175, "loss": 3.4236, "step": 1910 }, { "epoch": 0.55, "learning_rate": 0.0008495499832117815, "loss": 3.3728, "step": 1911 }, { "epoch": 0.55, "learning_rate": 0.000849382209234748, "loss": 3.2567, "step": 1912 }, { "epoch": 0.55, "learning_rate": 0.0008492143583500479, "loss": 3.4208, "step": 1913 }, { "epoch": 0.55, "learning_rate": 0.0008490464305946295, "loss": 3.2458, "step": 1914 }, { "epoch": 0.55, "learning_rate": 0.0008488784260054579, "loss": 3.4283, "step": 1915 }, { "epoch": 0.55, "learning_rate": 0.000848710344619515, "loss": 3.3848, "step": 1916 }, { "epoch": 0.56, "learning_rate": 0.0008485421864737996, "loss": 3.4151, "step": 1917 }, { "epoch": 0.56, "learning_rate": 0.0008483739516053274, "loss": 3.2856, "step": 1918 }, { "epoch": 0.56, "learning_rate": 0.0008482056400511315, "loss": 3.3593, "step": 1919 }, { "epoch": 0.56, "learning_rate": 0.000848037251848261, "loss": 3.3285, "step": 1920 }, { "epoch": 0.56, "learning_rate": 0.0008478687870337824, "loss": 3.4107, "step": 1921 }, { "epoch": 0.56, "learning_rate": 0.0008477002456447791, "loss": 3.4317, "step": 1922 }, { "epoch": 0.56, "learning_rate": 0.0008475316277183509, "loss": 3.3094, "step": 1923 }, { "epoch": 0.56, "learning_rate": 0.0008473629332916153, "loss": 3.3118, "step": 1924 }, { "epoch": 0.56, "learning_rate": 0.0008471941624017057, "loss": 3.5174, "step": 1925 }, { "epoch": 0.56, "learning_rate": 0.0008470253150857729, "loss": 3.3851, "step": 1926 }, { "epoch": 0.56, "learning_rate": 0.0008468563913809843, "loss": 3.489, "step": 1927 }, { "epoch": 0.56, "learning_rate": 0.0008466873913245243, "loss": 3.4461, "step": 1928 }, { "epoch": 0.56, "learning_rate": 0.0008465183149535939, "loss": 3.2474, "step": 1929 }, { "epoch": 0.56, "learning_rate": 0.0008463491623054109, "loss": 3.3681, "step": 1930 }, { "epoch": 0.56, "learning_rate": 0.0008461799334172104, "loss": 3.3447, "step": 1931 }, { "epoch": 0.56, "learning_rate": 0.0008460106283262432, "loss": 3.3607, "step": 1932 }, { "epoch": 0.56, "learning_rate": 0.0008458412470697781, "loss": 3.5179, "step": 1933 }, { "epoch": 0.56, "learning_rate": 0.0008456717896851, "loss": 3.3276, "step": 1934 }, { "epoch": 0.56, "learning_rate": 0.0008455022562095104, "loss": 3.3412, "step": 1935 }, { "epoch": 0.56, "learning_rate": 0.0008453326466803281, "loss": 3.4028, "step": 1936 }, { "epoch": 0.56, "learning_rate": 0.0008451629611348881, "loss": 3.4494, "step": 1937 }, { "epoch": 0.56, "learning_rate": 0.0008449931996105427, "loss": 3.3164, "step": 1938 }, { "epoch": 0.56, "learning_rate": 0.0008448233621446603, "loss": 3.3388, "step": 1939 }, { "epoch": 0.56, "learning_rate": 0.0008446534487746265, "loss": 3.3252, "step": 1940 }, { "epoch": 0.56, "learning_rate": 0.0008444834595378434, "loss": 3.2484, "step": 1941 }, { "epoch": 0.56, "learning_rate": 0.0008443133944717298, "loss": 3.5031, "step": 1942 }, { "epoch": 0.56, "learning_rate": 0.0008441432536137213, "loss": 3.4053, "step": 1943 }, { "epoch": 0.56, "learning_rate": 0.0008439730370012699, "loss": 3.3728, "step": 1944 }, { "epoch": 0.56, "learning_rate": 0.0008438027446718449, "loss": 3.339, "step": 1945 }, { "epoch": 0.56, "learning_rate": 0.0008436323766629314, "loss": 3.2442, "step": 1946 }, { "epoch": 0.56, "learning_rate": 0.0008434619330120319, "loss": 3.3336, "step": 1947 }, { "epoch": 0.56, "learning_rate": 0.0008432914137566651, "loss": 3.2944, "step": 1948 }, { "epoch": 0.56, "learning_rate": 0.0008431208189343669, "loss": 3.2119, "step": 1949 }, { "epoch": 0.56, "learning_rate": 0.0008429501485826889, "loss": 3.3018, "step": 1950 }, { "epoch": 0.57, "learning_rate": 0.0008427794027392002, "loss": 3.3234, "step": 1951 }, { "epoch": 0.57, "learning_rate": 0.0008426085814414859, "loss": 3.2862, "step": 1952 }, { "epoch": 0.57, "learning_rate": 0.0008424376847271482, "loss": 3.3701, "step": 1953 }, { "epoch": 0.57, "learning_rate": 0.0008422667126338057, "loss": 3.3421, "step": 1954 }, { "epoch": 0.57, "learning_rate": 0.0008420956651990937, "loss": 3.3834, "step": 1955 }, { "epoch": 0.57, "learning_rate": 0.0008419245424606638, "loss": 3.3293, "step": 1956 }, { "epoch": 0.57, "learning_rate": 0.0008417533444561843, "loss": 3.3661, "step": 1957 }, { "epoch": 0.57, "learning_rate": 0.0008415820712233402, "loss": 3.2568, "step": 1958 }, { "epoch": 0.57, "learning_rate": 0.0008414107227998329, "loss": 3.364, "step": 1959 }, { "epoch": 0.57, "learning_rate": 0.0008412392992233806, "loss": 3.3625, "step": 1960 }, { "epoch": 0.57, "learning_rate": 0.0008410678005317177, "loss": 3.3368, "step": 1961 }, { "epoch": 0.57, "learning_rate": 0.0008408962267625954, "loss": 3.4848, "step": 1962 }, { "epoch": 0.57, "learning_rate": 0.0008407245779537812, "loss": 3.3013, "step": 1963 }, { "epoch": 0.57, "learning_rate": 0.0008405528541430594, "loss": 3.3423, "step": 1964 }, { "epoch": 0.57, "learning_rate": 0.0008403810553682306, "loss": 3.2173, "step": 1965 }, { "epoch": 0.57, "learning_rate": 0.0008402091816671121, "loss": 3.4899, "step": 1966 }, { "epoch": 0.57, "learning_rate": 0.0008400372330775372, "loss": 3.4638, "step": 1967 }, { "epoch": 0.57, "learning_rate": 0.0008398652096373565, "loss": 3.3639, "step": 1968 }, { "epoch": 0.57, "learning_rate": 0.0008396931113844362, "loss": 3.4823, "step": 1969 }, { "epoch": 0.57, "learning_rate": 0.0008395209383566596, "loss": 3.3618, "step": 1970 }, { "epoch": 0.57, "learning_rate": 0.000839348690591926, "loss": 3.397, "step": 1971 }, { "epoch": 0.57, "learning_rate": 0.000839176368128152, "loss": 3.2737, "step": 1972 }, { "epoch": 0.57, "learning_rate": 0.0008390039710032692, "loss": 3.3572, "step": 1973 }, { "epoch": 0.57, "learning_rate": 0.0008388314992552271, "loss": 3.2798, "step": 1974 }, { "epoch": 0.57, "learning_rate": 0.0008386589529219908, "loss": 3.3979, "step": 1975 }, { "epoch": 0.57, "learning_rate": 0.0008384863320415419, "loss": 3.4746, "step": 1976 }, { "epoch": 0.57, "learning_rate": 0.0008383136366518787, "loss": 3.4024, "step": 1977 }, { "epoch": 0.57, "learning_rate": 0.0008381408667910157, "loss": 3.4492, "step": 1978 }, { "epoch": 0.57, "learning_rate": 0.0008379680224969837, "loss": 3.3682, "step": 1979 }, { "epoch": 0.57, "learning_rate": 0.0008377951038078302, "loss": 3.2377, "step": 1980 }, { "epoch": 0.57, "learning_rate": 0.0008376221107616186, "loss": 3.3355, "step": 1981 }, { "epoch": 0.57, "learning_rate": 0.0008374490433964292, "loss": 3.4091, "step": 1982 }, { "epoch": 0.57, "learning_rate": 0.0008372759017503584, "loss": 3.3204, "step": 1983 }, { "epoch": 0.57, "learning_rate": 0.0008371026858615188, "loss": 3.3658, "step": 1984 }, { "epoch": 0.57, "learning_rate": 0.0008369293957680396, "loss": 3.2496, "step": 1985 }, { "epoch": 0.58, "learning_rate": 0.0008367560315080663, "loss": 3.3147, "step": 1986 }, { "epoch": 0.58, "learning_rate": 0.0008365825931197603, "loss": 3.3209, "step": 1987 }, { "epoch": 0.58, "learning_rate": 0.0008364090806413003, "loss": 3.3813, "step": 1988 }, { "epoch": 0.58, "learning_rate": 0.0008362354941108802, "loss": 3.2619, "step": 1989 }, { "epoch": 0.58, "learning_rate": 0.0008360618335667108, "loss": 3.4472, "step": 1990 }, { "epoch": 0.58, "learning_rate": 0.0008358880990470192, "loss": 3.2791, "step": 1991 }, { "epoch": 0.58, "learning_rate": 0.0008357142905900485, "loss": 3.5078, "step": 1992 }, { "epoch": 0.58, "learning_rate": 0.0008355404082340585, "loss": 3.2592, "step": 1993 }, { "epoch": 0.58, "learning_rate": 0.0008353664520173248, "loss": 3.3373, "step": 1994 }, { "epoch": 0.58, "learning_rate": 0.0008351924219781393, "loss": 3.544, "step": 1995 }, { "epoch": 0.58, "learning_rate": 0.0008350183181548106, "loss": 3.3556, "step": 1996 }, { "epoch": 0.58, "learning_rate": 0.0008348441405856633, "loss": 3.3404, "step": 1997 }, { "epoch": 0.58, "learning_rate": 0.0008346698893090379, "loss": 3.3787, "step": 1998 }, { "epoch": 0.58, "learning_rate": 0.0008344955643632917, "loss": 3.2588, "step": 1999 }, { "epoch": 0.58, "learning_rate": 0.0008343211657867978, "loss": 3.3134, "step": 2000 }, { "epoch": 0.58, "learning_rate": 0.0008341466936179455, "loss": 3.3613, "step": 2001 }, { "epoch": 0.58, "learning_rate": 0.0008339721478951408, "loss": 3.3776, "step": 2002 }, { "epoch": 0.58, "learning_rate": 0.0008337975286568052, "loss": 3.3187, "step": 2003 }, { "epoch": 0.58, "learning_rate": 0.0008336228359413769, "loss": 3.2765, "step": 2004 }, { "epoch": 0.58, "learning_rate": 0.00083344806978731, "loss": 3.4083, "step": 2005 }, { "epoch": 0.58, "learning_rate": 0.0008332732302330748, "loss": 3.3402, "step": 2006 }, { "epoch": 0.58, "learning_rate": 0.000833098317317158, "loss": 3.4671, "step": 2007 }, { "epoch": 0.58, "learning_rate": 0.0008329233310780622, "loss": 3.1967, "step": 2008 }, { "epoch": 0.58, "learning_rate": 0.000832748271554306, "loss": 3.2754, "step": 2009 }, { "epoch": 0.58, "learning_rate": 0.0008325731387844247, "loss": 3.3834, "step": 2010 }, { "epoch": 0.58, "learning_rate": 0.0008323979328069688, "loss": 3.4381, "step": 2011 }, { "epoch": 0.58, "learning_rate": 0.0008322226536605062, "loss": 3.3755, "step": 2012 }, { "epoch": 0.58, "learning_rate": 0.0008320473013836196, "loss": 3.3647, "step": 2013 }, { "epoch": 0.58, "learning_rate": 0.0008318718760149085, "loss": 3.2551, "step": 2014 }, { "epoch": 0.58, "learning_rate": 0.0008316963775929885, "loss": 3.3266, "step": 2015 }, { "epoch": 0.58, "learning_rate": 0.0008315208061564909, "loss": 3.3726, "step": 2016 }, { "epoch": 0.58, "learning_rate": 0.0008313451617440637, "loss": 3.3969, "step": 2017 }, { "epoch": 0.58, "learning_rate": 0.0008311694443943702, "loss": 3.3384, "step": 2018 }, { "epoch": 0.58, "learning_rate": 0.0008309936541460904, "loss": 3.4492, "step": 2019 }, { "epoch": 0.59, "learning_rate": 0.0008308177910379199, "loss": 3.4375, "step": 2020 }, { "epoch": 0.59, "learning_rate": 0.0008306418551085706, "loss": 3.3988, "step": 2021 }, { "epoch": 0.59, "learning_rate": 0.0008304658463967704, "loss": 3.3608, "step": 2022 }, { "epoch": 0.59, "learning_rate": 0.0008302897649412632, "loss": 3.2904, "step": 2023 }, { "epoch": 0.59, "learning_rate": 0.0008301136107808086, "loss": 3.4792, "step": 2024 }, { "epoch": 0.59, "learning_rate": 0.0008299373839541828, "loss": 3.3866, "step": 2025 }, { "epoch": 0.59, "learning_rate": 0.0008297610845001775, "loss": 3.5166, "step": 2026 }, { "epoch": 0.59, "learning_rate": 0.0008295847124576007, "loss": 3.3389, "step": 2027 }, { "epoch": 0.59, "learning_rate": 0.0008294082678652761, "loss": 3.3418, "step": 2028 }, { "epoch": 0.59, "learning_rate": 0.0008292317507620437, "loss": 3.308, "step": 2029 }, { "epoch": 0.59, "learning_rate": 0.0008290551611867593, "loss": 3.399, "step": 2030 }, { "epoch": 0.59, "learning_rate": 0.0008288784991782945, "loss": 3.3189, "step": 2031 }, { "epoch": 0.59, "learning_rate": 0.0008287017647755368, "loss": 3.2981, "step": 2032 }, { "epoch": 0.59, "learning_rate": 0.0008285249580173901, "loss": 3.3544, "step": 2033 }, { "epoch": 0.59, "learning_rate": 0.000828348078942774, "loss": 3.28, "step": 2034 }, { "epoch": 0.59, "learning_rate": 0.0008281711275906237, "loss": 3.2638, "step": 2035 }, { "epoch": 0.59, "learning_rate": 0.0008279941039998906, "loss": 3.3358, "step": 2036 }, { "epoch": 0.59, "learning_rate": 0.0008278170082095422, "loss": 3.3871, "step": 2037 }, { "epoch": 0.59, "learning_rate": 0.0008276398402585614, "loss": 3.4146, "step": 2038 }, { "epoch": 0.59, "learning_rate": 0.0008274626001859475, "loss": 3.3006, "step": 2039 }, { "epoch": 0.59, "learning_rate": 0.0008272852880307154, "loss": 3.41, "step": 2040 }, { "epoch": 0.59, "learning_rate": 0.0008271079038318955, "loss": 3.3944, "step": 2041 }, { "epoch": 0.59, "learning_rate": 0.0008269304476285349, "loss": 3.2374, "step": 2042 }, { "epoch": 0.59, "learning_rate": 0.0008267529194596959, "loss": 3.3536, "step": 2043 }, { "epoch": 0.59, "learning_rate": 0.0008265753193644568, "loss": 3.3419, "step": 2044 }, { "epoch": 0.59, "learning_rate": 0.000826397647381912, "loss": 3.328, "step": 2045 }, { "epoch": 0.59, "learning_rate": 0.0008262199035511713, "loss": 3.4855, "step": 2046 }, { "epoch": 0.59, "learning_rate": 0.0008260420879113605, "loss": 3.412, "step": 2047 }, { "epoch": 0.59, "learning_rate": 0.0008258642005016213, "loss": 3.3747, "step": 2048 }, { "epoch": 0.59, "learning_rate": 0.0008256862413611113, "loss": 3.3767, "step": 2049 }, { "epoch": 0.59, "learning_rate": 0.0008255082105290033, "loss": 3.4244, "step": 2050 }, { "epoch": 0.59, "learning_rate": 0.0008253301080444864, "loss": 3.3287, "step": 2051 }, { "epoch": 0.59, "learning_rate": 0.0008251519339467657, "loss": 3.3744, "step": 2052 }, { "epoch": 0.59, "learning_rate": 0.0008249736882750614, "loss": 3.2847, "step": 2053 }, { "epoch": 0.59, "learning_rate": 0.0008247953710686098, "loss": 3.3765, "step": 2054 }, { "epoch": 0.6, "learning_rate": 0.000824616982366663, "loss": 3.3201, "step": 2055 }, { "epoch": 0.6, "learning_rate": 0.0008244385222084889, "loss": 3.3447, "step": 2056 }, { "epoch": 0.6, "learning_rate": 0.0008242599906333707, "loss": 3.4563, "step": 2057 }, { "epoch": 0.6, "learning_rate": 0.0008240813876806079, "loss": 3.38, "step": 2058 }, { "epoch": 0.6, "learning_rate": 0.0008239027133895153, "loss": 3.4187, "step": 2059 }, { "epoch": 0.6, "learning_rate": 0.0008237239677994234, "loss": 3.2578, "step": 2060 }, { "epoch": 0.6, "learning_rate": 0.0008235451509496788, "loss": 3.3162, "step": 2061 }, { "epoch": 0.6, "learning_rate": 0.0008233662628796435, "loss": 3.5046, "step": 2062 }, { "epoch": 0.6, "learning_rate": 0.0008231873036286947, "loss": 3.327, "step": 2063 }, { "epoch": 0.6, "learning_rate": 0.0008230082732362264, "loss": 3.3191, "step": 2064 }, { "epoch": 0.6, "learning_rate": 0.0008228291717416471, "loss": 3.3624, "step": 2065 }, { "epoch": 0.6, "learning_rate": 0.000822649999184382, "loss": 3.245, "step": 2066 }, { "epoch": 0.6, "learning_rate": 0.0008224707556038711, "loss": 3.3217, "step": 2067 }, { "epoch": 0.6, "learning_rate": 0.00082229144103957, "loss": 3.3457, "step": 2068 }, { "epoch": 0.6, "learning_rate": 0.000822112055530951, "loss": 3.3784, "step": 2069 }, { "epoch": 0.6, "learning_rate": 0.0008219325991175009, "loss": 3.293, "step": 2070 }, { "epoch": 0.6, "learning_rate": 0.0008217530718387223, "loss": 3.3497, "step": 2071 }, { "epoch": 0.6, "learning_rate": 0.000821573473734134, "loss": 3.4371, "step": 2072 }, { "epoch": 0.6, "learning_rate": 0.0008213938048432696, "loss": 3.2978, "step": 2073 }, { "epoch": 0.6, "learning_rate": 0.000821214065205679, "loss": 3.2809, "step": 2074 }, { "epoch": 0.6, "learning_rate": 0.0008210342548609273, "loss": 3.2783, "step": 2075 }, { "epoch": 0.6, "learning_rate": 0.000820854373848595, "loss": 3.2221, "step": 2076 }, { "epoch": 0.6, "learning_rate": 0.0008206744222082785, "loss": 3.3784, "step": 2077 }, { "epoch": 0.6, "learning_rate": 0.0008204943999795896, "loss": 3.4696, "step": 2078 }, { "epoch": 0.6, "learning_rate": 0.0008203143072021555, "loss": 3.3715, "step": 2079 }, { "epoch": 0.6, "learning_rate": 0.0008201341439156194, "loss": 3.3256, "step": 2080 }, { "epoch": 0.6, "learning_rate": 0.0008199539101596393, "loss": 3.386, "step": 2081 }, { "epoch": 0.6, "learning_rate": 0.0008197736059738894, "loss": 3.1622, "step": 2082 }, { "epoch": 0.6, "learning_rate": 0.000819593231398059, "loss": 3.2572, "step": 2083 }, { "epoch": 0.6, "learning_rate": 0.0008194127864718529, "loss": 3.4193, "step": 2084 }, { "epoch": 0.6, "learning_rate": 0.0008192322712349917, "loss": 3.3013, "step": 2085 }, { "epoch": 0.6, "learning_rate": 0.0008190516857272113, "loss": 3.3038, "step": 2086 }, { "epoch": 0.6, "learning_rate": 0.0008188710299882628, "loss": 3.3601, "step": 2087 }, { "epoch": 0.6, "learning_rate": 0.0008186903040579131, "loss": 3.4277, "step": 2088 }, { "epoch": 0.6, "learning_rate": 0.0008185095079759444, "loss": 3.166, "step": 2089 }, { "epoch": 0.61, "learning_rate": 0.0008183286417821548, "loss": 3.3043, "step": 2090 }, { "epoch": 0.61, "learning_rate": 0.0008181477055163567, "loss": 3.3465, "step": 2091 }, { "epoch": 0.61, "learning_rate": 0.0008179666992183791, "loss": 3.3567, "step": 2092 }, { "epoch": 0.61, "learning_rate": 0.0008177856229280659, "loss": 3.198, "step": 2093 }, { "epoch": 0.61, "learning_rate": 0.0008176044766852766, "loss": 3.3882, "step": 2094 }, { "epoch": 0.61, "learning_rate": 0.0008174232605298856, "loss": 3.3909, "step": 2095 }, { "epoch": 0.61, "learning_rate": 0.0008172419745017834, "loss": 3.2309, "step": 2096 }, { "epoch": 0.61, "learning_rate": 0.0008170606186408755, "loss": 3.2033, "step": 2097 }, { "epoch": 0.61, "learning_rate": 0.0008168791929870824, "loss": 3.373, "step": 2098 }, { "epoch": 0.61, "learning_rate": 0.0008166976975803411, "loss": 3.3108, "step": 2099 }, { "epoch": 0.61, "learning_rate": 0.0008165161324606026, "loss": 3.3767, "step": 2100 }, { "epoch": 0.61, "learning_rate": 0.0008163344976678342, "loss": 3.291, "step": 2101 }, { "epoch": 0.61, "learning_rate": 0.0008161527932420181, "loss": 3.2666, "step": 2102 }, { "epoch": 0.61, "learning_rate": 0.000815971019223152, "loss": 3.33, "step": 2103 }, { "epoch": 0.61, "learning_rate": 0.0008157891756512488, "loss": 3.4123, "step": 2104 }, { "epoch": 0.61, "learning_rate": 0.0008156072625663369, "loss": 3.3861, "step": 2105 }, { "epoch": 0.61, "learning_rate": 0.0008154252800084595, "loss": 3.3821, "step": 2106 }, { "epoch": 0.61, "learning_rate": 0.0008152432280176758, "loss": 3.3206, "step": 2107 }, { "epoch": 0.61, "learning_rate": 0.0008150611066340599, "loss": 3.4567, "step": 2108 }, { "epoch": 0.61, "learning_rate": 0.0008148789158977012, "loss": 3.2937, "step": 2109 }, { "epoch": 0.61, "learning_rate": 0.0008146966558487043, "loss": 3.3507, "step": 2110 }, { "epoch": 0.61, "learning_rate": 0.0008145143265271894, "loss": 3.2633, "step": 2111 }, { "epoch": 0.61, "learning_rate": 0.0008143319279732914, "loss": 3.2777, "step": 2112 }, { "epoch": 0.61, "learning_rate": 0.0008141494602271609, "loss": 3.3884, "step": 2113 }, { "epoch": 0.61, "learning_rate": 0.0008139669233289634, "loss": 3.2869, "step": 2114 }, { "epoch": 0.61, "learning_rate": 0.00081378431731888, "loss": 3.3211, "step": 2115 }, { "epoch": 0.61, "learning_rate": 0.0008136016422371065, "loss": 3.3305, "step": 2116 }, { "epoch": 0.61, "learning_rate": 0.0008134188981238546, "loss": 3.3877, "step": 2117 }, { "epoch": 0.61, "learning_rate": 0.0008132360850193506, "loss": 3.2927, "step": 2118 }, { "epoch": 0.61, "learning_rate": 0.0008130532029638361, "loss": 3.3749, "step": 2119 }, { "epoch": 0.61, "learning_rate": 0.0008128702519975679, "loss": 3.1715, "step": 2120 }, { "epoch": 0.61, "learning_rate": 0.0008126872321608184, "loss": 3.2586, "step": 2121 }, { "epoch": 0.61, "learning_rate": 0.0008125041434938746, "loss": 3.412, "step": 2122 }, { "epoch": 0.61, "learning_rate": 0.0008123209860370388, "loss": 3.3046, "step": 2123 }, { "epoch": 0.62, "learning_rate": 0.0008121377598306283, "loss": 3.3415, "step": 2124 }, { "epoch": 0.62, "learning_rate": 0.0008119544649149761, "loss": 3.2916, "step": 2125 }, { "epoch": 0.62, "learning_rate": 0.0008117711013304298, "loss": 3.4712, "step": 2126 }, { "epoch": 0.62, "learning_rate": 0.0008115876691173522, "loss": 3.3304, "step": 2127 }, { "epoch": 0.62, "learning_rate": 0.0008114041683161213, "loss": 3.2743, "step": 2128 }, { "epoch": 0.62, "learning_rate": 0.0008112205989671302, "loss": 3.3253, "step": 2129 }, { "epoch": 0.62, "learning_rate": 0.0008110369611107869, "loss": 3.3865, "step": 2130 }, { "epoch": 0.62, "learning_rate": 0.0008108532547875149, "loss": 3.2456, "step": 2131 }, { "epoch": 0.62, "learning_rate": 0.0008106694800377523, "loss": 3.1915, "step": 2132 }, { "epoch": 0.62, "learning_rate": 0.0008104856369019524, "loss": 3.2667, "step": 2133 }, { "epoch": 0.62, "learning_rate": 0.0008103017254205839, "loss": 3.2965, "step": 2134 }, { "epoch": 0.62, "learning_rate": 0.00081011774563413, "loss": 3.3934, "step": 2135 }, { "epoch": 0.62, "learning_rate": 0.0008099336975830896, "loss": 3.317, "step": 2136 }, { "epoch": 0.62, "learning_rate": 0.0008097495813079756, "loss": 3.3609, "step": 2137 }, { "epoch": 0.62, "learning_rate": 0.000809565396849317, "loss": 3.2618, "step": 2138 }, { "epoch": 0.62, "learning_rate": 0.0008093811442476573, "loss": 3.2941, "step": 2139 }, { "epoch": 0.62, "learning_rate": 0.0008091968235435551, "loss": 3.4445, "step": 2140 }, { "epoch": 0.62, "learning_rate": 0.0008090124347775836, "loss": 3.182, "step": 2141 }, { "epoch": 0.62, "learning_rate": 0.0008088279779903318, "loss": 3.4259, "step": 2142 }, { "epoch": 0.62, "learning_rate": 0.0008086434532224028, "loss": 3.3834, "step": 2143 }, { "epoch": 0.62, "learning_rate": 0.0008084588605144154, "loss": 3.3397, "step": 2144 }, { "epoch": 0.62, "learning_rate": 0.0008082741999070029, "loss": 3.2279, "step": 2145 }, { "epoch": 0.62, "learning_rate": 0.0008080894714408135, "loss": 3.2908, "step": 2146 }, { "epoch": 0.62, "learning_rate": 0.0008079046751565107, "loss": 3.4242, "step": 2147 }, { "epoch": 0.62, "learning_rate": 0.0008077198110947725, "loss": 3.3703, "step": 2148 }, { "epoch": 0.62, "learning_rate": 0.0008075348792962924, "loss": 3.381, "step": 2149 }, { "epoch": 0.62, "learning_rate": 0.0008073498798017785, "loss": 3.254, "step": 2150 }, { "epoch": 0.62, "learning_rate": 0.0008071648126519532, "loss": 3.4739, "step": 2151 }, { "epoch": 0.62, "learning_rate": 0.0008069796778875548, "loss": 3.3563, "step": 2152 }, { "epoch": 0.62, "learning_rate": 0.0008067944755493359, "loss": 3.2915, "step": 2153 }, { "epoch": 0.62, "learning_rate": 0.0008066092056780643, "loss": 3.2568, "step": 2154 }, { "epoch": 0.62, "learning_rate": 0.0008064238683145223, "loss": 3.3293, "step": 2155 }, { "epoch": 0.62, "learning_rate": 0.0008062384634995072, "loss": 3.2936, "step": 2156 }, { "epoch": 0.62, "learning_rate": 0.0008060529912738315, "loss": 3.2912, "step": 2157 }, { "epoch": 0.62, "learning_rate": 0.0008058674516783217, "loss": 3.3809, "step": 2158 }, { "epoch": 0.63, "learning_rate": 0.0008056818447538202, "loss": 3.3233, "step": 2159 }, { "epoch": 0.63, "learning_rate": 0.0008054961705411834, "loss": 3.3607, "step": 2160 }, { "epoch": 0.63, "learning_rate": 0.0008053104290812827, "loss": 3.3175, "step": 2161 }, { "epoch": 0.63, "learning_rate": 0.0008051246204150046, "loss": 3.3384, "step": 2162 }, { "epoch": 0.63, "learning_rate": 0.0008049387445832499, "loss": 3.3329, "step": 2163 }, { "epoch": 0.63, "learning_rate": 0.0008047528016269349, "loss": 3.2286, "step": 2164 }, { "epoch": 0.63, "learning_rate": 0.0008045667915869899, "loss": 3.4024, "step": 2165 }, { "epoch": 0.63, "learning_rate": 0.0008043807145043603, "loss": 3.2363, "step": 2166 }, { "epoch": 0.63, "learning_rate": 0.0008041945704200064, "loss": 3.3358, "step": 2167 }, { "epoch": 0.63, "learning_rate": 0.0008040083593749034, "loss": 3.3777, "step": 2168 }, { "epoch": 0.63, "learning_rate": 0.0008038220814100403, "loss": 3.4601, "step": 2169 }, { "epoch": 0.63, "learning_rate": 0.0008036357365664219, "loss": 3.2846, "step": 2170 }, { "epoch": 0.63, "learning_rate": 0.0008034493248850674, "loss": 3.2696, "step": 2171 }, { "epoch": 0.63, "learning_rate": 0.0008032628464070105, "loss": 3.3914, "step": 2172 }, { "epoch": 0.63, "learning_rate": 0.0008030763011732995, "loss": 3.1885, "step": 2173 }, { "epoch": 0.63, "learning_rate": 0.0008028896892249979, "loss": 3.3052, "step": 2174 }, { "epoch": 0.63, "learning_rate": 0.0008027030106031835, "loss": 3.2597, "step": 2175 }, { "epoch": 0.63, "learning_rate": 0.0008025162653489491, "loss": 3.3002, "step": 2176 }, { "epoch": 0.63, "learning_rate": 0.0008023294535034016, "loss": 3.2688, "step": 2177 }, { "epoch": 0.63, "learning_rate": 0.0008021425751076629, "loss": 3.312, "step": 2178 }, { "epoch": 0.63, "learning_rate": 0.00080195563020287, "loss": 3.3452, "step": 2179 }, { "epoch": 0.63, "learning_rate": 0.0008017686188301737, "loss": 3.4421, "step": 2180 }, { "epoch": 0.63, "learning_rate": 0.0008015815410307398, "loss": 3.3151, "step": 2181 }, { "epoch": 0.63, "learning_rate": 0.0008013943968457491, "loss": 3.3593, "step": 2182 }, { "epoch": 0.63, "learning_rate": 0.0008012071863163964, "loss": 3.3991, "step": 2183 }, { "epoch": 0.63, "learning_rate": 0.0008010199094838914, "loss": 3.4062, "step": 2184 }, { "epoch": 0.63, "learning_rate": 0.0008008325663894586, "loss": 3.3301, "step": 2185 }, { "epoch": 0.63, "learning_rate": 0.0008006451570743365, "loss": 3.2703, "step": 2186 }, { "epoch": 0.63, "learning_rate": 0.0008004576815797786, "loss": 3.386, "step": 2187 }, { "epoch": 0.63, "learning_rate": 0.0008002701399470531, "loss": 3.364, "step": 2188 }, { "epoch": 0.63, "learning_rate": 0.0008000825322174423, "loss": 3.3026, "step": 2189 }, { "epoch": 0.63, "learning_rate": 0.0007998948584322434, "loss": 3.3486, "step": 2190 }, { "epoch": 0.63, "learning_rate": 0.0007997071186327682, "loss": 3.2615, "step": 2191 }, { "epoch": 0.63, "learning_rate": 0.0007995193128603426, "loss": 3.3764, "step": 2192 }, { "epoch": 0.64, "learning_rate": 0.0007993314411563075, "loss": 3.3229, "step": 2193 }, { "epoch": 0.64, "learning_rate": 0.000799143503562018, "loss": 3.3121, "step": 2194 }, { "epoch": 0.64, "learning_rate": 0.000798955500118844, "loss": 3.1774, "step": 2195 }, { "epoch": 0.64, "learning_rate": 0.0007987674308681694, "loss": 3.3891, "step": 2196 }, { "epoch": 0.64, "learning_rate": 0.0007985792958513931, "loss": 3.3273, "step": 2197 }, { "epoch": 0.64, "learning_rate": 0.0007983910951099283, "loss": 3.2368, "step": 2198 }, { "epoch": 0.64, "learning_rate": 0.0007982028286852024, "loss": 3.3407, "step": 2199 }, { "epoch": 0.64, "learning_rate": 0.0007980144966186578, "loss": 3.2784, "step": 2200 }, { "epoch": 0.64, "learning_rate": 0.0007978260989517508, "loss": 3.1925, "step": 2201 }, { "epoch": 0.64, "learning_rate": 0.0007976376357259526, "loss": 3.1958, "step": 2202 }, { "epoch": 0.64, "learning_rate": 0.0007974491069827484, "loss": 3.3584, "step": 2203 }, { "epoch": 0.64, "learning_rate": 0.0007972605127636383, "loss": 3.3274, "step": 2204 }, { "epoch": 0.64, "learning_rate": 0.0007970718531101365, "loss": 3.3039, "step": 2205 }, { "epoch": 0.64, "learning_rate": 0.0007968831280637713, "loss": 3.3252, "step": 2206 }, { "epoch": 0.64, "learning_rate": 0.000796694337666086, "loss": 3.2968, "step": 2207 }, { "epoch": 0.64, "learning_rate": 0.0007965054819586381, "loss": 3.36, "step": 2208 }, { "epoch": 0.64, "learning_rate": 0.0007963165609829995, "loss": 3.3232, "step": 2209 }, { "epoch": 0.64, "learning_rate": 0.0007961275747807562, "loss": 3.4068, "step": 2210 }, { "epoch": 0.64, "learning_rate": 0.0007959385233935086, "loss": 3.3558, "step": 2211 }, { "epoch": 0.64, "learning_rate": 0.0007957494068628718, "loss": 3.4937, "step": 2212 }, { "epoch": 0.64, "learning_rate": 0.0007955602252304752, "loss": 3.187, "step": 2213 }, { "epoch": 0.64, "learning_rate": 0.0007953709785379621, "loss": 3.4378, "step": 2214 }, { "epoch": 0.64, "learning_rate": 0.0007951816668269904, "loss": 3.3885, "step": 2215 }, { "epoch": 0.64, "learning_rate": 0.0007949922901392322, "loss": 3.2606, "step": 2216 }, { "epoch": 0.64, "learning_rate": 0.0007948028485163743, "loss": 3.4462, "step": 2217 }, { "epoch": 0.64, "learning_rate": 0.0007946133420001171, "loss": 3.2161, "step": 2218 }, { "epoch": 0.64, "learning_rate": 0.000794423770632176, "loss": 3.2039, "step": 2219 }, { "epoch": 0.64, "learning_rate": 0.0007942341344542801, "loss": 3.4182, "step": 2220 }, { "epoch": 0.64, "learning_rate": 0.0007940444335081732, "loss": 3.2639, "step": 2221 }, { "epoch": 0.64, "learning_rate": 0.0007938546678356131, "loss": 3.4901, "step": 2222 }, { "epoch": 0.64, "learning_rate": 0.000793664837478372, "loss": 3.2681, "step": 2223 }, { "epoch": 0.64, "learning_rate": 0.0007934749424782361, "loss": 3.2701, "step": 2224 }, { "epoch": 0.64, "learning_rate": 0.0007932849828770061, "loss": 3.347, "step": 2225 }, { "epoch": 0.64, "learning_rate": 0.0007930949587164968, "loss": 3.3469, "step": 2226 }, { "epoch": 0.64, "learning_rate": 0.0007929048700385371, "loss": 3.3277, "step": 2227 }, { "epoch": 0.65, "learning_rate": 0.0007927147168849704, "loss": 3.4065, "step": 2228 }, { "epoch": 0.65, "learning_rate": 0.0007925244992976537, "loss": 3.4147, "step": 2229 }, { "epoch": 0.65, "learning_rate": 0.0007923342173184594, "loss": 3.2844, "step": 2230 }, { "epoch": 0.65, "learning_rate": 0.0007921438709892724, "loss": 3.2478, "step": 2231 }, { "epoch": 0.65, "learning_rate": 0.0007919534603519934, "loss": 3.3189, "step": 2232 }, { "epoch": 0.65, "learning_rate": 0.0007917629854485357, "loss": 3.1947, "step": 2233 }, { "epoch": 0.65, "learning_rate": 0.0007915724463208282, "loss": 3.4606, "step": 2234 }, { "epoch": 0.65, "learning_rate": 0.000791381843010813, "loss": 3.4158, "step": 2235 }, { "epoch": 0.65, "learning_rate": 0.0007911911755604466, "loss": 3.365, "step": 2236 }, { "epoch": 0.65, "learning_rate": 0.0007910004440116996, "loss": 3.3101, "step": 2237 }, { "epoch": 0.65, "learning_rate": 0.0007908096484065569, "loss": 3.3731, "step": 2238 }, { "epoch": 0.65, "learning_rate": 0.0007906187887870172, "loss": 3.4101, "step": 2239 }, { "epoch": 0.65, "learning_rate": 0.0007904278651950933, "loss": 3.3002, "step": 2240 }, { "epoch": 0.65, "learning_rate": 0.0007902368776728125, "loss": 3.4901, "step": 2241 }, { "epoch": 0.65, "learning_rate": 0.0007900458262622156, "loss": 3.2595, "step": 2242 }, { "epoch": 0.65, "learning_rate": 0.0007898547110053581, "loss": 3.2746, "step": 2243 }, { "epoch": 0.65, "learning_rate": 0.0007896635319443087, "loss": 3.2214, "step": 2244 }, { "epoch": 0.65, "learning_rate": 0.000789472289121151, "loss": 3.3826, "step": 2245 }, { "epoch": 0.65, "learning_rate": 0.0007892809825779821, "loss": 3.2913, "step": 2246 }, { "epoch": 0.65, "learning_rate": 0.0007890896123569135, "loss": 3.2191, "step": 2247 }, { "epoch": 0.65, "learning_rate": 0.0007888981785000704, "loss": 3.3184, "step": 2248 }, { "epoch": 0.65, "learning_rate": 0.000788706681049592, "loss": 3.2472, "step": 2249 }, { "epoch": 0.65, "learning_rate": 0.0007885151200476319, "loss": 3.3683, "step": 2250 }, { "epoch": 0.65, "learning_rate": 0.0007883234955363572, "loss": 3.3096, "step": 2251 }, { "epoch": 0.65, "learning_rate": 0.0007881318075579492, "loss": 3.3168, "step": 2252 }, { "epoch": 0.65, "learning_rate": 0.0007879400561546033, "loss": 3.4886, "step": 2253 }, { "epoch": 0.65, "learning_rate": 0.0007877482413685286, "loss": 3.2121, "step": 2254 }, { "epoch": 0.65, "learning_rate": 0.0007875563632419484, "loss": 3.336, "step": 2255 }, { "epoch": 0.65, "learning_rate": 0.0007873644218170996, "loss": 3.3045, "step": 2256 }, { "epoch": 0.65, "learning_rate": 0.0007871724171362336, "loss": 3.3418, "step": 2257 }, { "epoch": 0.65, "learning_rate": 0.000786980349241615, "loss": 3.166, "step": 2258 }, { "epoch": 0.65, "learning_rate": 0.0007867882181755231, "loss": 3.3934, "step": 2259 }, { "epoch": 0.65, "learning_rate": 0.0007865960239802503, "loss": 3.4106, "step": 2260 }, { "epoch": 0.65, "learning_rate": 0.0007864037666981036, "loss": 3.3376, "step": 2261 }, { "epoch": 0.66, "learning_rate": 0.0007862114463714034, "loss": 3.2463, "step": 2262 }, { "epoch": 0.66, "learning_rate": 0.0007860190630424842, "loss": 3.2966, "step": 2263 }, { "epoch": 0.66, "learning_rate": 0.0007858266167536944, "loss": 3.286, "step": 2264 }, { "epoch": 0.66, "learning_rate": 0.0007856341075473961, "loss": 3.3309, "step": 2265 }, { "epoch": 0.66, "learning_rate": 0.0007854415354659654, "loss": 3.3617, "step": 2266 }, { "epoch": 0.66, "learning_rate": 0.0007852489005517922, "loss": 3.3939, "step": 2267 }, { "epoch": 0.66, "learning_rate": 0.0007850562028472801, "loss": 3.4517, "step": 2268 }, { "epoch": 0.66, "learning_rate": 0.0007848634423948469, "loss": 3.3077, "step": 2269 }, { "epoch": 0.66, "learning_rate": 0.0007846706192369235, "loss": 3.3066, "step": 2270 }, { "epoch": 0.66, "learning_rate": 0.0007844777334159554, "loss": 3.3159, "step": 2271 }, { "epoch": 0.66, "learning_rate": 0.0007842847849744013, "loss": 3.3739, "step": 2272 }, { "epoch": 0.66, "learning_rate": 0.0007840917739547342, "loss": 3.2778, "step": 2273 }, { "epoch": 0.66, "learning_rate": 0.0007838987003994405, "loss": 3.4168, "step": 2274 }, { "epoch": 0.66, "learning_rate": 0.0007837055643510202, "loss": 3.3508, "step": 2275 }, { "epoch": 0.66, "learning_rate": 0.0007835123658519878, "loss": 3.3684, "step": 2276 }, { "epoch": 0.66, "learning_rate": 0.0007833191049448706, "loss": 3.3352, "step": 2277 }, { "epoch": 0.66, "learning_rate": 0.0007831257816722104, "loss": 3.294, "step": 2278 }, { "epoch": 0.66, "learning_rate": 0.0007829323960765625, "loss": 3.2529, "step": 2279 }, { "epoch": 0.66, "learning_rate": 0.0007827389482004954, "loss": 3.3472, "step": 2280 }, { "epoch": 0.66, "learning_rate": 0.0007825454380865922, "loss": 3.3397, "step": 2281 }, { "epoch": 0.66, "learning_rate": 0.0007823518657774492, "loss": 3.2644, "step": 2282 }, { "epoch": 0.66, "learning_rate": 0.0007821582313156763, "loss": 3.406, "step": 2283 }, { "epoch": 0.66, "learning_rate": 0.0007819645347438973, "loss": 3.283, "step": 2284 }, { "epoch": 0.66, "learning_rate": 0.0007817707761047497, "loss": 3.4015, "step": 2285 }, { "epoch": 0.66, "learning_rate": 0.0007815769554408845, "loss": 3.3053, "step": 2286 }, { "epoch": 0.66, "learning_rate": 0.0007813830727949663, "loss": 3.3879, "step": 2287 }, { "epoch": 0.66, "learning_rate": 0.0007811891282096737, "loss": 3.2801, "step": 2288 }, { "epoch": 0.66, "learning_rate": 0.0007809951217276985, "loss": 3.2409, "step": 2289 }, { "epoch": 0.66, "learning_rate": 0.0007808010533917464, "loss": 3.3723, "step": 2290 }, { "epoch": 0.66, "learning_rate": 0.0007806069232445368, "loss": 3.3152, "step": 2291 }, { "epoch": 0.66, "learning_rate": 0.0007804127313288023, "loss": 3.4493, "step": 2292 }, { "epoch": 0.66, "learning_rate": 0.0007802184776872894, "loss": 3.2221, "step": 2293 }, { "epoch": 0.66, "learning_rate": 0.0007800241623627582, "loss": 3.2853, "step": 2294 }, { "epoch": 0.66, "learning_rate": 0.0007798297853979823, "loss": 3.2532, "step": 2295 }, { "epoch": 0.66, "learning_rate": 0.0007796353468357489, "loss": 3.2091, "step": 2296 }, { "epoch": 0.67, "learning_rate": 0.0007794408467188586, "loss": 3.3899, "step": 2297 }, { "epoch": 0.67, "learning_rate": 0.0007792462850901258, "loss": 3.3981, "step": 2298 }, { "epoch": 0.67, "learning_rate": 0.0007790516619923783, "loss": 3.2673, "step": 2299 }, { "epoch": 0.67, "learning_rate": 0.0007788569774684575, "loss": 3.3157, "step": 2300 }, { "epoch": 0.67, "learning_rate": 0.0007786622315612182, "loss": 3.3404, "step": 2301 }, { "epoch": 0.67, "learning_rate": 0.0007784674243135289, "loss": 3.4063, "step": 2302 }, { "epoch": 0.67, "learning_rate": 0.0007782725557682713, "loss": 3.3032, "step": 2303 }, { "epoch": 0.67, "learning_rate": 0.0007780776259683411, "loss": 3.3349, "step": 2304 }, { "epoch": 0.67, "learning_rate": 0.000777882634956647, "loss": 3.1847, "step": 2305 }, { "epoch": 0.67, "learning_rate": 0.0007776875827761113, "loss": 3.3071, "step": 2306 }, { "epoch": 0.67, "learning_rate": 0.0007774924694696698, "loss": 3.5699, "step": 2307 }, { "epoch": 0.67, "learning_rate": 0.0007772972950802718, "loss": 3.2079, "step": 2308 }, { "epoch": 0.67, "learning_rate": 0.0007771020596508799, "loss": 3.3501, "step": 2309 }, { "epoch": 0.67, "learning_rate": 0.0007769067632244707, "loss": 3.2777, "step": 2310 }, { "epoch": 0.67, "learning_rate": 0.0007767114058440332, "loss": 3.4477, "step": 2311 }, { "epoch": 0.67, "learning_rate": 0.0007765159875525706, "loss": 3.2204, "step": 2312 }, { "epoch": 0.67, "learning_rate": 0.0007763205083930995, "loss": 3.264, "step": 2313 }, { "epoch": 0.67, "learning_rate": 0.0007761249684086493, "loss": 3.3216, "step": 2314 }, { "epoch": 0.67, "learning_rate": 0.0007759293676422636, "loss": 3.3178, "step": 2315 }, { "epoch": 0.67, "learning_rate": 0.0007757337061369989, "loss": 3.2545, "step": 2316 }, { "epoch": 0.67, "learning_rate": 0.0007755379839359248, "loss": 3.3306, "step": 2317 }, { "epoch": 0.67, "learning_rate": 0.0007753422010821248, "loss": 3.3021, "step": 2318 }, { "epoch": 0.67, "learning_rate": 0.0007751463576186957, "loss": 3.2546, "step": 2319 }, { "epoch": 0.67, "learning_rate": 0.0007749504535887472, "loss": 3.3511, "step": 2320 }, { "epoch": 0.67, "learning_rate": 0.000774754489035403, "loss": 3.1737, "step": 2321 }, { "epoch": 0.67, "learning_rate": 0.0007745584640017995, "loss": 3.267, "step": 2322 }, { "epoch": 0.67, "learning_rate": 0.0007743623785310866, "loss": 3.2648, "step": 2323 }, { "epoch": 0.67, "learning_rate": 0.0007741662326664278, "loss": 3.3597, "step": 2324 }, { "epoch": 0.67, "learning_rate": 0.0007739700264509992, "loss": 3.3107, "step": 2325 }, { "epoch": 0.67, "learning_rate": 0.0007737737599279913, "loss": 3.3417, "step": 2326 }, { "epoch": 0.67, "learning_rate": 0.0007735774331406066, "loss": 3.3739, "step": 2327 }, { "epoch": 0.67, "learning_rate": 0.0007733810461320619, "loss": 3.3832, "step": 2328 }, { "epoch": 0.67, "learning_rate": 0.0007731845989455866, "loss": 3.3474, "step": 2329 }, { "epoch": 0.67, "learning_rate": 0.0007729880916244236, "loss": 3.3403, "step": 2330 }, { "epoch": 0.68, "learning_rate": 0.0007727915242118292, "loss": 3.3854, "step": 2331 }, { "epoch": 0.68, "learning_rate": 0.0007725948967510726, "loss": 3.2718, "step": 2332 }, { "epoch": 0.68, "learning_rate": 0.0007723982092854365, "loss": 3.3024, "step": 2333 }, { "epoch": 0.68, "learning_rate": 0.0007722014618582166, "loss": 3.3894, "step": 2334 }, { "epoch": 0.68, "learning_rate": 0.0007720046545127218, "loss": 3.336, "step": 2335 }, { "epoch": 0.68, "learning_rate": 0.0007718077872922742, "loss": 3.3541, "step": 2336 }, { "epoch": 0.68, "learning_rate": 0.0007716108602402094, "loss": 3.2757, "step": 2337 }, { "epoch": 0.68, "learning_rate": 0.0007714138733998758, "loss": 3.3654, "step": 2338 }, { "epoch": 0.68, "learning_rate": 0.0007712168268146351, "loss": 3.2871, "step": 2339 }, { "epoch": 0.68, "learning_rate": 0.0007710197205278619, "loss": 3.2296, "step": 2340 }, { "epoch": 0.68, "learning_rate": 0.0007708225545829446, "loss": 3.2833, "step": 2341 }, { "epoch": 0.68, "learning_rate": 0.0007706253290232838, "loss": 3.3375, "step": 2342 }, { "epoch": 0.68, "learning_rate": 0.0007704280438922943, "loss": 3.2294, "step": 2343 }, { "epoch": 0.68, "learning_rate": 0.000770230699233403, "loss": 3.3513, "step": 2344 }, { "epoch": 0.68, "learning_rate": 0.0007700332950900504, "loss": 3.2919, "step": 2345 }, { "epoch": 0.68, "learning_rate": 0.00076983583150569, "loss": 3.2693, "step": 2346 }, { "epoch": 0.68, "learning_rate": 0.0007696383085237886, "loss": 3.2395, "step": 2347 }, { "epoch": 0.68, "learning_rate": 0.0007694407261878258, "loss": 3.2252, "step": 2348 }, { "epoch": 0.68, "learning_rate": 0.0007692430845412945, "loss": 3.3333, "step": 2349 }, { "epoch": 0.68, "learning_rate": 0.0007690453836277002, "loss": 3.3296, "step": 2350 }, { "epoch": 0.68, "learning_rate": 0.0007688476234905622, "loss": 3.3576, "step": 2351 }, { "epoch": 0.68, "learning_rate": 0.000768649804173412, "loss": 3.3986, "step": 2352 }, { "epoch": 0.68, "learning_rate": 0.0007684519257197946, "loss": 3.2361, "step": 2353 }, { "epoch": 0.68, "learning_rate": 0.0007682539881732681, "loss": 3.4595, "step": 2354 }, { "epoch": 0.68, "learning_rate": 0.0007680559915774033, "loss": 3.2328, "step": 2355 }, { "epoch": 0.68, "learning_rate": 0.0007678579359757842, "loss": 3.2864, "step": 2356 }, { "epoch": 0.68, "learning_rate": 0.0007676598214120078, "loss": 3.3988, "step": 2357 }, { "epoch": 0.68, "learning_rate": 0.0007674616479296841, "loss": 3.2249, "step": 2358 }, { "epoch": 0.68, "learning_rate": 0.0007672634155724356, "loss": 3.3609, "step": 2359 }, { "epoch": 0.68, "learning_rate": 0.0007670651243838986, "loss": 3.3224, "step": 2360 }, { "epoch": 0.68, "learning_rate": 0.0007668667744077216, "loss": 3.2143, "step": 2361 }, { "epoch": 0.68, "learning_rate": 0.0007666683656875664, "loss": 3.2994, "step": 2362 }, { "epoch": 0.68, "learning_rate": 0.0007664698982671078, "loss": 3.3185, "step": 2363 }, { "epoch": 0.68, "learning_rate": 0.0007662713721900331, "loss": 3.1976, "step": 2364 }, { "epoch": 0.68, "learning_rate": 0.0007660727875000431, "loss": 3.2574, "step": 2365 }, { "epoch": 0.69, "learning_rate": 0.000765874144240851, "loss": 3.2942, "step": 2366 }, { "epoch": 0.69, "learning_rate": 0.0007656754424561834, "loss": 3.3466, "step": 2367 }, { "epoch": 0.69, "learning_rate": 0.000765476682189779, "loss": 3.2743, "step": 2368 }, { "epoch": 0.69, "learning_rate": 0.0007652778634853903, "loss": 3.3277, "step": 2369 }, { "epoch": 0.69, "learning_rate": 0.000765078986386782, "loss": 3.2839, "step": 2370 }, { "epoch": 0.69, "learning_rate": 0.0007648800509377318, "loss": 3.1942, "step": 2371 }, { "epoch": 0.69, "learning_rate": 0.0007646810571820304, "loss": 3.2388, "step": 2372 }, { "epoch": 0.69, "learning_rate": 0.0007644820051634812, "loss": 3.426, "step": 2373 }, { "epoch": 0.69, "learning_rate": 0.0007642828949259007, "loss": 3.3784, "step": 2374 }, { "epoch": 0.69, "learning_rate": 0.0007640837265131177, "loss": 3.3261, "step": 2375 }, { "epoch": 0.69, "learning_rate": 0.0007638844999689743, "loss": 3.307, "step": 2376 }, { "epoch": 0.69, "learning_rate": 0.0007636852153373251, "loss": 3.2962, "step": 2377 }, { "epoch": 0.69, "learning_rate": 0.0007634858726620373, "loss": 3.2425, "step": 2378 }, { "epoch": 0.69, "learning_rate": 0.0007632864719869917, "loss": 3.3222, "step": 2379 }, { "epoch": 0.69, "learning_rate": 0.000763087013356081, "loss": 3.185, "step": 2380 }, { "epoch": 0.69, "learning_rate": 0.0007628874968132111, "loss": 3.3223, "step": 2381 }, { "epoch": 0.69, "learning_rate": 0.0007626879224023002, "loss": 3.2877, "step": 2382 }, { "epoch": 0.69, "learning_rate": 0.00076248829016728, "loss": 3.3245, "step": 2383 }, { "epoch": 0.69, "learning_rate": 0.0007622886001520944, "loss": 3.2963, "step": 2384 }, { "epoch": 0.69, "learning_rate": 0.0007620888524006999, "loss": 3.1869, "step": 2385 }, { "epoch": 0.69, "learning_rate": 0.0007618890469570661, "loss": 3.2662, "step": 2386 }, { "epoch": 0.69, "learning_rate": 0.0007616891838651749, "loss": 3.2476, "step": 2387 }, { "epoch": 0.69, "learning_rate": 0.0007614892631690216, "loss": 3.228, "step": 2388 }, { "epoch": 0.69, "learning_rate": 0.0007612892849126132, "loss": 3.2759, "step": 2389 }, { "epoch": 0.69, "learning_rate": 0.0007610892491399701, "loss": 3.3322, "step": 2390 }, { "epoch": 0.69, "learning_rate": 0.0007608891558951249, "loss": 3.2079, "step": 2391 }, { "epoch": 0.69, "learning_rate": 0.0007606890052221232, "loss": 3.232, "step": 2392 }, { "epoch": 0.69, "learning_rate": 0.0007604887971650233, "loss": 3.3401, "step": 2393 }, { "epoch": 0.69, "learning_rate": 0.0007602885317678957, "loss": 3.1664, "step": 2394 }, { "epoch": 0.69, "learning_rate": 0.0007600882090748238, "loss": 3.2771, "step": 2395 }, { "epoch": 0.69, "learning_rate": 0.0007598878291299036, "loss": 3.3346, "step": 2396 }, { "epoch": 0.69, "learning_rate": 0.0007596873919772438, "loss": 3.3885, "step": 2397 }, { "epoch": 0.69, "learning_rate": 0.0007594868976609657, "loss": 3.2252, "step": 2398 }, { "epoch": 0.69, "learning_rate": 0.0007592863462252024, "loss": 3.1511, "step": 2399 }, { "epoch": 0.7, "learning_rate": 0.0007590857377141009, "loss": 3.3808, "step": 2400 }, { "epoch": 0.7, "learning_rate": 0.0007588850721718199, "loss": 3.3095, "step": 2401 }, { "epoch": 0.7, "learning_rate": 0.0007586843496425309, "loss": 3.3894, "step": 2402 }, { "epoch": 0.7, "learning_rate": 0.0007584835701704176, "loss": 3.3223, "step": 2403 }, { "epoch": 0.7, "learning_rate": 0.0007582827337996768, "loss": 3.3342, "step": 2404 }, { "epoch": 0.7, "learning_rate": 0.0007580818405745176, "loss": 3.3465, "step": 2405 }, { "epoch": 0.7, "learning_rate": 0.0007578808905391615, "loss": 3.3457, "step": 2406 }, { "epoch": 0.7, "learning_rate": 0.0007576798837378425, "loss": 3.3582, "step": 2407 }, { "epoch": 0.7, "learning_rate": 0.0007574788202148071, "loss": 3.2792, "step": 2408 }, { "epoch": 0.7, "learning_rate": 0.0007572777000143145, "loss": 3.3602, "step": 2409 }, { "epoch": 0.7, "learning_rate": 0.0007570765231806362, "loss": 3.254, "step": 2410 }, { "epoch": 0.7, "learning_rate": 0.0007568752897580562, "loss": 3.4319, "step": 2411 }, { "epoch": 0.7, "learning_rate": 0.0007566739997908709, "loss": 3.3807, "step": 2412 }, { "epoch": 0.7, "learning_rate": 0.0007564726533233892, "loss": 3.2483, "step": 2413 }, { "epoch": 0.7, "learning_rate": 0.0007562712503999327, "loss": 3.4165, "step": 2414 }, { "epoch": 0.7, "learning_rate": 0.0007560697910648347, "loss": 3.2272, "step": 2415 }, { "epoch": 0.7, "learning_rate": 0.0007558682753624419, "loss": 3.3466, "step": 2416 }, { "epoch": 0.7, "learning_rate": 0.0007556667033371123, "loss": 3.3784, "step": 2417 }, { "epoch": 0.7, "learning_rate": 0.0007554650750332175, "loss": 3.165, "step": 2418 }, { "epoch": 0.7, "learning_rate": 0.0007552633904951405, "loss": 3.3042, "step": 2419 }, { "epoch": 0.7, "learning_rate": 0.0007550616497672771, "loss": 3.3435, "step": 2420 }, { "epoch": 0.7, "learning_rate": 0.0007548598528940353, "loss": 3.2307, "step": 2421 }, { "epoch": 0.7, "learning_rate": 0.000754657999919836, "loss": 3.3242, "step": 2422 }, { "epoch": 0.7, "learning_rate": 0.0007544560908891116, "loss": 3.255, "step": 2423 }, { "epoch": 0.7, "learning_rate": 0.0007542541258463075, "loss": 3.1781, "step": 2424 }, { "epoch": 0.7, "learning_rate": 0.0007540521048358814, "loss": 3.2379, "step": 2425 }, { "epoch": 0.7, "learning_rate": 0.0007538500279023026, "loss": 3.366, "step": 2426 }, { "epoch": 0.7, "learning_rate": 0.0007536478950900537, "loss": 3.3713, "step": 2427 }, { "epoch": 0.7, "learning_rate": 0.0007534457064436289, "loss": 3.3321, "step": 2428 }, { "epoch": 0.7, "learning_rate": 0.0007532434620075349, "loss": 3.2991, "step": 2429 }, { "epoch": 0.7, "learning_rate": 0.0007530411618262906, "loss": 3.3414, "step": 2430 }, { "epoch": 0.7, "learning_rate": 0.0007528388059444278, "loss": 3.4424, "step": 2431 }, { "epoch": 0.7, "learning_rate": 0.0007526363944064895, "loss": 3.3561, "step": 2432 }, { "epoch": 0.7, "learning_rate": 0.0007524339272570316, "loss": 3.4258, "step": 2433 }, { "epoch": 0.7, "learning_rate": 0.0007522314045406223, "loss": 3.1827, "step": 2434 }, { "epoch": 0.71, "learning_rate": 0.0007520288263018418, "loss": 3.2712, "step": 2435 }, { "epoch": 0.71, "learning_rate": 0.0007518261925852823, "loss": 3.2915, "step": 2436 }, { "epoch": 0.71, "learning_rate": 0.000751623503435549, "loss": 3.3503, "step": 2437 }, { "epoch": 0.71, "learning_rate": 0.0007514207588972583, "loss": 3.2194, "step": 2438 }, { "epoch": 0.71, "learning_rate": 0.0007512179590150396, "loss": 3.2236, "step": 2439 }, { "epoch": 0.71, "learning_rate": 0.000751015103833534, "loss": 3.2936, "step": 2440 }, { "epoch": 0.71, "learning_rate": 0.000750812193397395, "loss": 3.2713, "step": 2441 }, { "epoch": 0.71, "learning_rate": 0.0007506092277512884, "loss": 3.28, "step": 2442 }, { "epoch": 0.71, "learning_rate": 0.0007504062069398918, "loss": 3.2698, "step": 2443 }, { "epoch": 0.71, "learning_rate": 0.0007502031310078949, "loss": 3.378, "step": 2444 }, { "epoch": 0.71, "learning_rate": 0.00075, "loss": 3.3048, "step": 2445 }, { "epoch": 0.71, "learning_rate": 0.0007497968139609213, "loss": 3.4182, "step": 2446 }, { "epoch": 0.71, "learning_rate": 0.0007495935729353849, "loss": 3.3861, "step": 2447 }, { "epoch": 0.71, "learning_rate": 0.0007493902769681292, "loss": 3.325, "step": 2448 }, { "epoch": 0.71, "learning_rate": 0.0007491869261039046, "loss": 3.3921, "step": 2449 }, { "epoch": 0.71, "learning_rate": 0.0007489835203874738, "loss": 3.3489, "step": 2450 }, { "epoch": 0.71, "learning_rate": 0.0007487800598636116, "loss": 3.1312, "step": 2451 }, { "epoch": 0.71, "learning_rate": 0.0007485765445771042, "loss": 3.307, "step": 2452 }, { "epoch": 0.71, "learning_rate": 0.0007483729745727508, "loss": 3.3065, "step": 2453 }, { "epoch": 0.71, "learning_rate": 0.0007481693498953621, "loss": 3.4184, "step": 2454 }, { "epoch": 0.71, "learning_rate": 0.0007479656705897607, "loss": 3.3657, "step": 2455 }, { "epoch": 0.71, "learning_rate": 0.0007477619367007815, "loss": 3.3791, "step": 2456 }, { "epoch": 0.71, "learning_rate": 0.0007475581482732716, "loss": 3.3281, "step": 2457 }, { "epoch": 0.71, "learning_rate": 0.00074735430535209, "loss": 3.4043, "step": 2458 }, { "epoch": 0.71, "learning_rate": 0.000747150407982107, "loss": 3.3448, "step": 2459 }, { "epoch": 0.71, "learning_rate": 0.0007469464562082059, "loss": 3.3861, "step": 2460 }, { "epoch": 0.71, "learning_rate": 0.0007467424500752813, "loss": 3.1929, "step": 2461 }, { "epoch": 0.71, "learning_rate": 0.0007465383896282403, "loss": 3.2409, "step": 2462 }, { "epoch": 0.71, "learning_rate": 0.0007463342749120013, "loss": 3.2858, "step": 2463 }, { "epoch": 0.71, "learning_rate": 0.0007461301059714953, "loss": 3.3522, "step": 2464 }, { "epoch": 0.71, "learning_rate": 0.0007459258828516645, "loss": 3.2691, "step": 2465 }, { "epoch": 0.71, "learning_rate": 0.000745721605597464, "loss": 3.377, "step": 2466 }, { "epoch": 0.71, "learning_rate": 0.0007455172742538599, "loss": 3.3386, "step": 2467 }, { "epoch": 0.71, "learning_rate": 0.0007453128888658307, "loss": 3.3165, "step": 2468 }, { "epoch": 0.72, "learning_rate": 0.0007451084494783667, "loss": 3.207, "step": 2469 }, { "epoch": 0.72, "learning_rate": 0.0007449039561364701, "loss": 3.395, "step": 2470 }, { "epoch": 0.72, "learning_rate": 0.0007446994088851548, "loss": 3.2555, "step": 2471 }, { "epoch": 0.72, "learning_rate": 0.000744494807769447, "loss": 3.3541, "step": 2472 }, { "epoch": 0.72, "learning_rate": 0.0007442901528343841, "loss": 3.3464, "step": 2473 }, { "epoch": 0.72, "learning_rate": 0.0007440854441250159, "loss": 3.4178, "step": 2474 }, { "epoch": 0.72, "learning_rate": 0.0007438806816864039, "loss": 3.418, "step": 2475 }, { "epoch": 0.72, "learning_rate": 0.0007436758655636212, "loss": 3.2953, "step": 2476 }, { "epoch": 0.72, "learning_rate": 0.0007434709958017531, "loss": 3.2259, "step": 2477 }, { "epoch": 0.72, "learning_rate": 0.0007432660724458963, "loss": 3.1261, "step": 2478 }, { "epoch": 0.72, "learning_rate": 0.0007430610955411597, "loss": 3.3843, "step": 2479 }, { "epoch": 0.72, "learning_rate": 0.0007428560651326637, "loss": 3.269, "step": 2480 }, { "epoch": 0.72, "learning_rate": 0.0007426509812655407, "loss": 3.3477, "step": 2481 }, { "epoch": 0.72, "learning_rate": 0.0007424458439849342, "loss": 3.2765, "step": 2482 }, { "epoch": 0.72, "learning_rate": 0.0007422406533360007, "loss": 3.4115, "step": 2483 }, { "epoch": 0.72, "learning_rate": 0.0007420354093639073, "loss": 3.3335, "step": 2484 }, { "epoch": 0.72, "learning_rate": 0.0007418301121138335, "loss": 3.2718, "step": 2485 }, { "epoch": 0.72, "learning_rate": 0.0007416247616309701, "loss": 3.2432, "step": 2486 }, { "epoch": 0.72, "learning_rate": 0.0007414193579605198, "loss": 3.3722, "step": 2487 }, { "epoch": 0.72, "learning_rate": 0.0007412139011476973, "loss": 3.1986, "step": 2488 }, { "epoch": 0.72, "learning_rate": 0.0007410083912377286, "loss": 3.3292, "step": 2489 }, { "epoch": 0.72, "learning_rate": 0.0007408028282758515, "loss": 3.2684, "step": 2490 }, { "epoch": 0.72, "learning_rate": 0.0007405972123073153, "loss": 3.3755, "step": 2491 }, { "epoch": 0.72, "learning_rate": 0.0007403915433773815, "loss": 3.2879, "step": 2492 }, { "epoch": 0.72, "learning_rate": 0.0007401858215313228, "loss": 3.3021, "step": 2493 }, { "epoch": 0.72, "learning_rate": 0.0007399800468144234, "loss": 3.3069, "step": 2494 }, { "epoch": 0.72, "learning_rate": 0.0007397742192719798, "loss": 3.305, "step": 2495 }, { "epoch": 0.72, "learning_rate": 0.0007395683389492995, "loss": 3.2709, "step": 2496 }, { "epoch": 0.72, "learning_rate": 0.0007393624058917019, "loss": 3.3354, "step": 2497 }, { "epoch": 0.72, "learning_rate": 0.0007391564201445181, "loss": 3.2793, "step": 2498 }, { "epoch": 0.72, "learning_rate": 0.0007389503817530904, "loss": 3.2333, "step": 2499 }, { "epoch": 0.72, "learning_rate": 0.0007387442907627732, "loss": 3.2478, "step": 2500 }, { "epoch": 0.72, "learning_rate": 0.0007385381472189321, "loss": 3.2216, "step": 2501 }, { "epoch": 0.72, "learning_rate": 0.0007383319511669443, "loss": 3.3405, "step": 2502 }, { "epoch": 0.72, "learning_rate": 0.0007381257026521988, "loss": 3.2993, "step": 2503 }, { "epoch": 0.73, "learning_rate": 0.0007379194017200958, "loss": 3.2292, "step": 2504 }, { "epoch": 0.73, "learning_rate": 0.0007377130484160476, "loss": 3.3309, "step": 2505 }, { "epoch": 0.73, "learning_rate": 0.0007375066427854774, "loss": 3.4123, "step": 2506 }, { "epoch": 0.73, "learning_rate": 0.0007373001848738202, "loss": 3.2293, "step": 2507 }, { "epoch": 0.73, "learning_rate": 0.0007370936747265226, "loss": 3.2575, "step": 2508 }, { "epoch": 0.73, "learning_rate": 0.0007368871123890425, "loss": 3.2899, "step": 2509 }, { "epoch": 0.73, "learning_rate": 0.0007366804979068493, "loss": 3.2131, "step": 2510 }, { "epoch": 0.73, "learning_rate": 0.0007364738313254243, "loss": 3.3758, "step": 2511 }, { "epoch": 0.73, "learning_rate": 0.0007362671126902594, "loss": 3.3223, "step": 2512 }, { "epoch": 0.73, "learning_rate": 0.0007360603420468589, "loss": 3.206, "step": 2513 }, { "epoch": 0.73, "learning_rate": 0.000735853519440738, "loss": 3.3188, "step": 2514 }, { "epoch": 0.73, "learning_rate": 0.0007356466449174235, "loss": 3.3324, "step": 2515 }, { "epoch": 0.73, "learning_rate": 0.0007354397185224535, "loss": 3.2082, "step": 2516 }, { "epoch": 0.73, "learning_rate": 0.0007352327403013779, "loss": 3.2798, "step": 2517 }, { "epoch": 0.73, "learning_rate": 0.0007350257102997573, "loss": 3.2976, "step": 2518 }, { "epoch": 0.73, "learning_rate": 0.0007348186285631646, "loss": 3.187, "step": 2519 }, { "epoch": 0.73, "learning_rate": 0.000734611495137183, "loss": 3.2669, "step": 2520 }, { "epoch": 0.73, "learning_rate": 0.0007344043100674083, "loss": 3.2487, "step": 2521 }, { "epoch": 0.73, "learning_rate": 0.0007341970733994466, "loss": 3.3945, "step": 2522 }, { "epoch": 0.73, "learning_rate": 0.0007339897851789162, "loss": 3.2723, "step": 2523 }, { "epoch": 0.73, "learning_rate": 0.0007337824454514462, "loss": 3.2243, "step": 2524 }, { "epoch": 0.73, "learning_rate": 0.0007335750542626771, "loss": 3.3568, "step": 2525 }, { "epoch": 0.73, "learning_rate": 0.000733367611658261, "loss": 3.179, "step": 2526 }, { "epoch": 0.73, "learning_rate": 0.0007331601176838612, "loss": 3.2023, "step": 2527 }, { "epoch": 0.73, "learning_rate": 0.0007329525723851519, "loss": 3.3097, "step": 2528 }, { "epoch": 0.73, "learning_rate": 0.0007327449758078193, "loss": 3.4292, "step": 2529 }, { "epoch": 0.73, "learning_rate": 0.0007325373279975602, "loss": 3.2049, "step": 2530 }, { "epoch": 0.73, "learning_rate": 0.0007323296290000835, "loss": 3.362, "step": 2531 }, { "epoch": 0.73, "learning_rate": 0.0007321218788611086, "loss": 3.2865, "step": 2532 }, { "epoch": 0.73, "learning_rate": 0.0007319140776263664, "loss": 3.3078, "step": 2533 }, { "epoch": 0.73, "learning_rate": 0.0007317062253415993, "loss": 3.2789, "step": 2534 }, { "epoch": 0.73, "learning_rate": 0.0007314983220525604, "loss": 3.3022, "step": 2535 }, { "epoch": 0.73, "learning_rate": 0.0007312903678050146, "loss": 3.2268, "step": 2536 }, { "epoch": 0.73, "learning_rate": 0.0007310823626447378, "loss": 3.2673, "step": 2537 }, { "epoch": 0.74, "learning_rate": 0.000730874306617517, "loss": 3.2883, "step": 2538 }, { "epoch": 0.74, "learning_rate": 0.0007306661997691504, "loss": 3.2493, "step": 2539 }, { "epoch": 0.74, "learning_rate": 0.0007304580421454476, "loss": 3.2458, "step": 2540 }, { "epoch": 0.74, "learning_rate": 0.0007302498337922293, "loss": 3.2342, "step": 2541 }, { "epoch": 0.74, "learning_rate": 0.0007300415747553271, "loss": 3.3653, "step": 2542 }, { "epoch": 0.74, "learning_rate": 0.000729833265080584, "loss": 3.2753, "step": 2543 }, { "epoch": 0.74, "learning_rate": 0.0007296249048138543, "loss": 3.1613, "step": 2544 }, { "epoch": 0.74, "learning_rate": 0.0007294164940010031, "loss": 3.2398, "step": 2545 }, { "epoch": 0.74, "learning_rate": 0.0007292080326879067, "loss": 3.3029, "step": 2546 }, { "epoch": 0.74, "learning_rate": 0.0007289995209204529, "loss": 3.3486, "step": 2547 }, { "epoch": 0.74, "learning_rate": 0.0007287909587445398, "loss": 3.2953, "step": 2548 }, { "epoch": 0.74, "learning_rate": 0.0007285823462060776, "loss": 3.3083, "step": 2549 }, { "epoch": 0.74, "learning_rate": 0.0007283736833509867, "loss": 3.314, "step": 2550 }, { "epoch": 0.74, "learning_rate": 0.0007281649702251993, "loss": 3.1833, "step": 2551 }, { "epoch": 0.74, "learning_rate": 0.0007279562068746581, "loss": 3.3149, "step": 2552 }, { "epoch": 0.74, "learning_rate": 0.0007277473933453169, "loss": 3.2666, "step": 2553 }, { "epoch": 0.74, "learning_rate": 0.0007275385296831413, "loss": 3.3772, "step": 2554 }, { "epoch": 0.74, "learning_rate": 0.0007273296159341069, "loss": 3.4231, "step": 2555 }, { "epoch": 0.74, "learning_rate": 0.0007271206521442007, "loss": 3.3128, "step": 2556 }, { "epoch": 0.74, "learning_rate": 0.0007269116383594211, "loss": 3.3952, "step": 2557 }, { "epoch": 0.74, "learning_rate": 0.000726702574625777, "loss": 3.2771, "step": 2558 }, { "epoch": 0.74, "learning_rate": 0.0007264934609892888, "loss": 3.214, "step": 2559 }, { "epoch": 0.74, "learning_rate": 0.0007262842974959873, "loss": 3.2437, "step": 2560 }, { "epoch": 0.74, "learning_rate": 0.0007260750841919144, "loss": 3.2546, "step": 2561 }, { "epoch": 0.74, "learning_rate": 0.0007258658211231235, "loss": 3.2326, "step": 2562 }, { "epoch": 0.74, "learning_rate": 0.0007256565083356785, "loss": 3.3274, "step": 2563 }, { "epoch": 0.74, "learning_rate": 0.0007254471458756543, "loss": 3.2817, "step": 2564 }, { "epoch": 0.74, "learning_rate": 0.0007252377337891364, "loss": 3.301, "step": 2565 }, { "epoch": 0.74, "learning_rate": 0.0007250282721222222, "loss": 3.1949, "step": 2566 }, { "epoch": 0.74, "learning_rate": 0.000724818760921019, "loss": 3.2885, "step": 2567 }, { "epoch": 0.74, "learning_rate": 0.0007246092002316456, "loss": 3.2824, "step": 2568 }, { "epoch": 0.74, "learning_rate": 0.0007243995901002312, "loss": 3.3139, "step": 2569 }, { "epoch": 0.74, "learning_rate": 0.0007241899305729162, "loss": 3.1892, "step": 2570 }, { "epoch": 0.74, "learning_rate": 0.0007239802216958522, "loss": 3.3741, "step": 2571 }, { "epoch": 0.74, "learning_rate": 0.0007237704635152011, "loss": 3.3037, "step": 2572 }, { "epoch": 0.75, "learning_rate": 0.0007235606560771359, "loss": 3.3483, "step": 2573 }, { "epoch": 0.75, "learning_rate": 0.0007233507994278403, "loss": 3.2818, "step": 2574 }, { "epoch": 0.75, "learning_rate": 0.0007231408936135091, "loss": 3.1891, "step": 2575 }, { "epoch": 0.75, "learning_rate": 0.0007229309386803476, "loss": 3.3037, "step": 2576 }, { "epoch": 0.75, "learning_rate": 0.0007227209346745723, "loss": 3.3566, "step": 2577 }, { "epoch": 0.75, "learning_rate": 0.00072251088164241, "loss": 3.2835, "step": 2578 }, { "epoch": 0.75, "learning_rate": 0.0007223007796300986, "loss": 3.2505, "step": 2579 }, { "epoch": 0.75, "learning_rate": 0.0007220906286838868, "loss": 3.2368, "step": 2580 }, { "epoch": 0.75, "learning_rate": 0.0007218804288500342, "loss": 3.3388, "step": 2581 }, { "epoch": 0.75, "learning_rate": 0.000721670180174811, "loss": 3.2866, "step": 2582 }, { "epoch": 0.75, "learning_rate": 0.0007214598827044979, "loss": 3.3073, "step": 2583 }, { "epoch": 0.75, "learning_rate": 0.0007212495364853866, "loss": 3.3432, "step": 2584 }, { "epoch": 0.75, "learning_rate": 0.0007210391415637796, "loss": 3.2473, "step": 2585 }, { "epoch": 0.75, "learning_rate": 0.0007208286979859899, "loss": 3.2712, "step": 2586 }, { "epoch": 0.75, "learning_rate": 0.0007206182057983415, "loss": 3.2972, "step": 2587 }, { "epoch": 0.75, "learning_rate": 0.0007204076650471689, "loss": 3.2631, "step": 2588 }, { "epoch": 0.75, "learning_rate": 0.0007201970757788173, "loss": 3.2795, "step": 2589 }, { "epoch": 0.75, "learning_rate": 0.0007199864380396424, "loss": 3.3941, "step": 2590 }, { "epoch": 0.75, "learning_rate": 0.0007197757518760112, "loss": 3.3492, "step": 2591 }, { "epoch": 0.75, "learning_rate": 0.0007195650173343007, "loss": 3.2969, "step": 2592 }, { "epoch": 0.75, "learning_rate": 0.0007193542344608987, "loss": 3.367, "step": 2593 }, { "epoch": 0.75, "learning_rate": 0.0007191434033022038, "loss": 3.2465, "step": 2594 }, { "epoch": 0.75, "learning_rate": 0.0007189325239046253, "loss": 3.2793, "step": 2595 }, { "epoch": 0.75, "learning_rate": 0.0007187215963145827, "loss": 3.3054, "step": 2596 }, { "epoch": 0.75, "learning_rate": 0.0007185106205785067, "loss": 3.3007, "step": 2597 }, { "epoch": 0.75, "learning_rate": 0.0007182995967428379, "loss": 3.3126, "step": 2598 }, { "epoch": 0.75, "learning_rate": 0.0007180885248540283, "loss": 3.2236, "step": 2599 }, { "epoch": 0.75, "learning_rate": 0.0007178774049585397, "loss": 3.0986, "step": 2600 }, { "epoch": 0.75, "learning_rate": 0.000717666237102845, "loss": 3.2269, "step": 2601 }, { "epoch": 0.75, "learning_rate": 0.0007174550213334273, "loss": 3.2539, "step": 2602 }, { "epoch": 0.75, "learning_rate": 0.0007172437576967805, "loss": 3.4594, "step": 2603 }, { "epoch": 0.75, "learning_rate": 0.0007170324462394091, "loss": 3.2076, "step": 2604 }, { "epoch": 0.75, "learning_rate": 0.0007168210870078277, "loss": 3.4187, "step": 2605 }, { "epoch": 0.75, "learning_rate": 0.0007166096800485618, "loss": 3.2836, "step": 2606 }, { "epoch": 0.76, "learning_rate": 0.0007163982254081475, "loss": 3.2759, "step": 2607 }, { "epoch": 0.76, "learning_rate": 0.0007161867231331309, "loss": 3.2833, "step": 2608 }, { "epoch": 0.76, "learning_rate": 0.000715975173270069, "loss": 3.3048, "step": 2609 }, { "epoch": 0.76, "learning_rate": 0.0007157635758655294, "loss": 3.3058, "step": 2610 }, { "epoch": 0.76, "learning_rate": 0.0007155519309660896, "loss": 3.2224, "step": 2611 }, { "epoch": 0.76, "learning_rate": 0.0007153402386183378, "loss": 3.2768, "step": 2612 }, { "epoch": 0.76, "learning_rate": 0.000715128498868873, "loss": 3.3039, "step": 2613 }, { "epoch": 0.76, "learning_rate": 0.0007149167117643043, "loss": 3.3904, "step": 2614 }, { "epoch": 0.76, "learning_rate": 0.0007147048773512513, "loss": 3.2719, "step": 2615 }, { "epoch": 0.76, "learning_rate": 0.0007144929956763437, "loss": 3.4837, "step": 2616 }, { "epoch": 0.76, "learning_rate": 0.0007142810667862223, "loss": 3.1921, "step": 2617 }, { "epoch": 0.76, "learning_rate": 0.0007140690907275376, "loss": 3.3359, "step": 2618 }, { "epoch": 0.76, "learning_rate": 0.0007138570675469511, "loss": 3.1794, "step": 2619 }, { "epoch": 0.76, "learning_rate": 0.0007136449972911339, "loss": 3.3222, "step": 2620 }, { "epoch": 0.76, "learning_rate": 0.0007134328800067684, "loss": 3.3043, "step": 2621 }, { "epoch": 0.76, "learning_rate": 0.0007132207157405462, "loss": 3.3152, "step": 2622 }, { "epoch": 0.76, "learning_rate": 0.0007130085045391706, "loss": 3.3887, "step": 2623 }, { "epoch": 0.76, "learning_rate": 0.0007127962464493542, "loss": 3.2894, "step": 2624 }, { "epoch": 0.76, "learning_rate": 0.0007125839415178203, "loss": 3.1888, "step": 2625 }, { "epoch": 0.76, "learning_rate": 0.0007123715897913025, "loss": 3.3921, "step": 2626 }, { "epoch": 0.76, "learning_rate": 0.0007121591913165446, "loss": 3.3132, "step": 2627 }, { "epoch": 0.76, "learning_rate": 0.0007119467461403009, "loss": 3.2523, "step": 2628 }, { "epoch": 0.76, "learning_rate": 0.0007117342543093357, "loss": 3.4282, "step": 2629 }, { "epoch": 0.76, "learning_rate": 0.0007115217158704237, "loss": 3.3204, "step": 2630 }, { "epoch": 0.76, "learning_rate": 0.0007113091308703497, "loss": 3.2632, "step": 2631 }, { "epoch": 0.76, "learning_rate": 0.0007110964993559094, "loss": 3.2652, "step": 2632 }, { "epoch": 0.76, "learning_rate": 0.0007108838213739078, "loss": 3.239, "step": 2633 }, { "epoch": 0.76, "learning_rate": 0.000710671096971161, "loss": 3.4205, "step": 2634 }, { "epoch": 0.76, "learning_rate": 0.0007104583261944943, "loss": 3.2946, "step": 2635 }, { "epoch": 0.76, "learning_rate": 0.0007102455090907445, "loss": 3.3062, "step": 2636 }, { "epoch": 0.76, "learning_rate": 0.0007100326457067575, "loss": 3.3325, "step": 2637 }, { "epoch": 0.76, "learning_rate": 0.0007098197360893898, "loss": 3.3041, "step": 2638 }, { "epoch": 0.76, "learning_rate": 0.0007096067802855082, "loss": 3.3117, "step": 2639 }, { "epoch": 0.76, "learning_rate": 0.0007093937783419896, "loss": 3.356, "step": 2640 }, { "epoch": 0.76, "learning_rate": 0.0007091807303057207, "loss": 3.1935, "step": 2641 }, { "epoch": 0.77, "learning_rate": 0.0007089676362235991, "loss": 3.1475, "step": 2642 }, { "epoch": 0.77, "learning_rate": 0.0007087544961425316, "loss": 3.2449, "step": 2643 }, { "epoch": 0.77, "learning_rate": 0.0007085413101094358, "loss": 3.3046, "step": 2644 }, { "epoch": 0.77, "learning_rate": 0.0007083280781712394, "loss": 3.3148, "step": 2645 }, { "epoch": 0.77, "learning_rate": 0.0007081148003748799, "loss": 3.275, "step": 2646 }, { "epoch": 0.77, "learning_rate": 0.0007079014767673047, "loss": 3.3395, "step": 2647 }, { "epoch": 0.77, "learning_rate": 0.0007076881073954721, "loss": 3.3111, "step": 2648 }, { "epoch": 0.77, "learning_rate": 0.0007074746923063497, "loss": 3.3318, "step": 2649 }, { "epoch": 0.77, "learning_rate": 0.0007072612315469155, "loss": 3.2956, "step": 2650 }, { "epoch": 0.77, "learning_rate": 0.0007070477251641575, "loss": 3.2945, "step": 2651 }, { "epoch": 0.77, "learning_rate": 0.0007068341732050737, "loss": 3.3266, "step": 2652 }, { "epoch": 0.77, "learning_rate": 0.0007066205757166722, "loss": 3.3074, "step": 2653 }, { "epoch": 0.77, "learning_rate": 0.000706406932745971, "loss": 3.332, "step": 2654 }, { "epoch": 0.77, "learning_rate": 0.0007061932443399985, "loss": 3.3611, "step": 2655 }, { "epoch": 0.77, "learning_rate": 0.0007059795105457925, "loss": 3.1384, "step": 2656 }, { "epoch": 0.77, "learning_rate": 0.0007057657314104011, "loss": 3.1879, "step": 2657 }, { "epoch": 0.77, "learning_rate": 0.0007055519069808827, "loss": 3.2258, "step": 2658 }, { "epoch": 0.77, "learning_rate": 0.000705338037304305, "loss": 3.3975, "step": 2659 }, { "epoch": 0.77, "learning_rate": 0.000705124122427746, "loss": 3.1883, "step": 2660 }, { "epoch": 0.77, "learning_rate": 0.0007049101623982937, "loss": 3.3707, "step": 2661 }, { "epoch": 0.77, "learning_rate": 0.0007046961572630462, "loss": 3.3252, "step": 2662 }, { "epoch": 0.77, "learning_rate": 0.0007044821070691113, "loss": 3.4178, "step": 2663 }, { "epoch": 0.77, "learning_rate": 0.0007042680118636065, "loss": 3.3224, "step": 2664 }, { "epoch": 0.77, "learning_rate": 0.0007040538716936597, "loss": 3.3855, "step": 2665 }, { "epoch": 0.77, "learning_rate": 0.0007038396866064083, "loss": 3.4197, "step": 2666 }, { "epoch": 0.77, "learning_rate": 0.0007036254566489997, "loss": 3.2673, "step": 2667 }, { "epoch": 0.77, "learning_rate": 0.0007034111818685913, "loss": 3.2082, "step": 2668 }, { "epoch": 0.77, "learning_rate": 0.0007031968623123502, "loss": 3.4433, "step": 2669 }, { "epoch": 0.77, "learning_rate": 0.0007029824980274535, "loss": 3.4111, "step": 2670 }, { "epoch": 0.77, "learning_rate": 0.0007027680890610881, "loss": 3.3303, "step": 2671 }, { "epoch": 0.77, "learning_rate": 0.0007025536354604508, "loss": 3.1658, "step": 2672 }, { "epoch": 0.77, "learning_rate": 0.000702339137272748, "loss": 3.2231, "step": 2673 }, { "epoch": 0.77, "learning_rate": 0.0007021245945451961, "loss": 3.2974, "step": 2674 }, { "epoch": 0.77, "learning_rate": 0.0007019100073250212, "loss": 3.2037, "step": 2675 }, { "epoch": 0.77, "learning_rate": 0.0007016953756594594, "loss": 3.3163, "step": 2676 }, { "epoch": 0.78, "learning_rate": 0.0007014806995957562, "loss": 3.2365, "step": 2677 }, { "epoch": 0.78, "learning_rate": 0.0007012659791811674, "loss": 3.3193, "step": 2678 }, { "epoch": 0.78, "learning_rate": 0.000701051214462958, "loss": 3.2385, "step": 2679 }, { "epoch": 0.78, "learning_rate": 0.0007008364054884032, "loss": 3.3654, "step": 2680 }, { "epoch": 0.78, "learning_rate": 0.0007006215523047879, "loss": 3.3026, "step": 2681 }, { "epoch": 0.78, "learning_rate": 0.0007004066549594063, "loss": 3.2484, "step": 2682 }, { "epoch": 0.78, "learning_rate": 0.0007001917134995627, "loss": 3.1977, "step": 2683 }, { "epoch": 0.78, "learning_rate": 0.000699976727972571, "loss": 3.392, "step": 2684 }, { "epoch": 0.78, "learning_rate": 0.0006997616984257552, "loss": 3.2801, "step": 2685 }, { "epoch": 0.78, "learning_rate": 0.0006995466249064482, "loss": 3.3344, "step": 2686 }, { "epoch": 0.78, "learning_rate": 0.0006993315074619931, "loss": 3.3117, "step": 2687 }, { "epoch": 0.78, "learning_rate": 0.0006991163461397425, "loss": 3.1371, "step": 2688 }, { "epoch": 0.78, "learning_rate": 0.0006989011409870591, "loss": 3.3533, "step": 2689 }, { "epoch": 0.78, "learning_rate": 0.0006986858920513145, "loss": 3.2598, "step": 2690 }, { "epoch": 0.78, "learning_rate": 0.0006984705993798904, "loss": 3.3226, "step": 2691 }, { "epoch": 0.78, "learning_rate": 0.0006982552630201782, "loss": 3.2958, "step": 2692 }, { "epoch": 0.78, "learning_rate": 0.0006980398830195785, "loss": 3.2262, "step": 2693 }, { "epoch": 0.78, "learning_rate": 0.0006978244594255019, "loss": 3.1237, "step": 2694 }, { "epoch": 0.78, "learning_rate": 0.0006976089922853685, "loss": 3.332, "step": 2695 }, { "epoch": 0.78, "learning_rate": 0.0006973934816466078, "loss": 3.2218, "step": 2696 }, { "epoch": 0.78, "learning_rate": 0.0006971779275566593, "loss": 3.217, "step": 2697 }, { "epoch": 0.78, "learning_rate": 0.0006969623300629716, "loss": 3.2229, "step": 2698 }, { "epoch": 0.78, "learning_rate": 0.000696746689213003, "loss": 3.2771, "step": 2699 }, { "epoch": 0.78, "learning_rate": 0.0006965310050542215, "loss": 3.2674, "step": 2700 }, { "epoch": 0.78, "learning_rate": 0.0006963152776341043, "loss": 3.3478, "step": 2701 }, { "epoch": 0.78, "learning_rate": 0.0006960995070001387, "loss": 3.3323, "step": 2702 }, { "epoch": 0.78, "learning_rate": 0.0006958836931998209, "loss": 3.282, "step": 2703 }, { "epoch": 0.78, "learning_rate": 0.000695667836280657, "loss": 3.1812, "step": 2704 }, { "epoch": 0.78, "learning_rate": 0.0006954519362901621, "loss": 3.1915, "step": 2705 }, { "epoch": 0.78, "learning_rate": 0.0006952359932758616, "loss": 3.2424, "step": 2706 }, { "epoch": 0.78, "learning_rate": 0.0006950200072852896, "loss": 3.2786, "step": 2707 }, { "epoch": 0.78, "learning_rate": 0.00069480397836599, "loss": 3.3386, "step": 2708 }, { "epoch": 0.78, "learning_rate": 0.0006945879065655164, "loss": 3.3075, "step": 2709 }, { "epoch": 0.78, "learning_rate": 0.0006943717919314311, "loss": 3.2141, "step": 2710 }, { "epoch": 0.79, "learning_rate": 0.0006941556345113066, "loss": 3.2514, "step": 2711 }, { "epoch": 0.79, "learning_rate": 0.0006939394343527243, "loss": 3.2487, "step": 2712 }, { "epoch": 0.79, "learning_rate": 0.0006937231915032751, "loss": 3.2856, "step": 2713 }, { "epoch": 0.79, "learning_rate": 0.0006935069060105599, "loss": 3.1706, "step": 2714 }, { "epoch": 0.79, "learning_rate": 0.000693290577922188, "loss": 3.3809, "step": 2715 }, { "epoch": 0.79, "learning_rate": 0.0006930742072857787, "loss": 3.4066, "step": 2716 }, { "epoch": 0.79, "learning_rate": 0.0006928577941489606, "loss": 3.2659, "step": 2717 }, { "epoch": 0.79, "learning_rate": 0.0006926413385593715, "loss": 3.4152, "step": 2718 }, { "epoch": 0.79, "learning_rate": 0.0006924248405646589, "loss": 3.2901, "step": 2719 }, { "epoch": 0.79, "learning_rate": 0.0006922083002124789, "loss": 3.3123, "step": 2720 }, { "epoch": 0.79, "learning_rate": 0.0006919917175504977, "loss": 3.3301, "step": 2721 }, { "epoch": 0.79, "learning_rate": 0.0006917750926263903, "loss": 3.324, "step": 2722 }, { "epoch": 0.79, "learning_rate": 0.0006915584254878416, "loss": 3.2039, "step": 2723 }, { "epoch": 0.79, "learning_rate": 0.000691341716182545, "loss": 3.309, "step": 2724 }, { "epoch": 0.79, "learning_rate": 0.0006911249647582037, "loss": 3.5544, "step": 2725 }, { "epoch": 0.79, "learning_rate": 0.0006909081712625299, "loss": 3.2108, "step": 2726 }, { "epoch": 0.79, "learning_rate": 0.0006906913357432454, "loss": 3.1977, "step": 2727 }, { "epoch": 0.79, "learning_rate": 0.0006904744582480812, "loss": 3.1041, "step": 2728 }, { "epoch": 0.79, "learning_rate": 0.0006902575388247772, "loss": 3.3276, "step": 2729 }, { "epoch": 0.79, "learning_rate": 0.0006900405775210828, "loss": 3.4057, "step": 2730 }, { "epoch": 0.79, "learning_rate": 0.0006898235743847563, "loss": 3.2728, "step": 2731 }, { "epoch": 0.79, "learning_rate": 0.0006896065294635657, "loss": 3.26, "step": 2732 }, { "epoch": 0.79, "learning_rate": 0.000689389442805288, "loss": 3.2951, "step": 2733 }, { "epoch": 0.79, "learning_rate": 0.0006891723144577094, "loss": 3.2983, "step": 2734 }, { "epoch": 0.79, "learning_rate": 0.0006889551444686249, "loss": 3.195, "step": 2735 }, { "epoch": 0.79, "learning_rate": 0.0006887379328858392, "loss": 3.2447, "step": 2736 }, { "epoch": 0.79, "learning_rate": 0.0006885206797571659, "loss": 3.166, "step": 2737 }, { "epoch": 0.79, "learning_rate": 0.0006883033851304279, "loss": 3.2477, "step": 2738 }, { "epoch": 0.79, "learning_rate": 0.0006880860490534568, "loss": 3.2551, "step": 2739 }, { "epoch": 0.79, "learning_rate": 0.0006878686715740942, "loss": 3.1548, "step": 2740 }, { "epoch": 0.79, "learning_rate": 0.0006876512527401897, "loss": 3.2234, "step": 2741 }, { "epoch": 0.79, "learning_rate": 0.0006874337925996028, "loss": 3.1728, "step": 2742 }, { "epoch": 0.79, "learning_rate": 0.000687216291200202, "loss": 3.1858, "step": 2743 }, { "epoch": 0.79, "learning_rate": 0.0006869987485898643, "loss": 3.2024, "step": 2744 }, { "epoch": 0.79, "learning_rate": 0.0006867811648164768, "loss": 3.2454, "step": 2745 }, { "epoch": 0.8, "learning_rate": 0.0006865635399279347, "loss": 3.2286, "step": 2746 }, { "epoch": 0.8, "learning_rate": 0.0006863458739721428, "loss": 3.2516, "step": 2747 }, { "epoch": 0.8, "learning_rate": 0.0006861281669970143, "loss": 3.294, "step": 2748 }, { "epoch": 0.8, "learning_rate": 0.0006859104190504725, "loss": 3.4409, "step": 2749 }, { "epoch": 0.8, "learning_rate": 0.0006856926301804488, "loss": 3.3722, "step": 2750 }, { "epoch": 0.8, "learning_rate": 0.000685474800434884, "loss": 3.3042, "step": 2751 }, { "epoch": 0.8, "learning_rate": 0.0006852569298617277, "loss": 3.3402, "step": 2752 }, { "epoch": 0.8, "learning_rate": 0.0006850390185089385, "loss": 3.2451, "step": 2753 }, { "epoch": 0.8, "learning_rate": 0.0006848210664244846, "loss": 3.3127, "step": 2754 }, { "epoch": 0.8, "learning_rate": 0.0006846030736563422, "loss": 3.2795, "step": 2755 }, { "epoch": 0.8, "learning_rate": 0.000684385040252497, "loss": 3.2501, "step": 2756 }, { "epoch": 0.8, "learning_rate": 0.0006841669662609437, "loss": 3.2865, "step": 2757 }, { "epoch": 0.8, "learning_rate": 0.0006839488517296855, "loss": 3.3314, "step": 2758 }, { "epoch": 0.8, "learning_rate": 0.0006837306967067349, "loss": 3.2642, "step": 2759 }, { "epoch": 0.8, "learning_rate": 0.0006835125012401133, "loss": 3.3216, "step": 2760 }, { "epoch": 0.8, "learning_rate": 0.0006832942653778509, "loss": 3.1626, "step": 2761 }, { "epoch": 0.8, "learning_rate": 0.0006830759891679866, "loss": 3.173, "step": 2762 }, { "epoch": 0.8, "learning_rate": 0.0006828576726585686, "loss": 3.4248, "step": 2763 }, { "epoch": 0.8, "learning_rate": 0.000682639315897654, "loss": 3.2822, "step": 2764 }, { "epoch": 0.8, "learning_rate": 0.0006824209189333081, "loss": 3.2014, "step": 2765 }, { "epoch": 0.8, "learning_rate": 0.0006822024818136056, "loss": 3.5576, "step": 2766 }, { "epoch": 0.8, "learning_rate": 0.0006819840045866301, "loss": 3.4001, "step": 2767 }, { "epoch": 0.8, "learning_rate": 0.0006817654873004737, "loss": 3.2767, "step": 2768 }, { "epoch": 0.8, "learning_rate": 0.0006815469300032373, "loss": 3.2805, "step": 2769 }, { "epoch": 0.8, "learning_rate": 0.000681328332743031, "loss": 3.3265, "step": 2770 }, { "epoch": 0.8, "learning_rate": 0.0006811096955679734, "loss": 3.4123, "step": 2771 }, { "epoch": 0.8, "learning_rate": 0.0006808910185261918, "loss": 3.2895, "step": 2772 }, { "epoch": 0.8, "learning_rate": 0.0006806723016658228, "loss": 3.3089, "step": 2773 }, { "epoch": 0.8, "learning_rate": 0.0006804535450350111, "loss": 3.1922, "step": 2774 }, { "epoch": 0.8, "learning_rate": 0.0006802347486819104, "loss": 3.2314, "step": 2775 }, { "epoch": 0.8, "learning_rate": 0.0006800159126546833, "loss": 3.273, "step": 2776 }, { "epoch": 0.8, "learning_rate": 0.000679797037001501, "loss": 3.3195, "step": 2777 }, { "epoch": 0.8, "learning_rate": 0.0006795781217705435, "loss": 3.2962, "step": 2778 }, { "epoch": 0.8, "learning_rate": 0.0006793591670099994, "loss": 3.305, "step": 2779 }, { "epoch": 0.81, "learning_rate": 0.0006791401727680662, "loss": 3.3173, "step": 2780 }, { "epoch": 0.81, "learning_rate": 0.0006789211390929496, "loss": 3.2163, "step": 2781 }, { "epoch": 0.81, "learning_rate": 0.0006787020660328646, "loss": 3.3347, "step": 2782 }, { "epoch": 0.81, "learning_rate": 0.0006784829536360347, "loss": 3.1824, "step": 2783 }, { "epoch": 0.81, "learning_rate": 0.0006782638019506917, "loss": 3.252, "step": 2784 }, { "epoch": 0.81, "learning_rate": 0.0006780446110250765, "loss": 3.2716, "step": 2785 }, { "epoch": 0.81, "learning_rate": 0.0006778253809074384, "loss": 3.3218, "step": 2786 }, { "epoch": 0.81, "learning_rate": 0.0006776061116460353, "loss": 3.2824, "step": 2787 }, { "epoch": 0.81, "learning_rate": 0.0006773868032891335, "loss": 3.2143, "step": 2788 }, { "epoch": 0.81, "learning_rate": 0.0006771674558850088, "loss": 3.2272, "step": 2789 }, { "epoch": 0.81, "learning_rate": 0.0006769480694819447, "loss": 3.345, "step": 2790 }, { "epoch": 0.81, "learning_rate": 0.0006767286441282336, "loss": 3.3204, "step": 2791 }, { "epoch": 0.81, "learning_rate": 0.0006765091798721763, "loss": 3.2337, "step": 2792 }, { "epoch": 0.81, "learning_rate": 0.0006762896767620826, "loss": 3.2564, "step": 2793 }, { "epoch": 0.81, "learning_rate": 0.0006760701348462703, "loss": 3.2738, "step": 2794 }, { "epoch": 0.81, "learning_rate": 0.0006758505541730661, "loss": 3.4115, "step": 2795 }, { "epoch": 0.81, "learning_rate": 0.0006756309347908052, "loss": 3.1912, "step": 2796 }, { "epoch": 0.81, "learning_rate": 0.0006754112767478313, "loss": 3.2432, "step": 2797 }, { "epoch": 0.81, "learning_rate": 0.0006751915800924963, "loss": 3.315, "step": 2798 }, { "epoch": 0.81, "learning_rate": 0.0006749718448731611, "loss": 3.2842, "step": 2799 }, { "epoch": 0.81, "learning_rate": 0.0006747520711381948, "loss": 3.1246, "step": 2800 }, { "epoch": 0.81, "learning_rate": 0.000674532258935975, "loss": 3.1839, "step": 2801 }, { "epoch": 0.81, "learning_rate": 0.0006743124083148878, "loss": 3.2736, "step": 2802 }, { "epoch": 0.81, "learning_rate": 0.0006740925193233281, "loss": 3.2739, "step": 2803 }, { "epoch": 0.81, "learning_rate": 0.0006738725920096983, "loss": 3.3954, "step": 2804 }, { "epoch": 0.81, "learning_rate": 0.0006736526264224101, "loss": 3.3439, "step": 2805 }, { "epoch": 0.81, "learning_rate": 0.0006734326226098835, "loss": 3.2505, "step": 2806 }, { "epoch": 0.81, "learning_rate": 0.0006732125806205468, "loss": 3.356, "step": 2807 }, { "epoch": 0.81, "learning_rate": 0.0006729925005028365, "loss": 3.2247, "step": 2808 }, { "epoch": 0.81, "learning_rate": 0.0006727723823051979, "loss": 3.4164, "step": 2809 }, { "epoch": 0.81, "learning_rate": 0.000672552226076084, "loss": 3.1969, "step": 2810 }, { "epoch": 0.81, "learning_rate": 0.0006723320318639572, "loss": 3.2439, "step": 2811 }, { "epoch": 0.81, "learning_rate": 0.0006721117997172874, "loss": 3.2822, "step": 2812 }, { "epoch": 0.81, "learning_rate": 0.0006718915296845531, "loss": 3.2659, "step": 2813 }, { "epoch": 0.81, "learning_rate": 0.0006716712218142413, "loss": 3.1938, "step": 2814 }, { "epoch": 0.82, "learning_rate": 0.0006714508761548472, "loss": 3.3545, "step": 2815 }, { "epoch": 0.82, "learning_rate": 0.0006712304927548742, "loss": 3.2272, "step": 2816 }, { "epoch": 0.82, "learning_rate": 0.0006710100716628344, "loss": 3.2744, "step": 2817 }, { "epoch": 0.82, "learning_rate": 0.0006707896129272477, "loss": 3.2233, "step": 2818 }, { "epoch": 0.82, "learning_rate": 0.0006705691165966426, "loss": 3.4276, "step": 2819 }, { "epoch": 0.82, "learning_rate": 0.0006703485827195557, "loss": 3.241, "step": 2820 }, { "epoch": 0.82, "learning_rate": 0.0006701280113445324, "loss": 3.2323, "step": 2821 }, { "epoch": 0.82, "learning_rate": 0.0006699074025201253, "loss": 3.2669, "step": 2822 }, { "epoch": 0.82, "learning_rate": 0.0006696867562948961, "loss": 3.2367, "step": 2823 }, { "epoch": 0.82, "learning_rate": 0.0006694660727174146, "loss": 3.328, "step": 2824 }, { "epoch": 0.82, "learning_rate": 0.0006692453518362586, "loss": 3.2613, "step": 2825 }, { "epoch": 0.82, "learning_rate": 0.0006690245937000144, "loss": 3.1883, "step": 2826 }, { "epoch": 0.82, "learning_rate": 0.0006688037983572762, "loss": 3.2014, "step": 2827 }, { "epoch": 0.82, "learning_rate": 0.0006685829658566462, "loss": 3.3016, "step": 2828 }, { "epoch": 0.82, "learning_rate": 0.0006683620962467358, "loss": 3.2019, "step": 2829 }, { "epoch": 0.82, "learning_rate": 0.0006681411895761633, "loss": 3.3221, "step": 2830 }, { "epoch": 0.82, "learning_rate": 0.0006679202458935559, "loss": 3.3052, "step": 2831 }, { "epoch": 0.82, "learning_rate": 0.0006676992652475486, "loss": 3.328, "step": 2832 }, { "epoch": 0.82, "learning_rate": 0.000667478247686785, "loss": 3.2886, "step": 2833 }, { "epoch": 0.82, "learning_rate": 0.0006672571932599164, "loss": 3.1662, "step": 2834 }, { "epoch": 0.82, "learning_rate": 0.0006670361020156024, "loss": 3.1904, "step": 2835 }, { "epoch": 0.82, "learning_rate": 0.0006668149740025104, "loss": 3.3312, "step": 2836 }, { "epoch": 0.82, "learning_rate": 0.0006665938092693165, "loss": 3.2363, "step": 2837 }, { "epoch": 0.82, "learning_rate": 0.0006663726078647045, "loss": 3.2995, "step": 2838 }, { "epoch": 0.82, "learning_rate": 0.000666151369837366, "loss": 3.3215, "step": 2839 }, { "epoch": 0.82, "learning_rate": 0.0006659300952360011, "loss": 3.2241, "step": 2840 }, { "epoch": 0.82, "learning_rate": 0.0006657087841093179, "loss": 3.1086, "step": 2841 }, { "epoch": 0.82, "learning_rate": 0.0006654874365060325, "loss": 3.2566, "step": 2842 }, { "epoch": 0.82, "learning_rate": 0.0006652660524748688, "loss": 3.2437, "step": 2843 }, { "epoch": 0.82, "learning_rate": 0.000665044632064559, "loss": 3.2176, "step": 2844 }, { "epoch": 0.82, "learning_rate": 0.000664823175323843, "loss": 3.1715, "step": 2845 }, { "epoch": 0.82, "learning_rate": 0.0006646016823014694, "loss": 3.3201, "step": 2846 }, { "epoch": 0.82, "learning_rate": 0.0006643801530461938, "loss": 3.0813, "step": 2847 }, { "epoch": 0.82, "learning_rate": 0.0006641585876067806, "loss": 3.2653, "step": 2848 }, { "epoch": 0.83, "learning_rate": 0.0006639369860320016, "loss": 3.3378, "step": 2849 }, { "epoch": 0.83, "learning_rate": 0.0006637153483706369, "loss": 3.2339, "step": 2850 }, { "epoch": 0.83, "learning_rate": 0.0006634936746714745, "loss": 3.2191, "step": 2851 }, { "epoch": 0.83, "learning_rate": 0.0006632719649833101, "loss": 3.2473, "step": 2852 }, { "epoch": 0.83, "learning_rate": 0.0006630502193549475, "loss": 3.3005, "step": 2853 }, { "epoch": 0.83, "learning_rate": 0.0006628284378351985, "loss": 3.3315, "step": 2854 }, { "epoch": 0.83, "learning_rate": 0.0006626066204728826, "loss": 3.3383, "step": 2855 }, { "epoch": 0.83, "learning_rate": 0.0006623847673168276, "loss": 3.1044, "step": 2856 }, { "epoch": 0.83, "learning_rate": 0.0006621628784158685, "loss": 3.256, "step": 2857 }, { "epoch": 0.83, "learning_rate": 0.0006619409538188486, "loss": 3.1375, "step": 2858 }, { "epoch": 0.83, "learning_rate": 0.0006617189935746191, "loss": 3.2039, "step": 2859 }, { "epoch": 0.83, "learning_rate": 0.0006614969977320388, "loss": 3.2952, "step": 2860 }, { "epoch": 0.83, "learning_rate": 0.0006612749663399747, "loss": 3.2014, "step": 2861 }, { "epoch": 0.83, "learning_rate": 0.0006610528994473011, "loss": 3.2924, "step": 2862 }, { "epoch": 0.83, "learning_rate": 0.0006608307971029008, "loss": 3.2879, "step": 2863 }, { "epoch": 0.83, "learning_rate": 0.0006606086593556636, "loss": 3.3133, "step": 2864 }, { "epoch": 0.83, "learning_rate": 0.0006603864862544878, "loss": 3.2257, "step": 2865 }, { "epoch": 0.83, "learning_rate": 0.0006601642778482792, "loss": 3.1595, "step": 2866 }, { "epoch": 0.83, "learning_rate": 0.0006599420341859511, "loss": 3.2108, "step": 2867 }, { "epoch": 0.83, "learning_rate": 0.0006597197553164251, "loss": 3.2792, "step": 2868 }, { "epoch": 0.83, "learning_rate": 0.0006594974412886302, "loss": 3.2907, "step": 2869 }, { "epoch": 0.83, "learning_rate": 0.0006592750921515031, "loss": 3.3022, "step": 2870 }, { "epoch": 0.83, "learning_rate": 0.0006590527079539884, "loss": 3.3651, "step": 2871 }, { "epoch": 0.83, "learning_rate": 0.0006588302887450386, "loss": 3.2856, "step": 2872 }, { "epoch": 0.83, "learning_rate": 0.0006586078345736134, "loss": 3.3225, "step": 2873 }, { "epoch": 0.83, "learning_rate": 0.0006583853454886806, "loss": 3.3379, "step": 2874 }, { "epoch": 0.83, "learning_rate": 0.0006581628215392152, "loss": 3.1857, "step": 2875 }, { "epoch": 0.83, "learning_rate": 0.0006579402627742008, "loss": 3.2187, "step": 2876 }, { "epoch": 0.83, "learning_rate": 0.0006577176692426278, "loss": 3.1648, "step": 2877 }, { "epoch": 0.83, "learning_rate": 0.0006574950409934945, "loss": 3.1616, "step": 2878 }, { "epoch": 0.83, "learning_rate": 0.0006572723780758069, "loss": 3.3054, "step": 2879 }, { "epoch": 0.83, "learning_rate": 0.0006570496805385787, "loss": 3.1509, "step": 2880 }, { "epoch": 0.83, "learning_rate": 0.000656826948430831, "loss": 3.2289, "step": 2881 }, { "epoch": 0.83, "learning_rate": 0.0006566041818015927, "loss": 3.0798, "step": 2882 }, { "epoch": 0.83, "learning_rate": 0.0006563813806999005, "loss": 3.2209, "step": 2883 }, { "epoch": 0.84, "learning_rate": 0.000656158545174798, "loss": 3.2085, "step": 2884 }, { "epoch": 0.84, "learning_rate": 0.0006559356752753371, "loss": 3.1369, "step": 2885 }, { "epoch": 0.84, "learning_rate": 0.000655712771050577, "loss": 3.3144, "step": 2886 }, { "epoch": 0.84, "learning_rate": 0.0006554898325495842, "loss": 3.0628, "step": 2887 }, { "epoch": 0.84, "learning_rate": 0.0006552668598214331, "loss": 3.4049, "step": 2888 }, { "epoch": 0.84, "learning_rate": 0.0006550438529152055, "loss": 3.3048, "step": 2889 }, { "epoch": 0.84, "learning_rate": 0.0006548208118799908, "loss": 3.4122, "step": 2890 }, { "epoch": 0.84, "learning_rate": 0.0006545977367648859, "loss": 3.2144, "step": 2891 }, { "epoch": 0.84, "learning_rate": 0.0006543746276189951, "loss": 3.4288, "step": 2892 }, { "epoch": 0.84, "learning_rate": 0.0006541514844914299, "loss": 3.2563, "step": 2893 }, { "epoch": 0.84, "learning_rate": 0.0006539283074313101, "loss": 3.2746, "step": 2894 }, { "epoch": 0.84, "learning_rate": 0.0006537050964877625, "loss": 3.282, "step": 2895 }, { "epoch": 0.84, "learning_rate": 0.0006534818517099209, "loss": 3.1768, "step": 2896 }, { "epoch": 0.84, "learning_rate": 0.0006532585731469274, "loss": 3.2541, "step": 2897 }, { "epoch": 0.84, "learning_rate": 0.000653035260847931, "loss": 3.3542, "step": 2898 }, { "epoch": 0.84, "learning_rate": 0.0006528119148620882, "loss": 3.3448, "step": 2899 }, { "epoch": 0.84, "learning_rate": 0.0006525885352385631, "loss": 3.3586, "step": 2900 }, { "epoch": 0.84, "learning_rate": 0.0006523651220265269, "loss": 3.2899, "step": 2901 }, { "epoch": 0.84, "learning_rate": 0.0006521416752751586, "loss": 3.2887, "step": 2902 }, { "epoch": 0.84, "learning_rate": 0.0006519181950336441, "loss": 3.2911, "step": 2903 }, { "epoch": 0.84, "learning_rate": 0.0006516946813511774, "loss": 3.3432, "step": 2904 }, { "epoch": 0.84, "learning_rate": 0.0006514711342769588, "loss": 3.2196, "step": 2905 }, { "epoch": 0.84, "learning_rate": 0.0006512475538601968, "loss": 3.2621, "step": 2906 }, { "epoch": 0.84, "learning_rate": 0.000651023940150107, "loss": 3.2837, "step": 2907 }, { "epoch": 0.84, "learning_rate": 0.0006508002931959125, "loss": 3.2013, "step": 2908 }, { "epoch": 0.84, "learning_rate": 0.0006505766130468431, "loss": 3.2471, "step": 2909 }, { "epoch": 0.84, "learning_rate": 0.0006503528997521365, "loss": 3.2407, "step": 2910 }, { "epoch": 0.84, "learning_rate": 0.0006501291533610377, "loss": 3.1515, "step": 2911 }, { "epoch": 0.84, "learning_rate": 0.0006499053739227989, "loss": 3.2824, "step": 2912 }, { "epoch": 0.84, "learning_rate": 0.0006496815614866791, "loss": 3.3025, "step": 2913 }, { "epoch": 0.84, "learning_rate": 0.0006494577161019451, "loss": 3.13, "step": 2914 }, { "epoch": 0.84, "learning_rate": 0.0006492338378178708, "loss": 3.2099, "step": 2915 }, { "epoch": 0.84, "learning_rate": 0.0006490099266837376, "loss": 3.3331, "step": 2916 }, { "epoch": 0.84, "learning_rate": 0.0006487859827488336, "loss": 3.1048, "step": 2917 }, { "epoch": 0.85, "learning_rate": 0.0006485620060624545, "loss": 3.2354, "step": 2918 }, { "epoch": 0.85, "learning_rate": 0.0006483379966739028, "loss": 3.4434, "step": 2919 }, { "epoch": 0.85, "learning_rate": 0.0006481139546324891, "loss": 3.2768, "step": 2920 }, { "epoch": 0.85, "learning_rate": 0.0006478898799875302, "loss": 3.2981, "step": 2921 }, { "epoch": 0.85, "learning_rate": 0.0006476657727883506, "loss": 3.2779, "step": 2922 }, { "epoch": 0.85, "learning_rate": 0.0006474416330842818, "loss": 3.3172, "step": 2923 }, { "epoch": 0.85, "learning_rate": 0.0006472174609246625, "loss": 3.3083, "step": 2924 }, { "epoch": 0.85, "learning_rate": 0.0006469932563588386, "loss": 3.3294, "step": 2925 }, { "epoch": 0.85, "learning_rate": 0.0006467690194361628, "loss": 3.3147, "step": 2926 }, { "epoch": 0.85, "learning_rate": 0.0006465447502059954, "loss": 3.2609, "step": 2927 }, { "epoch": 0.85, "learning_rate": 0.0006463204487177037, "loss": 3.1807, "step": 2928 }, { "epoch": 0.85, "learning_rate": 0.0006460961150206619, "loss": 3.1629, "step": 2929 }, { "epoch": 0.85, "learning_rate": 0.0006458717491642514, "loss": 3.298, "step": 2930 }, { "epoch": 0.85, "learning_rate": 0.0006456473511978607, "loss": 3.3014, "step": 2931 }, { "epoch": 0.85, "learning_rate": 0.0006454229211708853, "loss": 3.2526, "step": 2932 }, { "epoch": 0.85, "learning_rate": 0.0006451984591327278, "loss": 3.4198, "step": 2933 }, { "epoch": 0.85, "learning_rate": 0.0006449739651327979, "loss": 3.1244, "step": 2934 }, { "epoch": 0.85, "learning_rate": 0.0006447494392205122, "loss": 3.3049, "step": 2935 }, { "epoch": 0.85, "learning_rate": 0.0006445248814452944, "loss": 3.2626, "step": 2936 }, { "epoch": 0.85, "learning_rate": 0.0006443002918565754, "loss": 3.06, "step": 2937 }, { "epoch": 0.85, "learning_rate": 0.0006440756705037927, "loss": 3.3338, "step": 2938 }, { "epoch": 0.85, "learning_rate": 0.0006438510174363912, "loss": 3.2738, "step": 2939 }, { "epoch": 0.85, "learning_rate": 0.0006436263327038224, "loss": 3.2552, "step": 2940 }, { "epoch": 0.85, "learning_rate": 0.0006434016163555452, "loss": 3.1693, "step": 2941 }, { "epoch": 0.85, "learning_rate": 0.0006431768684410251, "loss": 3.2715, "step": 2942 }, { "epoch": 0.85, "learning_rate": 0.0006429520890097346, "loss": 3.1939, "step": 2943 }, { "epoch": 0.85, "learning_rate": 0.0006427272781111533, "loss": 3.2692, "step": 2944 }, { "epoch": 0.85, "learning_rate": 0.0006425024357947677, "loss": 3.1504, "step": 2945 }, { "epoch": 0.85, "learning_rate": 0.000642277562110071, "loss": 3.3287, "step": 2946 }, { "epoch": 0.85, "learning_rate": 0.0006420526571065636, "loss": 3.1353, "step": 2947 }, { "epoch": 0.85, "learning_rate": 0.0006418277208337526, "loss": 3.1197, "step": 2948 }, { "epoch": 0.85, "learning_rate": 0.000641602753341152, "loss": 3.2429, "step": 2949 }, { "epoch": 0.85, "learning_rate": 0.0006413777546782828, "loss": 3.2656, "step": 2950 }, { "epoch": 0.85, "learning_rate": 0.0006411527248946728, "loss": 3.2815, "step": 2951 }, { "epoch": 0.85, "learning_rate": 0.0006409276640398564, "loss": 3.3028, "step": 2952 }, { "epoch": 0.86, "learning_rate": 0.0006407025721633752, "loss": 3.381, "step": 2953 }, { "epoch": 0.86, "learning_rate": 0.0006404774493147777, "loss": 3.3677, "step": 2954 }, { "epoch": 0.86, "learning_rate": 0.0006402522955436187, "loss": 3.2937, "step": 2955 }, { "epoch": 0.86, "learning_rate": 0.0006400271108994602, "loss": 3.1929, "step": 2956 }, { "epoch": 0.86, "learning_rate": 0.000639801895431871, "loss": 3.1225, "step": 2957 }, { "epoch": 0.86, "learning_rate": 0.0006395766491904262, "loss": 3.3399, "step": 2958 }, { "epoch": 0.86, "learning_rate": 0.0006393513722247089, "loss": 3.2407, "step": 2959 }, { "epoch": 0.86, "learning_rate": 0.0006391260645843074, "loss": 3.2491, "step": 2960 }, { "epoch": 0.86, "learning_rate": 0.0006389007263188176, "loss": 3.2679, "step": 2961 }, { "epoch": 0.86, "learning_rate": 0.0006386753574778421, "loss": 3.2853, "step": 2962 }, { "epoch": 0.86, "learning_rate": 0.0006384499581109903, "loss": 3.2665, "step": 2963 }, { "epoch": 0.86, "learning_rate": 0.0006382245282678781, "loss": 3.2671, "step": 2964 }, { "epoch": 0.86, "learning_rate": 0.0006379990679981281, "loss": 3.2733, "step": 2965 }, { "epoch": 0.86, "learning_rate": 0.0006377735773513697, "loss": 3.1639, "step": 2966 }, { "epoch": 0.86, "learning_rate": 0.000637548056377239, "loss": 3.1967, "step": 2967 }, { "epoch": 0.86, "learning_rate": 0.000637322505125379, "loss": 3.1617, "step": 2968 }, { "epoch": 0.86, "learning_rate": 0.0006370969236454385, "loss": 3.2546, "step": 2969 }, { "epoch": 0.86, "learning_rate": 0.0006368713119870742, "loss": 3.253, "step": 2970 }, { "epoch": 0.86, "learning_rate": 0.0006366456701999483, "loss": 3.2741, "step": 2971 }, { "epoch": 0.86, "learning_rate": 0.0006364199983337305, "loss": 3.3579, "step": 2972 }, { "epoch": 0.86, "learning_rate": 0.0006361942964380967, "loss": 3.2905, "step": 2973 }, { "epoch": 0.86, "learning_rate": 0.0006359685645627294, "loss": 3.2896, "step": 2974 }, { "epoch": 0.86, "learning_rate": 0.0006357428027573174, "loss": 3.2191, "step": 2975 }, { "epoch": 0.86, "learning_rate": 0.0006355170110715571, "loss": 3.258, "step": 2976 }, { "epoch": 0.86, "learning_rate": 0.0006352911895551507, "loss": 3.1467, "step": 2977 }, { "epoch": 0.86, "learning_rate": 0.0006350653382578068, "loss": 3.2894, "step": 2978 }, { "epoch": 0.86, "learning_rate": 0.0006348394572292411, "loss": 3.249, "step": 2979 }, { "epoch": 0.86, "learning_rate": 0.0006346135465191754, "loss": 3.2811, "step": 2980 }, { "epoch": 0.86, "learning_rate": 0.0006343876061773385, "loss": 3.2736, "step": 2981 }, { "epoch": 0.86, "learning_rate": 0.0006341616362534652, "loss": 3.3147, "step": 2982 }, { "epoch": 0.86, "learning_rate": 0.0006339356367972973, "loss": 3.1584, "step": 2983 }, { "epoch": 0.86, "learning_rate": 0.0006337096078585825, "loss": 3.2012, "step": 2984 }, { "epoch": 0.86, "learning_rate": 0.0006334835494870759, "loss": 3.259, "step": 2985 }, { "epoch": 0.86, "learning_rate": 0.0006332574617325381, "loss": 3.1947, "step": 2986 }, { "epoch": 0.87, "learning_rate": 0.0006330313446447368, "loss": 3.2572, "step": 2987 }, { "epoch": 0.87, "learning_rate": 0.000632805198273446, "loss": 3.2683, "step": 2988 }, { "epoch": 0.87, "learning_rate": 0.0006325790226684459, "loss": 3.2501, "step": 2989 }, { "epoch": 0.87, "learning_rate": 0.0006323528178795236, "loss": 3.2299, "step": 2990 }, { "epoch": 0.87, "learning_rate": 0.0006321265839564722, "loss": 3.2231, "step": 2991 }, { "epoch": 0.87, "learning_rate": 0.0006319003209490914, "loss": 3.3582, "step": 2992 }, { "epoch": 0.87, "learning_rate": 0.0006316740289071872, "loss": 3.2329, "step": 2993 }, { "epoch": 0.87, "learning_rate": 0.0006314477078805723, "loss": 3.3179, "step": 2994 }, { "epoch": 0.87, "learning_rate": 0.0006312213579190655, "loss": 3.2708, "step": 2995 }, { "epoch": 0.87, "learning_rate": 0.0006309949790724922, "loss": 3.2385, "step": 2996 }, { "epoch": 0.87, "learning_rate": 0.0006307685713906834, "loss": 3.2586, "step": 2997 }, { "epoch": 0.87, "learning_rate": 0.0006305421349234774, "loss": 3.0786, "step": 2998 }, { "epoch": 0.87, "learning_rate": 0.0006303156697207187, "loss": 3.266, "step": 2999 }, { "epoch": 0.87, "learning_rate": 0.0006300891758322575, "loss": 3.2404, "step": 3000 }, { "epoch": 0.87, "learning_rate": 0.0006298626533079508, "loss": 3.1514, "step": 3001 }, { "epoch": 0.87, "learning_rate": 0.0006296361021976619, "loss": 3.2389, "step": 3002 }, { "epoch": 0.87, "learning_rate": 0.0006294095225512603, "loss": 3.2675, "step": 3003 }, { "epoch": 0.87, "learning_rate": 0.0006291829144186218, "loss": 3.211, "step": 3004 }, { "epoch": 0.87, "learning_rate": 0.0006289562778496285, "loss": 3.3841, "step": 3005 }, { "epoch": 0.87, "learning_rate": 0.0006287296128941684, "loss": 3.3493, "step": 3006 }, { "epoch": 0.87, "learning_rate": 0.0006285029196021365, "loss": 3.2789, "step": 3007 }, { "epoch": 0.87, "learning_rate": 0.0006282761980234332, "loss": 3.2232, "step": 3008 }, { "epoch": 0.87, "learning_rate": 0.000628049448207966, "loss": 3.2507, "step": 3009 }, { "epoch": 0.87, "learning_rate": 0.0006278226702056476, "loss": 3.3205, "step": 3010 }, { "epoch": 0.87, "learning_rate": 0.0006275958640663976, "loss": 3.3396, "step": 3011 }, { "epoch": 0.87, "learning_rate": 0.000627369029840142, "loss": 3.2751, "step": 3012 }, { "epoch": 0.87, "learning_rate": 0.0006271421675768123, "loss": 3.3025, "step": 3013 }, { "epoch": 0.87, "learning_rate": 0.0006269152773263467, "loss": 3.3544, "step": 3014 }, { "epoch": 0.87, "learning_rate": 0.0006266883591386891, "loss": 3.2833, "step": 3015 }, { "epoch": 0.87, "learning_rate": 0.0006264614130637899, "loss": 3.2594, "step": 3016 }, { "epoch": 0.87, "learning_rate": 0.0006262344391516056, "loss": 3.234, "step": 3017 }, { "epoch": 0.87, "learning_rate": 0.0006260074374520988, "loss": 3.189, "step": 3018 }, { "epoch": 0.87, "learning_rate": 0.000625780408015238, "loss": 3.3295, "step": 3019 }, { "epoch": 0.87, "learning_rate": 0.0006255533508909982, "loss": 3.2597, "step": 3020 }, { "epoch": 0.87, "learning_rate": 0.0006253262661293602, "loss": 3.3978, "step": 3021 }, { "epoch": 0.88, "learning_rate": 0.0006250991537803111, "loss": 3.2721, "step": 3022 }, { "epoch": 0.88, "learning_rate": 0.0006248720138938438, "loss": 3.2833, "step": 3023 }, { "epoch": 0.88, "learning_rate": 0.0006246448465199576, "loss": 3.2559, "step": 3024 }, { "epoch": 0.88, "learning_rate": 0.0006244176517086572, "loss": 3.2385, "step": 3025 }, { "epoch": 0.88, "learning_rate": 0.0006241904295099545, "loss": 3.3482, "step": 3026 }, { "epoch": 0.88, "learning_rate": 0.0006239631799738658, "loss": 3.1604, "step": 3027 }, { "epoch": 0.88, "learning_rate": 0.0006237359031504153, "loss": 3.1872, "step": 3028 }, { "epoch": 0.88, "learning_rate": 0.0006235085990896317, "loss": 3.22, "step": 3029 }, { "epoch": 0.88, "learning_rate": 0.0006232812678415504, "loss": 3.2486, "step": 3030 }, { "epoch": 0.88, "learning_rate": 0.0006230539094562125, "loss": 3.2382, "step": 3031 }, { "epoch": 0.88, "learning_rate": 0.0006228265239836653, "loss": 3.3039, "step": 3032 }, { "epoch": 0.88, "learning_rate": 0.0006225991114739622, "loss": 3.2392, "step": 3033 }, { "epoch": 0.88, "learning_rate": 0.0006223716719771619, "loss": 3.3457, "step": 3034 }, { "epoch": 0.88, "learning_rate": 0.0006221442055433297, "loss": 3.3264, "step": 3035 }, { "epoch": 0.88, "learning_rate": 0.0006219167122225365, "loss": 3.283, "step": 3036 }, { "epoch": 0.88, "learning_rate": 0.0006216891920648593, "loss": 3.2296, "step": 3037 }, { "epoch": 0.88, "learning_rate": 0.000621461645120381, "loss": 3.3089, "step": 3038 }, { "epoch": 0.88, "learning_rate": 0.00062123407143919, "loss": 3.1844, "step": 3039 }, { "epoch": 0.88, "learning_rate": 0.0006210064710713811, "loss": 3.174, "step": 3040 }, { "epoch": 0.88, "learning_rate": 0.0006207788440670546, "loss": 3.2376, "step": 3041 }, { "epoch": 0.88, "learning_rate": 0.0006205511904763172, "loss": 3.1327, "step": 3042 }, { "epoch": 0.88, "learning_rate": 0.000620323510349281, "loss": 3.2842, "step": 3043 }, { "epoch": 0.88, "learning_rate": 0.0006200958037360636, "loss": 3.3093, "step": 3044 }, { "epoch": 0.88, "learning_rate": 0.0006198680706867892, "loss": 3.2023, "step": 3045 }, { "epoch": 0.88, "learning_rate": 0.0006196403112515874, "loss": 3.351, "step": 3046 }, { "epoch": 0.88, "learning_rate": 0.0006194125254805938, "loss": 3.2677, "step": 3047 }, { "epoch": 0.88, "learning_rate": 0.0006191847134239495, "loss": 3.2655, "step": 3048 }, { "epoch": 0.88, "learning_rate": 0.0006189568751318018, "loss": 3.2504, "step": 3049 }, { "epoch": 0.88, "learning_rate": 0.0006187290106543032, "loss": 3.2788, "step": 3050 }, { "epoch": 0.88, "learning_rate": 0.0006185011200416128, "loss": 3.1929, "step": 3051 }, { "epoch": 0.88, "learning_rate": 0.0006182732033438945, "loss": 3.3505, "step": 3052 }, { "epoch": 0.88, "learning_rate": 0.0006180452606113185, "loss": 3.3154, "step": 3053 }, { "epoch": 0.88, "learning_rate": 0.0006178172918940608, "loss": 3.2751, "step": 3054 }, { "epoch": 0.88, "learning_rate": 0.0006175892972423029, "loss": 3.1969, "step": 3055 }, { "epoch": 0.89, "learning_rate": 0.000617361276706232, "loss": 3.4048, "step": 3056 }, { "epoch": 0.89, "learning_rate": 0.0006171332303360411, "loss": 3.2562, "step": 3057 }, { "epoch": 0.89, "learning_rate": 0.0006169051581819288, "loss": 3.3747, "step": 3058 }, { "epoch": 0.89, "learning_rate": 0.0006166770602940994, "loss": 3.1618, "step": 3059 }, { "epoch": 0.89, "learning_rate": 0.0006164489367227633, "loss": 3.2322, "step": 3060 }, { "epoch": 0.89, "learning_rate": 0.0006162207875181353, "loss": 3.3375, "step": 3061 }, { "epoch": 0.89, "learning_rate": 0.0006159926127304373, "loss": 3.2261, "step": 3062 }, { "epoch": 0.89, "learning_rate": 0.0006157644124098963, "loss": 3.2276, "step": 3063 }, { "epoch": 0.89, "learning_rate": 0.0006155361866067445, "loss": 3.1939, "step": 3064 }, { "epoch": 0.89, "learning_rate": 0.0006153079353712201, "loss": 3.2969, "step": 3065 }, { "epoch": 0.89, "learning_rate": 0.0006150796587535669, "loss": 3.2332, "step": 3066 }, { "epoch": 0.89, "learning_rate": 0.0006148513568040344, "loss": 3.3344, "step": 3067 }, { "epoch": 0.89, "learning_rate": 0.0006146230295728771, "loss": 3.2634, "step": 3068 }, { "epoch": 0.89, "learning_rate": 0.000614394677110356, "loss": 3.2296, "step": 3069 }, { "epoch": 0.89, "learning_rate": 0.0006141662994667368, "loss": 3.1233, "step": 3070 }, { "epoch": 0.89, "learning_rate": 0.0006139378966922908, "loss": 3.2563, "step": 3071 }, { "epoch": 0.89, "learning_rate": 0.0006137094688372956, "loss": 3.3209, "step": 3072 }, { "epoch": 0.89, "learning_rate": 0.0006134810159520337, "loss": 3.1583, "step": 3073 }, { "epoch": 0.89, "learning_rate": 0.000613252538086793, "loss": 3.2603, "step": 3074 }, { "epoch": 0.89, "learning_rate": 0.0006130240352918674, "loss": 3.2493, "step": 3075 }, { "epoch": 0.89, "learning_rate": 0.0006127955076175559, "loss": 3.1237, "step": 3076 }, { "epoch": 0.89, "learning_rate": 0.000612566955114163, "loss": 3.3243, "step": 3077 }, { "epoch": 0.89, "learning_rate": 0.0006123383778319991, "loss": 3.2638, "step": 3078 }, { "epoch": 0.89, "learning_rate": 0.0006121097758213793, "loss": 3.2308, "step": 3079 }, { "epoch": 0.89, "learning_rate": 0.0006118811491326249, "loss": 3.2996, "step": 3080 }, { "epoch": 0.89, "learning_rate": 0.0006116524978160619, "loss": 3.3955, "step": 3081 }, { "epoch": 0.89, "learning_rate": 0.0006114238219220223, "loss": 3.2582, "step": 3082 }, { "epoch": 0.89, "learning_rate": 0.0006111951215008437, "loss": 3.263, "step": 3083 }, { "epoch": 0.89, "learning_rate": 0.0006109663966028681, "loss": 3.1795, "step": 3084 }, { "epoch": 0.89, "learning_rate": 0.0006107376472784437, "loss": 3.2425, "step": 3085 }, { "epoch": 0.89, "learning_rate": 0.000610508873577924, "loss": 3.1689, "step": 3086 }, { "epoch": 0.89, "learning_rate": 0.0006102800755516679, "loss": 3.3359, "step": 3087 }, { "epoch": 0.89, "learning_rate": 0.0006100512532500393, "loss": 3.4719, "step": 3088 }, { "epoch": 0.89, "learning_rate": 0.0006098224067234077, "loss": 3.198, "step": 3089 }, { "epoch": 0.89, "learning_rate": 0.0006095935360221476, "loss": 3.2933, "step": 3090 }, { "epoch": 0.9, "learning_rate": 0.0006093646411966396, "loss": 3.2318, "step": 3091 }, { "epoch": 0.9, "learning_rate": 0.0006091357222972687, "loss": 3.217, "step": 3092 }, { "epoch": 0.9, "learning_rate": 0.0006089067793744258, "loss": 3.2589, "step": 3093 }, { "epoch": 0.9, "learning_rate": 0.0006086778124785068, "loss": 3.3251, "step": 3094 }, { "epoch": 0.9, "learning_rate": 0.0006084488216599133, "loss": 3.1981, "step": 3095 }, { "epoch": 0.9, "learning_rate": 0.0006082198069690515, "loss": 3.2685, "step": 3096 }, { "epoch": 0.9, "learning_rate": 0.0006079907684563333, "loss": 3.393, "step": 3097 }, { "epoch": 0.9, "learning_rate": 0.0006077617061721756, "loss": 3.2113, "step": 3098 }, { "epoch": 0.9, "learning_rate": 0.000607532620167001, "loss": 3.1016, "step": 3099 }, { "epoch": 0.9, "learning_rate": 0.0006073035104912367, "loss": 3.2169, "step": 3100 }, { "epoch": 0.9, "learning_rate": 0.0006070743771953157, "loss": 3.2454, "step": 3101 }, { "epoch": 0.9, "learning_rate": 0.0006068452203296754, "loss": 3.2152, "step": 3102 }, { "epoch": 0.9, "learning_rate": 0.0006066160399447594, "loss": 3.2339, "step": 3103 }, { "epoch": 0.9, "learning_rate": 0.0006063868360910159, "loss": 3.227, "step": 3104 }, { "epoch": 0.9, "learning_rate": 0.000606157608818898, "loss": 3.1516, "step": 3105 }, { "epoch": 0.9, "learning_rate": 0.0006059283581788648, "loss": 3.119, "step": 3106 }, { "epoch": 0.9, "learning_rate": 0.0006056990842213796, "loss": 3.2774, "step": 3107 }, { "epoch": 0.9, "learning_rate": 0.0006054697869969114, "loss": 3.3809, "step": 3108 }, { "epoch": 0.9, "learning_rate": 0.0006052404665559341, "loss": 3.297, "step": 3109 }, { "epoch": 0.9, "learning_rate": 0.000605011122948927, "loss": 3.2112, "step": 3110 }, { "epoch": 0.9, "learning_rate": 0.0006047817562263744, "loss": 3.2805, "step": 3111 }, { "epoch": 0.9, "learning_rate": 0.0006045523664387652, "loss": 3.3048, "step": 3112 }, { "epoch": 0.9, "learning_rate": 0.0006043229536365941, "loss": 3.3136, "step": 3113 }, { "epoch": 0.9, "learning_rate": 0.0006040935178703604, "loss": 3.1619, "step": 3114 }, { "epoch": 0.9, "learning_rate": 0.0006038640591905685, "loss": 3.2138, "step": 3115 }, { "epoch": 0.9, "learning_rate": 0.0006036345776477281, "loss": 3.0883, "step": 3116 }, { "epoch": 0.9, "learning_rate": 0.0006034050732923537, "loss": 3.3112, "step": 3117 }, { "epoch": 0.9, "learning_rate": 0.0006031755461749648, "loss": 3.2973, "step": 3118 }, { "epoch": 0.9, "learning_rate": 0.0006029459963460861, "loss": 3.2762, "step": 3119 }, { "epoch": 0.9, "learning_rate": 0.0006027164238562472, "loss": 3.2457, "step": 3120 }, { "epoch": 0.9, "learning_rate": 0.0006024868287559826, "loss": 3.36, "step": 3121 }, { "epoch": 0.9, "learning_rate": 0.0006022572110958319, "loss": 3.2776, "step": 3122 }, { "epoch": 0.9, "learning_rate": 0.0006020275709263397, "loss": 3.1317, "step": 3123 }, { "epoch": 0.9, "learning_rate": 0.0006017979082980553, "loss": 3.3039, "step": 3124 }, { "epoch": 0.91, "learning_rate": 0.0006015682232615336, "loss": 3.3179, "step": 3125 }, { "epoch": 0.91, "learning_rate": 0.0006013385158673333, "loss": 3.2328, "step": 3126 }, { "epoch": 0.91, "learning_rate": 0.000601108786166019, "loss": 3.2392, "step": 3127 }, { "epoch": 0.91, "learning_rate": 0.00060087903420816, "loss": 3.2182, "step": 3128 }, { "epoch": 0.91, "learning_rate": 0.0006006492600443301, "loss": 3.2096, "step": 3129 }, { "epoch": 0.91, "learning_rate": 0.0006004194637251085, "loss": 3.2318, "step": 3130 }, { "epoch": 0.91, "learning_rate": 0.000600189645301079, "loss": 3.3465, "step": 3131 }, { "epoch": 0.91, "learning_rate": 0.0005999598048228302, "loss": 3.3014, "step": 3132 }, { "epoch": 0.91, "learning_rate": 0.0005997299423409559, "loss": 3.0876, "step": 3133 }, { "epoch": 0.91, "learning_rate": 0.0005995000579060545, "loss": 3.2957, "step": 3134 }, { "epoch": 0.91, "learning_rate": 0.0005992701515687291, "loss": 3.2702, "step": 3135 }, { "epoch": 0.91, "learning_rate": 0.0005990402233795877, "loss": 3.3656, "step": 3136 }, { "epoch": 0.91, "learning_rate": 0.0005988102733892434, "loss": 3.1968, "step": 3137 }, { "epoch": 0.91, "learning_rate": 0.0005985803016483138, "loss": 3.2172, "step": 3138 }, { "epoch": 0.91, "learning_rate": 0.0005983503082074213, "loss": 3.3685, "step": 3139 }, { "epoch": 0.91, "learning_rate": 0.0005981202931171933, "loss": 3.2492, "step": 3140 }, { "epoch": 0.91, "learning_rate": 0.0005978902564282616, "loss": 3.3206, "step": 3141 }, { "epoch": 0.91, "learning_rate": 0.0005976601981912632, "loss": 3.2116, "step": 3142 }, { "epoch": 0.91, "learning_rate": 0.0005974301184568395, "loss": 3.2235, "step": 3143 }, { "epoch": 0.91, "learning_rate": 0.0005972000172756366, "loss": 3.2545, "step": 3144 }, { "epoch": 0.91, "learning_rate": 0.0005969698946983055, "loss": 3.2059, "step": 3145 }, { "epoch": 0.91, "learning_rate": 0.0005967397507755022, "loss": 3.3206, "step": 3146 }, { "epoch": 0.91, "learning_rate": 0.0005965095855578869, "loss": 3.1173, "step": 3147 }, { "epoch": 0.91, "learning_rate": 0.0005962793990961243, "loss": 3.2464, "step": 3148 }, { "epoch": 0.91, "learning_rate": 0.0005960491914408845, "loss": 3.1963, "step": 3149 }, { "epoch": 0.91, "learning_rate": 0.0005958189626428419, "loss": 3.2827, "step": 3150 }, { "epoch": 0.91, "learning_rate": 0.0005955887127526754, "loss": 3.2277, "step": 3151 }, { "epoch": 0.91, "learning_rate": 0.0005953584418210689, "loss": 3.2113, "step": 3152 }, { "epoch": 0.91, "learning_rate": 0.0005951281498987105, "loss": 3.2404, "step": 3153 }, { "epoch": 0.91, "learning_rate": 0.0005948978370362933, "loss": 3.1788, "step": 3154 }, { "epoch": 0.91, "learning_rate": 0.0005946675032845148, "loss": 3.2937, "step": 3155 }, { "epoch": 0.91, "learning_rate": 0.0005944371486940772, "loss": 3.2668, "step": 3156 }, { "epoch": 0.91, "learning_rate": 0.0005942067733156871, "loss": 3.2224, "step": 3157 }, { "epoch": 0.91, "learning_rate": 0.0005939763772000559, "loss": 3.2991, "step": 3158 }, { "epoch": 0.91, "learning_rate": 0.0005937459603978997, "loss": 3.3603, "step": 3159 }, { "epoch": 0.92, "learning_rate": 0.0005935155229599386, "loss": 3.2502, "step": 3160 }, { "epoch": 0.92, "learning_rate": 0.0005932850649368978, "loss": 3.2464, "step": 3161 }, { "epoch": 0.92, "learning_rate": 0.0005930545863795067, "loss": 3.188, "step": 3162 }, { "epoch": 0.92, "learning_rate": 0.0005928240873384993, "loss": 3.1663, "step": 3163 }, { "epoch": 0.92, "learning_rate": 0.0005925935678646144, "loss": 3.2398, "step": 3164 }, { "epoch": 0.92, "learning_rate": 0.0005923630280085947, "loss": 3.2748, "step": 3165 }, { "epoch": 0.92, "learning_rate": 0.0005921324678211879, "loss": 3.1523, "step": 3166 }, { "epoch": 0.92, "learning_rate": 0.000591901887353146, "loss": 3.2155, "step": 3167 }, { "epoch": 0.92, "learning_rate": 0.0005916712866552254, "loss": 3.3291, "step": 3168 }, { "epoch": 0.92, "learning_rate": 0.0005914406657781871, "loss": 3.2218, "step": 3169 }, { "epoch": 0.92, "learning_rate": 0.0005912100247727964, "loss": 3.2184, "step": 3170 }, { "epoch": 0.92, "learning_rate": 0.0005909793636898231, "loss": 3.3097, "step": 3171 }, { "epoch": 0.92, "learning_rate": 0.0005907486825800414, "loss": 3.267, "step": 3172 }, { "epoch": 0.92, "learning_rate": 0.0005905179814942301, "loss": 3.2012, "step": 3173 }, { "epoch": 0.92, "learning_rate": 0.0005902872604831719, "loss": 3.315, "step": 3174 }, { "epoch": 0.92, "learning_rate": 0.0005900565195976543, "loss": 3.2357, "step": 3175 }, { "epoch": 0.92, "learning_rate": 0.0005898257588884691, "loss": 3.3202, "step": 3176 }, { "epoch": 0.92, "learning_rate": 0.0005895949784064127, "loss": 3.3149, "step": 3177 }, { "epoch": 0.92, "learning_rate": 0.0005893641782022852, "loss": 3.2979, "step": 3178 }, { "epoch": 0.92, "learning_rate": 0.0005891333583268916, "loss": 3.3004, "step": 3179 }, { "epoch": 0.92, "learning_rate": 0.000588902518831041, "loss": 3.1125, "step": 3180 }, { "epoch": 0.92, "learning_rate": 0.0005886716597655471, "loss": 3.3378, "step": 3181 }, { "epoch": 0.92, "learning_rate": 0.0005884407811812275, "loss": 3.3147, "step": 3182 }, { "epoch": 0.92, "learning_rate": 0.0005882098831289043, "loss": 3.2988, "step": 3183 }, { "epoch": 0.92, "learning_rate": 0.0005879789656594037, "loss": 3.2934, "step": 3184 }, { "epoch": 0.92, "learning_rate": 0.0005877480288235568, "loss": 3.4488, "step": 3185 }, { "epoch": 0.92, "learning_rate": 0.0005875170726721982, "loss": 3.1786, "step": 3186 }, { "epoch": 0.92, "learning_rate": 0.0005872860972561671, "loss": 3.2047, "step": 3187 }, { "epoch": 0.92, "learning_rate": 0.0005870551026263069, "loss": 3.1891, "step": 3188 }, { "epoch": 0.92, "learning_rate": 0.0005868240888334653, "loss": 3.218, "step": 3189 }, { "epoch": 0.92, "learning_rate": 0.000586593055928494, "loss": 3.2997, "step": 3190 }, { "epoch": 0.92, "learning_rate": 0.0005863620039622491, "loss": 3.0456, "step": 3191 }, { "epoch": 0.92, "learning_rate": 0.000586130932985591, "loss": 3.2112, "step": 3192 }, { "epoch": 0.92, "learning_rate": 0.0005858998430493841, "loss": 3.242, "step": 3193 }, { "epoch": 0.93, "learning_rate": 0.0005856687342044968, "loss": 3.2181, "step": 3194 }, { "epoch": 0.93, "learning_rate": 0.0005854376065018021, "loss": 3.2327, "step": 3195 }, { "epoch": 0.93, "learning_rate": 0.000585206459992177, "loss": 3.2053, "step": 3196 }, { "epoch": 0.93, "learning_rate": 0.0005849752947265023, "loss": 3.3143, "step": 3197 }, { "epoch": 0.93, "learning_rate": 0.0005847441107556632, "loss": 3.2194, "step": 3198 }, { "epoch": 0.93, "learning_rate": 0.0005845129081305492, "loss": 3.1319, "step": 3199 }, { "epoch": 0.93, "learning_rate": 0.0005842816869020538, "loss": 3.2735, "step": 3200 }, { "epoch": 0.93, "learning_rate": 0.0005840504471210741, "loss": 3.2891, "step": 3201 }, { "epoch": 0.93, "learning_rate": 0.0005838191888385121, "loss": 3.2134, "step": 3202 }, { "epoch": 0.93, "learning_rate": 0.0005835879121052731, "loss": 3.2739, "step": 3203 }, { "epoch": 0.93, "learning_rate": 0.0005833566169722674, "loss": 3.1864, "step": 3204 }, { "epoch": 0.93, "learning_rate": 0.0005831253034904082, "loss": 3.303, "step": 3205 }, { "epoch": 0.93, "learning_rate": 0.0005828939717106137, "loss": 3.2396, "step": 3206 }, { "epoch": 0.93, "learning_rate": 0.0005826626216838053, "loss": 3.1644, "step": 3207 }, { "epoch": 0.93, "learning_rate": 0.0005824312534609094, "loss": 3.4487, "step": 3208 }, { "epoch": 0.93, "learning_rate": 0.0005821998670928556, "loss": 3.2508, "step": 3209 }, { "epoch": 0.93, "learning_rate": 0.0005819684626305776, "loss": 3.2202, "step": 3210 }, { "epoch": 0.93, "learning_rate": 0.0005817370401250136, "loss": 3.2755, "step": 3211 }, { "epoch": 0.93, "learning_rate": 0.0005815055996271052, "loss": 3.1585, "step": 3212 }, { "epoch": 0.93, "learning_rate": 0.0005812741411877982, "loss": 3.3328, "step": 3213 }, { "epoch": 0.93, "learning_rate": 0.0005810426648580425, "loss": 3.1315, "step": 3214 }, { "epoch": 0.93, "learning_rate": 0.0005808111706887913, "loss": 3.316, "step": 3215 }, { "epoch": 0.93, "learning_rate": 0.0005805796587310028, "loss": 3.144, "step": 3216 }, { "epoch": 0.93, "learning_rate": 0.0005803481290356382, "loss": 3.2485, "step": 3217 }, { "epoch": 0.93, "learning_rate": 0.0005801165816536628, "loss": 3.3051, "step": 3218 }, { "epoch": 0.93, "learning_rate": 0.0005798850166360461, "loss": 3.2119, "step": 3219 }, { "epoch": 0.93, "learning_rate": 0.0005796534340337614, "loss": 3.1534, "step": 3220 }, { "epoch": 0.93, "learning_rate": 0.0005794218338977853, "loss": 3.3262, "step": 3221 }, { "epoch": 0.93, "learning_rate": 0.0005791902162790992, "loss": 3.2959, "step": 3222 }, { "epoch": 0.93, "learning_rate": 0.0005789585812286877, "loss": 3.2719, "step": 3223 }, { "epoch": 0.93, "learning_rate": 0.0005787269287975392, "loss": 3.2357, "step": 3224 }, { "epoch": 0.93, "learning_rate": 0.0005784952590366464, "loss": 3.235, "step": 3225 }, { "epoch": 0.93, "learning_rate": 0.0005782635719970055, "loss": 3.1888, "step": 3226 }, { "epoch": 0.93, "learning_rate": 0.0005780318677296166, "loss": 3.2985, "step": 3227 }, { "epoch": 0.93, "learning_rate": 0.0005778001462854831, "loss": 3.3284, "step": 3228 }, { "epoch": 0.94, "learning_rate": 0.0005775684077156132, "loss": 3.1872, "step": 3229 }, { "epoch": 0.94, "learning_rate": 0.0005773366520710181, "loss": 3.2582, "step": 3230 }, { "epoch": 0.94, "learning_rate": 0.0005771048794027128, "loss": 3.2592, "step": 3231 }, { "epoch": 0.94, "learning_rate": 0.0005768730897617164, "loss": 3.2764, "step": 3232 }, { "epoch": 0.94, "learning_rate": 0.0005766412831990513, "loss": 3.218, "step": 3233 }, { "epoch": 0.94, "learning_rate": 0.000576409459765744, "loss": 3.3444, "step": 3234 }, { "epoch": 0.94, "learning_rate": 0.0005761776195128248, "loss": 3.2446, "step": 3235 }, { "epoch": 0.94, "learning_rate": 0.0005759457624913269, "loss": 3.1785, "step": 3236 }, { "epoch": 0.94, "learning_rate": 0.0005757138887522884, "loss": 3.2727, "step": 3237 }, { "epoch": 0.94, "learning_rate": 0.00057548199834675, "loss": 3.301, "step": 3238 }, { "epoch": 0.94, "learning_rate": 0.0005752500913257568, "loss": 3.2736, "step": 3239 }, { "epoch": 0.94, "learning_rate": 0.0005750181677403569, "loss": 3.2852, "step": 3240 }, { "epoch": 0.94, "learning_rate": 0.0005747862276416028, "loss": 3.1935, "step": 3241 }, { "epoch": 0.94, "learning_rate": 0.0005745542710805502, "loss": 3.2986, "step": 3242 }, { "epoch": 0.94, "learning_rate": 0.0005743222981082582, "loss": 3.228, "step": 3243 }, { "epoch": 0.94, "learning_rate": 0.0005740903087757902, "loss": 3.2497, "step": 3244 }, { "epoch": 0.94, "learning_rate": 0.0005738583031342123, "loss": 3.3076, "step": 3245 }, { "epoch": 0.94, "learning_rate": 0.0005736262812345951, "loss": 3.3299, "step": 3246 }, { "epoch": 0.94, "learning_rate": 0.0005733942431280123, "loss": 3.2272, "step": 3247 }, { "epoch": 0.94, "learning_rate": 0.0005731621888655409, "loss": 3.2522, "step": 3248 }, { "epoch": 0.94, "learning_rate": 0.0005729301184982621, "loss": 3.2882, "step": 3249 }, { "epoch": 0.94, "learning_rate": 0.0005726980320772602, "loss": 3.1968, "step": 3250 }, { "epoch": 0.94, "learning_rate": 0.0005724659296536234, "loss": 3.2209, "step": 3251 }, { "epoch": 0.94, "learning_rate": 0.0005722338112784429, "loss": 3.1716, "step": 3252 }, { "epoch": 0.94, "learning_rate": 0.0005720016770028138, "loss": 3.2655, "step": 3253 }, { "epoch": 0.94, "learning_rate": 0.0005717695268778344, "loss": 3.1404, "step": 3254 }, { "epoch": 0.94, "learning_rate": 0.000571537360954607, "loss": 3.1403, "step": 3255 }, { "epoch": 0.94, "learning_rate": 0.0005713051792842371, "loss": 3.2154, "step": 3256 }, { "epoch": 0.94, "learning_rate": 0.0005710729819178333, "loss": 3.318, "step": 3257 }, { "epoch": 0.94, "learning_rate": 0.0005710729819178333, "loss": 3.2924, "step": 3258 }, { "epoch": 0.94, "learning_rate": 0.0005708407689065082, "loss": 3.2685, "step": 3259 }, { "epoch": 0.94, "learning_rate": 0.0005706085403013773, "loss": 3.2382, "step": 3260 }, { "epoch": 0.94, "learning_rate": 0.0005703762961535604, "loss": 3.3208, "step": 3261 }, { "epoch": 0.94, "learning_rate": 0.0005701440365141799, "loss": 3.0944, "step": 3262 }, { "epoch": 0.94, "learning_rate": 0.0005699117614343618, "loss": 3.175, "step": 3263 }, { "epoch": 0.95, "learning_rate": 0.0005696794709652358, "loss": 3.1948, "step": 3264 }, { "epoch": 0.95, "learning_rate": 0.0005694471651579346, "loss": 3.1416, "step": 3265 }, { "epoch": 0.95, "learning_rate": 0.0005692148440635946, "loss": 3.1449, "step": 3266 }, { "epoch": 0.95, "learning_rate": 0.0005689825077333552, "loss": 3.3023, "step": 3267 }, { "epoch": 0.95, "learning_rate": 0.0005687501562183596, "loss": 3.358, "step": 3268 }, { "epoch": 0.95, "learning_rate": 0.0005685177895697541, "loss": 3.2512, "step": 3269 }, { "epoch": 0.95, "learning_rate": 0.0005682854078386882, "loss": 3.2648, "step": 3270 }, { "epoch": 0.95, "learning_rate": 0.000568053011076315, "loss": 3.0966, "step": 3271 }, { "epoch": 0.95, "learning_rate": 0.0005678205993337907, "loss": 3.2642, "step": 3272 }, { "epoch": 0.95, "learning_rate": 0.0005675881726622749, "loss": 3.2958, "step": 3273 }, { "epoch": 0.95, "learning_rate": 0.0005673557311129306, "loss": 3.2105, "step": 3274 }, { "epoch": 0.95, "learning_rate": 0.0005671232747369236, "loss": 3.299, "step": 3275 }, { "epoch": 0.95, "learning_rate": 0.0005668908035854236, "loss": 3.2727, "step": 3276 }, { "epoch": 0.95, "learning_rate": 0.0005666583177096029, "loss": 3.1718, "step": 3277 }, { "epoch": 0.95, "learning_rate": 0.0005664258171606379, "loss": 3.1595, "step": 3278 }, { "epoch": 0.95, "learning_rate": 0.0005661933019897074, "loss": 3.1777, "step": 3279 }, { "epoch": 0.95, "learning_rate": 0.0005659607722479938, "loss": 3.2947, "step": 3280 }, { "epoch": 0.95, "learning_rate": 0.0005657282279866826, "loss": 3.187, "step": 3281 }, { "epoch": 0.95, "learning_rate": 0.0005654956692569627, "loss": 3.1988, "step": 3282 }, { "epoch": 0.95, "learning_rate": 0.000565263096110026, "loss": 3.2905, "step": 3283 }, { "epoch": 0.95, "learning_rate": 0.0005650305085970672, "loss": 3.283, "step": 3284 }, { "epoch": 0.95, "learning_rate": 0.0005647979067692851, "loss": 3.2282, "step": 3285 }, { "epoch": 0.95, "learning_rate": 0.0005645652906778808, "loss": 3.232, "step": 3286 }, { "epoch": 0.95, "learning_rate": 0.000564332660374059, "loss": 3.3272, "step": 3287 }, { "epoch": 0.95, "learning_rate": 0.0005641000159090272, "loss": 3.3957, "step": 3288 }, { "epoch": 0.95, "learning_rate": 0.0005638673573339966, "loss": 3.2655, "step": 3289 }, { "epoch": 0.95, "learning_rate": 0.0005636346847001806, "loss": 3.2619, "step": 3290 }, { "epoch": 0.95, "learning_rate": 0.0005634019980587968, "loss": 3.249, "step": 3291 }, { "epoch": 0.95, "learning_rate": 0.0005631692974610648, "loss": 3.2037, "step": 3292 }, { "epoch": 0.95, "learning_rate": 0.0005629365829582077, "loss": 3.2209, "step": 3293 }, { "epoch": 0.95, "learning_rate": 0.0005627038546014519, "loss": 3.1988, "step": 3294 }, { "epoch": 0.95, "learning_rate": 0.0005624711124420269, "loss": 3.33, "step": 3295 }, { "epoch": 0.95, "learning_rate": 0.0005622383565311646, "loss": 3.2762, "step": 3296 }, { "epoch": 0.95, "learning_rate": 0.0005620055869201007, "loss": 3.1339, "step": 3297 }, { "epoch": 0.96, "learning_rate": 0.0005617728036600734, "loss": 3.1705, "step": 3298 }, { "epoch": 0.96, "learning_rate": 0.0005615400068023238, "loss": 3.3136, "step": 3299 }, { "epoch": 0.96, "learning_rate": 0.0005613071963980966, "loss": 3.1917, "step": 3300 }, { "epoch": 0.96, "learning_rate": 0.000561074372498639, "loss": 3.2731, "step": 3301 }, { "epoch": 0.96, "learning_rate": 0.0005608415351552013, "loss": 3.2817, "step": 3302 }, { "epoch": 0.96, "learning_rate": 0.0005606086844190367, "loss": 3.3092, "step": 3303 }, { "epoch": 0.96, "learning_rate": 0.0005603758203414015, "loss": 3.242, "step": 3304 }, { "epoch": 0.96, "learning_rate": 0.0005601429429735547, "loss": 3.2815, "step": 3305 }, { "epoch": 0.96, "learning_rate": 0.0005599100523667587, "loss": 3.2599, "step": 3306 }, { "epoch": 0.96, "learning_rate": 0.0005596771485722781, "loss": 3.1389, "step": 3307 }, { "epoch": 0.96, "learning_rate": 0.000559444231641381, "loss": 3.2742, "step": 3308 }, { "epoch": 0.96, "learning_rate": 0.000559211301625338, "loss": 3.2601, "step": 3309 }, { "epoch": 0.96, "learning_rate": 0.0005589783585754232, "loss": 3.3026, "step": 3310 }, { "epoch": 0.96, "learning_rate": 0.0005587454025429126, "loss": 3.2131, "step": 3311 }, { "epoch": 0.96, "learning_rate": 0.0005585124335790857, "loss": 3.1579, "step": 3312 }, { "epoch": 0.96, "learning_rate": 0.0005582794517352249, "loss": 3.2619, "step": 3313 }, { "epoch": 0.96, "learning_rate": 0.0005580464570626152, "loss": 3.3872, "step": 3314 }, { "epoch": 0.96, "learning_rate": 0.0005578134496125445, "loss": 3.2571, "step": 3315 }, { "epoch": 0.96, "learning_rate": 0.0005575804294363034, "loss": 3.344, "step": 3316 }, { "epoch": 0.96, "learning_rate": 0.0005573473965851855, "loss": 3.3545, "step": 3317 }, { "epoch": 0.96, "learning_rate": 0.0005571143511104873, "loss": 3.1947, "step": 3318 }, { "epoch": 0.96, "learning_rate": 0.0005568812930635076, "loss": 3.2134, "step": 3319 }, { "epoch": 0.96, "learning_rate": 0.000556648222495548, "loss": 3.2624, "step": 3320 }, { "epoch": 0.96, "learning_rate": 0.0005564151394579136, "loss": 3.2663, "step": 3321 }, { "epoch": 0.96, "learning_rate": 0.0005561820440019116, "loss": 3.147, "step": 3322 }, { "epoch": 0.96, "learning_rate": 0.000555948936178852, "loss": 3.2098, "step": 3323 }, { "epoch": 0.96, "learning_rate": 0.0005557158160400476, "loss": 3.2802, "step": 3324 }, { "epoch": 0.96, "learning_rate": 0.0005554826836368139, "loss": 3.2689, "step": 3325 }, { "epoch": 0.96, "learning_rate": 0.0005552495390204691, "loss": 3.0766, "step": 3326 }, { "epoch": 0.96, "learning_rate": 0.0005550163822423341, "loss": 3.1545, "step": 3327 }, { "epoch": 0.96, "learning_rate": 0.0005547832133537327, "loss": 3.347, "step": 3328 }, { "epoch": 0.96, "learning_rate": 0.0005545500324059907, "loss": 3.2382, "step": 3329 }, { "epoch": 0.96, "learning_rate": 0.0005543168394504374, "loss": 3.2939, "step": 3330 }, { "epoch": 0.96, "learning_rate": 0.0005540836345384042, "loss": 3.2832, "step": 3331 }, { "epoch": 0.96, "learning_rate": 0.0005538504177212251, "loss": 3.1392, "step": 3332 }, { "epoch": 0.97, "learning_rate": 0.000553617189050237, "loss": 3.1596, "step": 3333 }, { "epoch": 0.97, "learning_rate": 0.0005533839485767795, "loss": 3.2845, "step": 3334 }, { "epoch": 0.97, "learning_rate": 0.0005531506963521943, "loss": 3.3786, "step": 3335 }, { "epoch": 0.97, "learning_rate": 0.0005529174324278261, "loss": 3.2478, "step": 3336 }, { "epoch": 0.97, "learning_rate": 0.0005526841568550223, "loss": 3.0776, "step": 3337 }, { "epoch": 0.97, "learning_rate": 0.0005524508696851322, "loss": 3.2214, "step": 3338 }, { "epoch": 0.97, "learning_rate": 0.0005522175709695084, "loss": 3.2402, "step": 3339 }, { "epoch": 0.97, "learning_rate": 0.0005519842607595055, "loss": 3.2881, "step": 3340 }, { "epoch": 0.97, "learning_rate": 0.0005517509391064809, "loss": 3.2784, "step": 3341 }, { "epoch": 0.97, "learning_rate": 0.0005515176060617945, "loss": 3.2176, "step": 3342 }, { "epoch": 0.97, "learning_rate": 0.0005512842616768087, "loss": 3.1566, "step": 3343 }, { "epoch": 0.97, "learning_rate": 0.0005510509060028885, "loss": 3.2161, "step": 3344 }, { "epoch": 0.97, "learning_rate": 0.000550817539091401, "loss": 3.1768, "step": 3345 }, { "epoch": 0.97, "learning_rate": 0.0005505841609937161, "loss": 3.2538, "step": 3346 }, { "epoch": 0.97, "learning_rate": 0.0005503507717612062, "loss": 3.3809, "step": 3347 }, { "epoch": 0.97, "learning_rate": 0.000550117371445246, "loss": 3.3108, "step": 3348 }, { "epoch": 0.97, "learning_rate": 0.0005498839600972126, "loss": 3.1988, "step": 3349 }, { "epoch": 0.97, "learning_rate": 0.0005496505377684857, "loss": 3.2589, "step": 3350 }, { "epoch": 0.97, "learning_rate": 0.0005494171045104472, "loss": 3.1137, "step": 3351 }, { "epoch": 0.97, "learning_rate": 0.0005491836603744817, "loss": 3.3186, "step": 3352 }, { "epoch": 0.97, "learning_rate": 0.000548950205411976, "loss": 3.2261, "step": 3353 }, { "epoch": 0.97, "learning_rate": 0.0005487167396743194, "loss": 3.3226, "step": 3354 }, { "epoch": 0.97, "learning_rate": 0.0005484832632129031, "loss": 3.2397, "step": 3355 }, { "epoch": 0.97, "learning_rate": 0.0005482497760791214, "loss": 3.0879, "step": 3356 }, { "epoch": 0.97, "learning_rate": 0.0005480162783243706, "loss": 3.2543, "step": 3357 }, { "epoch": 0.97, "learning_rate": 0.0005477827700000492, "loss": 3.3057, "step": 3358 }, { "epoch": 0.97, "learning_rate": 0.000547549251157558, "loss": 3.2111, "step": 3359 }, { "epoch": 0.97, "learning_rate": 0.0005473157218483006, "loss": 3.3418, "step": 3360 }, { "epoch": 0.97, "learning_rate": 0.0005470821821236823, "loss": 3.205, "step": 3361 }, { "epoch": 0.97, "learning_rate": 0.0005468486320351113, "loss": 3.363, "step": 3362 }, { "epoch": 0.97, "learning_rate": 0.0005466150716339975, "loss": 3.2649, "step": 3363 }, { "epoch": 0.97, "learning_rate": 0.0005463815009717533, "loss": 3.1846, "step": 3364 }, { "epoch": 0.97, "learning_rate": 0.0005461479200997935, "loss": 3.2673, "step": 3365 }, { "epoch": 0.97, "learning_rate": 0.0005459143290695351, "loss": 3.2658, "step": 3366 }, { "epoch": 0.98, "learning_rate": 0.0005456807279323971, "loss": 3.2788, "step": 3367 }, { "epoch": 0.98, "learning_rate": 0.0005454471167398008, "loss": 3.2777, "step": 3368 }, { "epoch": 0.98, "learning_rate": 0.00054521349554317, "loss": 3.2331, "step": 3369 }, { "epoch": 0.98, "learning_rate": 0.0005449798643939305, "loss": 3.315, "step": 3370 }, { "epoch": 0.98, "learning_rate": 0.0005447462233435102, "loss": 3.2811, "step": 3371 }, { "epoch": 0.98, "learning_rate": 0.0005445125724433393, "loss": 3.2094, "step": 3372 }, { "epoch": 0.98, "learning_rate": 0.0005442789117448501, "loss": 3.236, "step": 3373 }, { "epoch": 0.98, "learning_rate": 0.0005440452412994774, "loss": 3.2111, "step": 3374 }, { "epoch": 0.98, "learning_rate": 0.0005438115611586575, "loss": 3.1314, "step": 3375 }, { "epoch": 0.98, "learning_rate": 0.0005435778713738292, "loss": 3.3152, "step": 3376 }, { "epoch": 0.98, "learning_rate": 0.0005433441719964333, "loss": 3.183, "step": 3377 }, { "epoch": 0.98, "learning_rate": 0.0005431104630779133, "loss": 3.2483, "step": 3378 }, { "epoch": 0.98, "learning_rate": 0.0005428767446697139, "loss": 3.2409, "step": 3379 }, { "epoch": 0.98, "learning_rate": 0.0005426430168232826, "loss": 3.1943, "step": 3380 }, { "epoch": 0.98, "learning_rate": 0.0005424092795900682, "loss": 3.1581, "step": 3381 }, { "epoch": 0.98, "learning_rate": 0.0005421755330215223, "loss": 3.1605, "step": 3382 }, { "epoch": 0.98, "learning_rate": 0.0005419417771690987, "loss": 3.3837, "step": 3383 }, { "epoch": 0.98, "learning_rate": 0.0005417080120842523, "loss": 3.1582, "step": 3384 }, { "epoch": 0.98, "learning_rate": 0.0005414742378184408, "loss": 3.2823, "step": 3385 }, { "epoch": 0.98, "learning_rate": 0.0005412404544231235, "loss": 3.2681, "step": 3386 }, { "epoch": 0.98, "learning_rate": 0.0005410066619497623, "loss": 3.1736, "step": 3387 }, { "epoch": 0.98, "learning_rate": 0.0005407728604498204, "loss": 3.238, "step": 3388 }, { "epoch": 0.98, "learning_rate": 0.0005405390499747633, "loss": 3.2183, "step": 3389 }, { "epoch": 0.98, "learning_rate": 0.0005403052305760586, "loss": 3.1151, "step": 3390 }, { "epoch": 0.98, "learning_rate": 0.0005400714023051754, "loss": 3.3011, "step": 3391 }, { "epoch": 0.98, "learning_rate": 0.0005398375652135858, "loss": 3.2663, "step": 3392 }, { "epoch": 0.98, "learning_rate": 0.0005396037193527626, "loss": 3.1158, "step": 3393 }, { "epoch": 0.98, "learning_rate": 0.0005393698647741809, "loss": 3.0783, "step": 3394 }, { "epoch": 0.98, "learning_rate": 0.0005391360015293183, "loss": 3.0968, "step": 3395 }, { "epoch": 0.98, "learning_rate": 0.0005389021296696537, "loss": 3.1548, "step": 3396 }, { "epoch": 0.98, "learning_rate": 0.0005386682492466681, "loss": 3.2438, "step": 3397 }, { "epoch": 0.98, "learning_rate": 0.0005384343603118443, "loss": 3.1949, "step": 3398 }, { "epoch": 0.98, "learning_rate": 0.0005382004629166671, "loss": 3.2351, "step": 3399 }, { "epoch": 0.98, "learning_rate": 0.0005379665571126231, "loss": 3.2981, "step": 3400 }, { "epoch": 0.98, "learning_rate": 0.0005377326429512008, "loss": 3.1211, "step": 3401 }, { "epoch": 0.99, "learning_rate": 0.0005374987204838906, "loss": 3.2632, "step": 3402 }, { "epoch": 0.99, "learning_rate": 0.0005372647897621842, "loss": 3.2347, "step": 3403 }, { "epoch": 0.99, "learning_rate": 0.0005370308508375758, "loss": 3.1656, "step": 3404 }, { "epoch": 0.99, "learning_rate": 0.0005367969037615613, "loss": 3.1788, "step": 3405 }, { "epoch": 0.99, "learning_rate": 0.0005365629485856381, "loss": 3.2396, "step": 3406 }, { "epoch": 0.99, "learning_rate": 0.0005363289853613053, "loss": 3.2726, "step": 3407 }, { "epoch": 0.99, "learning_rate": 0.0005360950141400643, "loss": 3.2178, "step": 3408 }, { "epoch": 0.99, "learning_rate": 0.0005358610349734178, "loss": 3.3275, "step": 3409 }, { "epoch": 0.99, "learning_rate": 0.0005356270479128706, "loss": 3.1435, "step": 3410 }, { "epoch": 0.99, "learning_rate": 0.000535393053009929, "loss": 3.2059, "step": 3411 }, { "epoch": 0.99, "learning_rate": 0.0005351590503161007, "loss": 3.2206, "step": 3412 }, { "epoch": 0.99, "learning_rate": 0.0005349250398828958, "loss": 3.2107, "step": 3413 }, { "epoch": 0.99, "learning_rate": 0.0005346910217618259, "loss": 3.2472, "step": 3414 }, { "epoch": 0.99, "learning_rate": 0.000534456996004404, "loss": 3.1822, "step": 3415 }, { "epoch": 0.99, "learning_rate": 0.0005342229626621447, "loss": 3.3078, "step": 3416 }, { "epoch": 0.99, "learning_rate": 0.0005339889217865649, "loss": 3.1884, "step": 3417 }, { "epoch": 0.99, "learning_rate": 0.0005337548734291826, "loss": 3.3543, "step": 3418 }, { "epoch": 0.99, "learning_rate": 0.0005335208176415179, "loss": 3.1568, "step": 3419 }, { "epoch": 0.99, "learning_rate": 0.000533286754475092, "loss": 3.2582, "step": 3420 }, { "epoch": 0.99, "learning_rate": 0.000533052683981428, "loss": 3.3259, "step": 3421 }, { "epoch": 0.99, "learning_rate": 0.0005328186062120508, "loss": 3.1561, "step": 3422 }, { "epoch": 0.99, "learning_rate": 0.0005325845212184866, "loss": 3.2334, "step": 3423 }, { "epoch": 0.99, "learning_rate": 0.0005323504290522631, "loss": 3.2394, "step": 3424 }, { "epoch": 0.99, "learning_rate": 0.0005321163297649099, "loss": 3.3106, "step": 3425 }, { "epoch": 0.99, "learning_rate": 0.0005318822234079584, "loss": 3.2746, "step": 3426 }, { "epoch": 0.99, "learning_rate": 0.0005316481100329408, "loss": 3.2677, "step": 3427 }, { "epoch": 0.99, "learning_rate": 0.0005314139896913912, "loss": 3.3079, "step": 3428 }, { "epoch": 0.99, "learning_rate": 0.0005311798624348457, "loss": 3.2361, "step": 3429 }, { "epoch": 0.99, "learning_rate": 0.000530945728314841, "loss": 3.3234, "step": 3430 }, { "epoch": 0.99, "learning_rate": 0.0005307115873829161, "loss": 3.1643, "step": 3431 }, { "epoch": 0.99, "learning_rate": 0.0005304774396906111, "loss": 3.2006, "step": 3432 }, { "epoch": 0.99, "learning_rate": 0.0005302432852894677, "loss": 3.3604, "step": 3433 }, { "epoch": 0.99, "learning_rate": 0.0005300091242310292, "loss": 3.1586, "step": 3434 }, { "epoch": 0.99, "learning_rate": 0.0005297749565668402, "loss": 3.1498, "step": 3435 }, { "epoch": 1.0, "learning_rate": 0.0005295407823484467, "loss": 3.2096, "step": 3436 }, { "epoch": 1.0, "learning_rate": 0.0005293066016273963, "loss": 3.2743, "step": 3437 }, { "epoch": 1.0, "learning_rate": 0.0005290724144552379, "loss": 3.1144, "step": 3438 }, { "epoch": 1.0, "learning_rate": 0.0005288382208835221, "loss": 3.2211, "step": 3439 }, { "epoch": 1.0, "learning_rate": 0.0005286040209638005, "loss": 3.2673, "step": 3440 }, { "epoch": 1.0, "learning_rate": 0.0005283698147476263, "loss": 3.2618, "step": 3441 }, { "epoch": 1.0, "learning_rate": 0.0005281356022865542, "loss": 3.2454, "step": 3442 }, { "epoch": 1.0, "learning_rate": 0.0005279013836321399, "loss": 3.2549, "step": 3443 }, { "epoch": 1.0, "learning_rate": 0.000527667158835941, "loss": 3.3002, "step": 3444 }, { "epoch": 1.0, "learning_rate": 0.0005274329279495162, "loss": 3.2493, "step": 3445 }, { "epoch": 1.0, "learning_rate": 0.0005271986910244253, "loss": 3.1588, "step": 3446 }, { "epoch": 1.0, "learning_rate": 0.0005269644481122295, "loss": 3.1964, "step": 3447 }, { "epoch": 1.0, "learning_rate": 0.0005267301992644922, "loss": 3.1834, "step": 3448 }, { "epoch": 1.0, "learning_rate": 0.0005264959445327765, "loss": 3.2016, "step": 3449 }, { "epoch": 1.0, "learning_rate": 0.0005262616839686481, "loss": 3.1399, "step": 3450 }, { "epoch": 1.0, "learning_rate": 0.0005260274176236734, "loss": 3.2123, "step": 3451 }, { "epoch": 1.0, "learning_rate": 0.0005257931455494203, "loss": 3.1565, "step": 3452 }, { "epoch": 1.0, "learning_rate": 0.0005255588677974579, "loss": 3.1769, "step": 3453 }, { "epoch": 1.0, "learning_rate": 0.0005253245844193564, "loss": 3.1369, "step": 3454 }, { "epoch": 1.0, "learning_rate": 0.0005250902954666876, "loss": 3.1813, "step": 3455 }, { "epoch": 1.0, "learning_rate": 0.0005248560009910239, "loss": 3.203, "step": 3456 }, { "epoch": 1.0, "learning_rate": 0.0005246217010439398, "loss": 3.1443, "step": 3457 }, { "epoch": 1.0, "learning_rate": 0.0005243873956770101, "loss": 3.1865, "step": 3458 }, { "epoch": 1.0, "learning_rate": 0.0005241530849418115, "loss": 3.0818, "step": 3459 }, { "epoch": 1.0, "learning_rate": 0.0005239187688899213, "loss": 3.1461, "step": 3460 }, { "epoch": 1.0, "learning_rate": 0.0005236844475729185, "loss": 3.2421, "step": 3461 }, { "epoch": 1.0, "learning_rate": 0.000523450121042383, "loss": 3.1737, "step": 3462 }, { "epoch": 1.0, "learning_rate": 0.0005232157893498957, "loss": 3.0103, "step": 3463 }, { "epoch": 1.0, "learning_rate": 0.000522981452547039, "loss": 3.0988, "step": 3464 }, { "epoch": 1.0, "learning_rate": 0.000522747110685396, "loss": 3.2677, "step": 3465 }, { "epoch": 1.0, "learning_rate": 0.0005225127638165514, "loss": 3.1246, "step": 3466 }, { "epoch": 1.0, "learning_rate": 0.0005222784119920907, "loss": 3.2292, "step": 3467 }, { "epoch": 1.0, "learning_rate": 0.0005220440552636003, "loss": 3.3352, "step": 3468 }, { "epoch": 1.0, "learning_rate": 0.0005218096936826681, "loss": 3.1104, "step": 3469 }, { "epoch": 1.0, "learning_rate": 0.0005215753273008827, "loss": 3.2108, "step": 3470 }, { "epoch": 1.01, "learning_rate": 0.0005213409561698343, "loss": 3.1207, "step": 3471 }, { "epoch": 1.01, "learning_rate": 0.0005211065803411135, "loss": 3.3316, "step": 3472 }, { "epoch": 1.01, "learning_rate": 0.0005208721998663122, "loss": 3.1799, "step": 3473 }, { "epoch": 1.01, "learning_rate": 0.0005206378147970235, "loss": 3.1977, "step": 3474 }, { "epoch": 1.01, "learning_rate": 0.0005204034251848413, "loss": 3.1366, "step": 3475 }, { "epoch": 1.01, "learning_rate": 0.0005201690310813605, "loss": 3.2945, "step": 3476 }, { "epoch": 1.01, "learning_rate": 0.000519934632538177, "loss": 3.1545, "step": 3477 }, { "epoch": 1.01, "learning_rate": 0.0005197002296068878, "loss": 3.3732, "step": 3478 }, { "epoch": 1.01, "learning_rate": 0.0005194658223390909, "loss": 3.1622, "step": 3479 }, { "epoch": 1.01, "learning_rate": 0.0005192314107863848, "loss": 3.1235, "step": 3480 }, { "epoch": 1.01, "learning_rate": 0.0005189969950003697, "loss": 3.2994, "step": 3481 }, { "epoch": 1.01, "learning_rate": 0.0005187625750326458, "loss": 3.2308, "step": 3482 }, { "epoch": 1.01, "learning_rate": 0.0005185281509348153, "loss": 3.2322, "step": 3483 }, { "epoch": 1.01, "learning_rate": 0.0005182937227584805, "loss": 3.113, "step": 3484 }, { "epoch": 1.01, "learning_rate": 0.0005180592905552447, "loss": 3.1934, "step": 3485 }, { "epoch": 1.01, "learning_rate": 0.0005178248543767122, "loss": 3.1936, "step": 3486 }, { "epoch": 1.01, "learning_rate": 0.0005175904142744885, "loss": 3.1288, "step": 3487 }, { "epoch": 1.01, "learning_rate": 0.0005173559703001796, "loss": 3.1299, "step": 3488 }, { "epoch": 1.01, "learning_rate": 0.0005171215225053921, "loss": 3.1182, "step": 3489 }, { "epoch": 1.01, "learning_rate": 0.0005168870709417342, "loss": 3.1636, "step": 3490 }, { "epoch": 1.01, "learning_rate": 0.000516652615660814, "loss": 3.2376, "step": 3491 }, { "epoch": 1.01, "learning_rate": 0.0005164181567142415, "loss": 3.1495, "step": 3492 }, { "epoch": 1.01, "learning_rate": 0.0005161836941536263, "loss": 3.232, "step": 3493 }, { "epoch": 1.01, "learning_rate": 0.00051594922803058, "loss": 3.1684, "step": 3494 }, { "epoch": 1.01, "learning_rate": 0.0005157147583967139, "loss": 3.2577, "step": 3495 }, { "epoch": 1.01, "learning_rate": 0.0005154802853036408, "loss": 3.1508, "step": 3496 }, { "epoch": 1.01, "learning_rate": 0.000515245808802974, "loss": 3.2368, "step": 3497 }, { "epoch": 1.01, "learning_rate": 0.0005150113289463275, "loss": 3.2307, "step": 3498 }, { "epoch": 1.01, "learning_rate": 0.0005147768457853162, "loss": 3.1892, "step": 3499 }, { "epoch": 1.01, "learning_rate": 0.0005145423593715557, "loss": 3.2918, "step": 3500 }, { "epoch": 1.01, "learning_rate": 0.0005143078697566622, "loss": 3.2671, "step": 3501 }, { "epoch": 1.01, "learning_rate": 0.0005140733769922525, "loss": 3.3621, "step": 3502 }, { "epoch": 1.01, "learning_rate": 0.0005138388811299446, "loss": 3.2079, "step": 3503 }, { "epoch": 1.01, "learning_rate": 0.0005136043822213566, "loss": 3.1294, "step": 3504 }, { "epoch": 1.02, "learning_rate": 0.0005133698803181075, "loss": 3.272, "step": 3505 }, { "epoch": 1.02, "learning_rate": 0.0005131353754718171, "loss": 3.0887, "step": 3506 }, { "epoch": 1.02, "learning_rate": 0.0005129008677341057, "loss": 3.1982, "step": 3507 }, { "epoch": 1.02, "learning_rate": 0.000512666357156594, "loss": 3.2666, "step": 3508 }, { "epoch": 1.02, "learning_rate": 0.0005124318437909038, "loss": 3.2653, "step": 3509 }, { "epoch": 1.02, "learning_rate": 0.0005121973276886575, "loss": 3.3024, "step": 3510 }, { "epoch": 1.02, "learning_rate": 0.0005119628089014774, "loss": 3.1919, "step": 3511 }, { "epoch": 1.02, "learning_rate": 0.0005117282874809873, "loss": 3.1976, "step": 3512 }, { "epoch": 1.02, "learning_rate": 0.000511493763478811, "loss": 3.2923, "step": 3513 }, { "epoch": 1.02, "learning_rate": 0.0005112592369465731, "loss": 3.1581, "step": 3514 }, { "epoch": 1.02, "learning_rate": 0.0005110247079358986, "loss": 3.2226, "step": 3515 }, { "epoch": 1.02, "learning_rate": 0.0005107901764984132, "loss": 3.2401, "step": 3516 }, { "epoch": 1.02, "learning_rate": 0.0005105556426857429, "loss": 3.1795, "step": 3517 }, { "epoch": 1.02, "learning_rate": 0.0005103211065495146, "loss": 3.1341, "step": 3518 }, { "epoch": 1.02, "learning_rate": 0.0005100865681413556, "loss": 3.1452, "step": 3519 }, { "epoch": 1.02, "learning_rate": 0.0005098520275128934, "loss": 3.2214, "step": 3520 }, { "epoch": 1.02, "learning_rate": 0.0005096174847157562, "loss": 3.2096, "step": 3521 }, { "epoch": 1.02, "learning_rate": 0.0005093829398015728, "loss": 3.1336, "step": 3522 }, { "epoch": 1.02, "learning_rate": 0.0005091483928219723, "loss": 3.1596, "step": 3523 }, { "epoch": 1.02, "learning_rate": 0.0005089138438285841, "loss": 3.1607, "step": 3524 }, { "epoch": 1.02, "learning_rate": 0.0005086792928730383, "loss": 3.1573, "step": 3525 }, { "epoch": 1.02, "learning_rate": 0.0005084447400069655, "loss": 3.2674, "step": 3526 }, { "epoch": 1.02, "learning_rate": 0.0005082101852819967, "loss": 3.1944, "step": 3527 }, { "epoch": 1.02, "learning_rate": 0.0005079756287497629, "loss": 3.073, "step": 3528 }, { "epoch": 1.02, "learning_rate": 0.000507741070461896, "loss": 3.2572, "step": 3529 }, { "epoch": 1.02, "learning_rate": 0.000507506510470028, "loss": 3.1718, "step": 3530 }, { "epoch": 1.02, "learning_rate": 0.0005072719488257914, "loss": 3.406, "step": 3531 }, { "epoch": 1.02, "learning_rate": 0.0005070373855808189, "loss": 3.2454, "step": 3532 }, { "epoch": 1.02, "learning_rate": 0.0005068028207867439, "loss": 3.1961, "step": 3533 }, { "epoch": 1.02, "learning_rate": 0.0005065682544951995, "loss": 3.0997, "step": 3534 }, { "epoch": 1.02, "learning_rate": 0.0005063336867578201, "loss": 3.1942, "step": 3535 }, { "epoch": 1.02, "learning_rate": 0.0005060991176262393, "loss": 3.1659, "step": 3536 }, { "epoch": 1.02, "learning_rate": 0.0005058645471520922, "loss": 3.2644, "step": 3537 }, { "epoch": 1.02, "learning_rate": 0.000505629975387013, "loss": 3.1847, "step": 3538 }, { "epoch": 1.02, "learning_rate": 0.0005053954023826368, "loss": 3.1335, "step": 3539 }, { "epoch": 1.03, "learning_rate": 0.0005051608281905994, "loss": 3.2163, "step": 3540 }, { "epoch": 1.03, "learning_rate": 0.0005049262528625359, "loss": 3.1759, "step": 3541 }, { "epoch": 1.03, "learning_rate": 0.0005046916764500824, "loss": 3.1575, "step": 3542 }, { "epoch": 1.03, "learning_rate": 0.0005044570990048747, "loss": 3.1792, "step": 3543 }, { "epoch": 1.03, "learning_rate": 0.0005042225205785493, "loss": 3.2795, "step": 3544 }, { "epoch": 1.03, "learning_rate": 0.0005039879412227428, "loss": 3.1508, "step": 3545 }, { "epoch": 1.03, "learning_rate": 0.0005037533609890916, "loss": 3.2232, "step": 3546 }, { "epoch": 1.03, "learning_rate": 0.0005035187799292329, "loss": 3.1986, "step": 3547 }, { "epoch": 1.03, "learning_rate": 0.0005032841980948036, "loss": 3.2635, "step": 3548 }, { "epoch": 1.03, "learning_rate": 0.0005030496155374412, "loss": 3.1836, "step": 3549 }, { "epoch": 1.03, "learning_rate": 0.000502815032308783, "loss": 3.1746, "step": 3550 }, { "epoch": 1.03, "learning_rate": 0.0005025804484604664, "loss": 3.2842, "step": 3551 }, { "epoch": 1.03, "learning_rate": 0.0005023458640441294, "loss": 3.29, "step": 3552 }, { "epoch": 1.03, "learning_rate": 0.0005021112791114096, "loss": 3.2743, "step": 3553 }, { "epoch": 1.03, "learning_rate": 0.0005018766937139452, "loss": 3.2754, "step": 3554 }, { "epoch": 1.03, "learning_rate": 0.0005016421079033739, "loss": 3.3299, "step": 3555 }, { "epoch": 1.03, "learning_rate": 0.0005014075217313341, "loss": 3.1686, "step": 3556 }, { "epoch": 1.03, "learning_rate": 0.0005011729352494641, "loss": 3.1705, "step": 3557 }, { "epoch": 1.03, "learning_rate": 0.000500938348509402, "loss": 3.1121, "step": 3558 }, { "epoch": 1.03, "learning_rate": 0.000500703761562786, "loss": 3.1733, "step": 3559 }, { "epoch": 1.03, "learning_rate": 0.0005004691744612547, "loss": 3.2115, "step": 3560 }, { "epoch": 1.03, "learning_rate": 0.0005002345872564467, "loss": 3.1998, "step": 3561 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 3.0932, "step": 3562 }, { "epoch": 1.03, "learning_rate": 0.0004997654127435536, "loss": 3.2241, "step": 3563 }, { "epoch": 1.03, "learning_rate": 0.0004995308255387453, "loss": 3.2415, "step": 3564 }, { "epoch": 1.03, "learning_rate": 0.0004992962384372141, "loss": 3.2524, "step": 3565 }, { "epoch": 1.03, "learning_rate": 0.0004990616514905981, "loss": 3.2116, "step": 3566 }, { "epoch": 1.03, "learning_rate": 0.0004988270647505361, "loss": 3.096, "step": 3567 }, { "epoch": 1.03, "learning_rate": 0.0004985924782686658, "loss": 3.2048, "step": 3568 }, { "epoch": 1.03, "learning_rate": 0.0004983578920966261, "loss": 3.1418, "step": 3569 }, { "epoch": 1.03, "learning_rate": 0.0004981233062860548, "loss": 3.1074, "step": 3570 }, { "epoch": 1.03, "learning_rate": 0.0004978887208885904, "loss": 3.2035, "step": 3571 }, { "epoch": 1.03, "learning_rate": 0.0004976541359558707, "loss": 3.3873, "step": 3572 }, { "epoch": 1.03, "learning_rate": 0.0004974195515395337, "loss": 3.3064, "step": 3573 }, { "epoch": 1.04, "learning_rate": 0.0004971849676912172, "loss": 3.1625, "step": 3574 }, { "epoch": 1.04, "learning_rate": 0.0004969503844625588, "loss": 3.2012, "step": 3575 }, { "epoch": 1.04, "learning_rate": 0.0004967158019051965, "loss": 3.1308, "step": 3576 }, { "epoch": 1.04, "learning_rate": 0.0004964812200707672, "loss": 3.2408, "step": 3577 }, { "epoch": 1.04, "learning_rate": 0.0004962466390109085, "loss": 3.258, "step": 3578 }, { "epoch": 1.04, "learning_rate": 0.0004960120587772573, "loss": 3.2491, "step": 3579 }, { "epoch": 1.04, "learning_rate": 0.0004957774794214508, "loss": 3.137, "step": 3580 }, { "epoch": 1.04, "learning_rate": 0.0004955429009951255, "loss": 3.2677, "step": 3581 }, { "epoch": 1.04, "learning_rate": 0.0004953083235499177, "loss": 3.2497, "step": 3582 }, { "epoch": 1.04, "learning_rate": 0.0004950737471374643, "loss": 3.1953, "step": 3583 }, { "epoch": 1.04, "learning_rate": 0.0004948391718094006, "loss": 3.2716, "step": 3584 }, { "epoch": 1.04, "learning_rate": 0.0004946045976173632, "loss": 3.1777, "step": 3585 }, { "epoch": 1.04, "learning_rate": 0.000494370024612987, "loss": 3.144, "step": 3586 }, { "epoch": 1.04, "learning_rate": 0.000494135452847908, "loss": 3.2341, "step": 3587 }, { "epoch": 1.04, "learning_rate": 0.0004939008823737606, "loss": 3.2268, "step": 3588 }, { "epoch": 1.04, "learning_rate": 0.00049366631324218, "loss": 3.3093, "step": 3589 }, { "epoch": 1.04, "learning_rate": 0.0004934317455048005, "loss": 3.191, "step": 3590 }, { "epoch": 1.04, "learning_rate": 0.0004931971792132562, "loss": 3.2268, "step": 3591 }, { "epoch": 1.04, "learning_rate": 0.0004929626144191812, "loss": 3.2901, "step": 3592 }, { "epoch": 1.04, "learning_rate": 0.0004927280511742086, "loss": 3.3023, "step": 3593 }, { "epoch": 1.04, "learning_rate": 0.0004924934895299721, "loss": 3.3352, "step": 3594 }, { "epoch": 1.04, "learning_rate": 0.000492258929538104, "loss": 3.2554, "step": 3595 }, { "epoch": 1.04, "learning_rate": 0.0004920243712502371, "loss": 3.1866, "step": 3596 }, { "epoch": 1.04, "learning_rate": 0.0004917898147180033, "loss": 3.2505, "step": 3597 }, { "epoch": 1.04, "learning_rate": 0.0004915552599930346, "loss": 3.1956, "step": 3598 }, { "epoch": 1.04, "learning_rate": 0.0004913207071269618, "loss": 3.1773, "step": 3599 }, { "epoch": 1.04, "learning_rate": 0.000491086156171416, "loss": 3.1524, "step": 3600 }, { "epoch": 1.04, "learning_rate": 0.0004908516071780281, "loss": 3.1307, "step": 3601 }, { "epoch": 1.04, "learning_rate": 0.0004906170601984272, "loss": 3.1272, "step": 3602 }, { "epoch": 1.04, "learning_rate": 0.0004903825152842438, "loss": 3.2893, "step": 3603 }, { "epoch": 1.04, "learning_rate": 0.0004901479724871066, "loss": 3.2318, "step": 3604 }, { "epoch": 1.04, "learning_rate": 0.0004899134318586445, "loss": 3.2093, "step": 3605 }, { "epoch": 1.04, "learning_rate": 0.0004896788934504853, "loss": 3.0065, "step": 3606 }, { "epoch": 1.04, "learning_rate": 0.0004894443573142572, "loss": 3.2493, "step": 3607 }, { "epoch": 1.04, "learning_rate": 0.000489209823501587, "loss": 3.2217, "step": 3608 }, { "epoch": 1.05, "learning_rate": 0.0004889752920641014, "loss": 3.2456, "step": 3609 }, { "epoch": 1.05, "learning_rate": 0.0004887407630534271, "loss": 3.1262, "step": 3610 }, { "epoch": 1.05, "learning_rate": 0.000488506236521189, "loss": 3.2429, "step": 3611 }, { "epoch": 1.05, "learning_rate": 0.00048827171251901273, "loss": 3.1723, "step": 3612 }, { "epoch": 1.05, "learning_rate": 0.0004880371910985225, "loss": 3.2008, "step": 3613 }, { "epoch": 1.05, "learning_rate": 0.0004878026723113427, "loss": 3.1396, "step": 3614 }, { "epoch": 1.05, "learning_rate": 0.00048756815620909613, "loss": 3.2738, "step": 3615 }, { "epoch": 1.05, "learning_rate": 0.00048733364284340617, "loss": 3.116, "step": 3616 }, { "epoch": 1.05, "learning_rate": 0.0004870991322658944, "loss": 3.1744, "step": 3617 }, { "epoch": 1.05, "learning_rate": 0.00048686462452818296, "loss": 3.2628, "step": 3618 }, { "epoch": 1.05, "learning_rate": 0.00048663011968189265, "loss": 3.2704, "step": 3619 }, { "epoch": 1.05, "learning_rate": 0.0004863956177786435, "loss": 3.1901, "step": 3620 }, { "epoch": 1.05, "learning_rate": 0.0004861611188700555, "loss": 3.3276, "step": 3621 }, { "epoch": 1.05, "learning_rate": 0.00048592662300774736, "loss": 3.2386, "step": 3622 }, { "epoch": 1.05, "learning_rate": 0.0004856921302433379, "loss": 3.1664, "step": 3623 }, { "epoch": 1.05, "learning_rate": 0.0004854576406284443, "loss": 3.305, "step": 3624 }, { "epoch": 1.05, "learning_rate": 0.0004852231542146839, "loss": 3.2886, "step": 3625 }, { "epoch": 1.05, "learning_rate": 0.0004849886710536725, "loss": 3.0892, "step": 3626 }, { "epoch": 1.05, "learning_rate": 0.0004847541911970261, "loss": 3.2835, "step": 3627 }, { "epoch": 1.05, "learning_rate": 0.0004845197146963594, "loss": 3.1733, "step": 3628 }, { "epoch": 1.05, "learning_rate": 0.00048428524160328625, "loss": 3.3403, "step": 3629 }, { "epoch": 1.05, "learning_rate": 0.00048405077196942017, "loss": 3.2257, "step": 3630 }, { "epoch": 1.05, "learning_rate": 0.0004838163058463736, "loss": 3.1959, "step": 3631 }, { "epoch": 1.05, "learning_rate": 0.00048358184328575873, "loss": 3.1408, "step": 3632 }, { "epoch": 1.05, "learning_rate": 0.000483347384339186, "loss": 3.1444, "step": 3633 }, { "epoch": 1.05, "learning_rate": 0.0004831129290582659, "loss": 3.2625, "step": 3634 }, { "epoch": 1.05, "learning_rate": 0.0004828784774946078, "loss": 3.2941, "step": 3635 }, { "epoch": 1.05, "learning_rate": 0.0004826440296998206, "loss": 3.2095, "step": 3636 }, { "epoch": 1.05, "learning_rate": 0.0004824095857255117, "loss": 3.1675, "step": 3637 }, { "epoch": 1.05, "learning_rate": 0.00048217514562328784, "loss": 3.2552, "step": 3638 }, { "epoch": 1.05, "learning_rate": 0.0004819407094447555, "loss": 3.1501, "step": 3639 }, { "epoch": 1.05, "learning_rate": 0.0004817062772415196, "loss": 3.1004, "step": 3640 }, { "epoch": 1.05, "learning_rate": 0.00048147184906518483, "loss": 3.1336, "step": 3641 }, { "epoch": 1.05, "learning_rate": 0.00048123742496735414, "loss": 3.1509, "step": 3642 }, { "epoch": 1.06, "learning_rate": 0.0004810030049996304, "loss": 3.1597, "step": 3643 }, { "epoch": 1.06, "learning_rate": 0.0004807685892136151, "loss": 3.2088, "step": 3644 }, { "epoch": 1.06, "learning_rate": 0.0004805341776609092, "loss": 3.1144, "step": 3645 }, { "epoch": 1.06, "learning_rate": 0.00048029977039311237, "loss": 3.1333, "step": 3646 }, { "epoch": 1.06, "learning_rate": 0.00048006536746182313, "loss": 3.2032, "step": 3647 }, { "epoch": 1.06, "learning_rate": 0.00047983096891863965, "loss": 3.2688, "step": 3648 }, { "epoch": 1.06, "learning_rate": 0.00047959657481515874, "loss": 3.2184, "step": 3649 }, { "epoch": 1.06, "learning_rate": 0.00047936218520297665, "loss": 3.2813, "step": 3650 }, { "epoch": 1.06, "learning_rate": 0.0004791278001336878, "loss": 3.2475, "step": 3651 }, { "epoch": 1.06, "learning_rate": 0.0004788934196588866, "loss": 3.1451, "step": 3652 }, { "epoch": 1.06, "learning_rate": 0.00047865904383016567, "loss": 3.1412, "step": 3653 }, { "epoch": 1.06, "learning_rate": 0.0004784246726991173, "loss": 3.155, "step": 3654 }, { "epoch": 1.06, "learning_rate": 0.0004781903063173321, "loss": 3.257, "step": 3655 }, { "epoch": 1.06, "learning_rate": 0.00047795594473639975, "loss": 3.2712, "step": 3656 }, { "epoch": 1.06, "learning_rate": 0.0004777215880079095, "loss": 3.2999, "step": 3657 }, { "epoch": 1.06, "learning_rate": 0.0004774872361834486, "loss": 3.2229, "step": 3658 }, { "epoch": 1.06, "learning_rate": 0.0004772528893146041, "loss": 3.3174, "step": 3659 }, { "epoch": 1.06, "learning_rate": 0.00047701854745296106, "loss": 3.2488, "step": 3660 }, { "epoch": 1.06, "learning_rate": 0.00047678421065010436, "loss": 3.179, "step": 3661 }, { "epoch": 1.06, "learning_rate": 0.000476549878957617, "loss": 3.1016, "step": 3662 }, { "epoch": 1.06, "learning_rate": 0.00047631555242708156, "loss": 3.238, "step": 3663 }, { "epoch": 1.06, "learning_rate": 0.0004760812311100788, "loss": 3.0342, "step": 3664 }, { "epoch": 1.06, "learning_rate": 0.0004758469150581886, "loss": 3.0674, "step": 3665 }, { "epoch": 1.06, "learning_rate": 0.0004756126043229901, "loss": 3.1496, "step": 3666 }, { "epoch": 1.06, "learning_rate": 0.0004753782989560603, "loss": 3.1733, "step": 3667 }, { "epoch": 1.06, "learning_rate": 0.00047514399900897627, "loss": 3.3232, "step": 3668 }, { "epoch": 1.06, "learning_rate": 0.0004749097045333126, "loss": 3.3586, "step": 3669 }, { "epoch": 1.06, "learning_rate": 0.0004746754155806437, "loss": 3.2001, "step": 3670 }, { "epoch": 1.06, "learning_rate": 0.00047444113220254213, "loss": 3.1849, "step": 3671 }, { "epoch": 1.06, "learning_rate": 0.0004742068544505798, "loss": 3.2893, "step": 3672 }, { "epoch": 1.06, "learning_rate": 0.0004739725823763267, "loss": 3.0684, "step": 3673 }, { "epoch": 1.06, "learning_rate": 0.000473738316031352, "loss": 3.11, "step": 3674 }, { "epoch": 1.06, "learning_rate": 0.00047350405546722367, "loss": 3.2859, "step": 3675 }, { "epoch": 1.06, "learning_rate": 0.00047326980073550797, "loss": 3.223, "step": 3676 }, { "epoch": 1.06, "learning_rate": 0.00047303555188777047, "loss": 3.3546, "step": 3677 }, { "epoch": 1.07, "learning_rate": 0.00047280130897557485, "loss": 3.1524, "step": 3678 }, { "epoch": 1.07, "learning_rate": 0.0004725670720504839, "loss": 3.1957, "step": 3679 }, { "epoch": 1.07, "learning_rate": 0.00047233284116405893, "loss": 3.1913, "step": 3680 }, { "epoch": 1.07, "learning_rate": 0.0004720986163678602, "loss": 3.0721, "step": 3681 }, { "epoch": 1.07, "learning_rate": 0.000471864397713446, "loss": 3.1537, "step": 3682 }, { "epoch": 1.07, "learning_rate": 0.0004716301852523738, "loss": 3.2208, "step": 3683 }, { "epoch": 1.07, "learning_rate": 0.00047139597903619976, "loss": 3.2736, "step": 3684 }, { "epoch": 1.07, "learning_rate": 0.000471161779116478, "loss": 3.2384, "step": 3685 }, { "epoch": 1.07, "learning_rate": 0.0004709275855447621, "loss": 3.3967, "step": 3686 }, { "epoch": 1.07, "learning_rate": 0.00047069339837260373, "loss": 3.2931, "step": 3687 }, { "epoch": 1.07, "learning_rate": 0.0004704592176515534, "loss": 3.2945, "step": 3688 }, { "epoch": 1.07, "learning_rate": 0.00047022504343315985, "loss": 3.1712, "step": 3689 }, { "epoch": 1.07, "learning_rate": 0.00046999087576897095, "loss": 3.2604, "step": 3690 }, { "epoch": 1.07, "learning_rate": 0.00046975671471053245, "loss": 3.2514, "step": 3691 }, { "epoch": 1.07, "learning_rate": 0.000469522560309389, "loss": 3.1834, "step": 3692 }, { "epoch": 1.07, "learning_rate": 0.00046928841261708416, "loss": 3.1217, "step": 3693 }, { "epoch": 1.07, "learning_rate": 0.0004690542716851591, "loss": 3.1033, "step": 3694 }, { "epoch": 1.07, "learning_rate": 0.0004688201375651545, "loss": 3.0763, "step": 3695 }, { "epoch": 1.07, "learning_rate": 0.0004685860103086087, "loss": 3.0679, "step": 3696 }, { "epoch": 1.07, "learning_rate": 0.00046835188996705935, "loss": 3.2249, "step": 3697 }, { "epoch": 1.07, "learning_rate": 0.0004681177765920416, "loss": 3.0226, "step": 3698 }, { "epoch": 1.07, "learning_rate": 0.0004678836702350901, "loss": 3.2229, "step": 3699 }, { "epoch": 1.07, "learning_rate": 0.000467649570947737, "loss": 3.2604, "step": 3700 }, { "epoch": 1.07, "learning_rate": 0.0004674154787815135, "loss": 3.2413, "step": 3701 }, { "epoch": 1.07, "learning_rate": 0.0004671813937879494, "loss": 3.2905, "step": 3702 }, { "epoch": 1.07, "learning_rate": 0.00046694731601857204, "loss": 3.1761, "step": 3703 }, { "epoch": 1.07, "learning_rate": 0.00046671324552490813, "loss": 3.2753, "step": 3704 }, { "epoch": 1.07, "learning_rate": 0.0004664791823584821, "loss": 3.2358, "step": 3705 }, { "epoch": 1.07, "learning_rate": 0.0004662451265708174, "loss": 3.0456, "step": 3706 }, { "epoch": 1.07, "learning_rate": 0.00046601107821343514, "loss": 3.3786, "step": 3707 }, { "epoch": 1.07, "learning_rate": 0.00046577703733785536, "loss": 3.1959, "step": 3708 }, { "epoch": 1.07, "learning_rate": 0.00046554300399559613, "loss": 3.192, "step": 3709 }, { "epoch": 1.07, "learning_rate": 0.0004653089782381742, "loss": 3.324, "step": 3710 }, { "epoch": 1.07, "learning_rate": 0.0004650749601171043, "loss": 3.1865, "step": 3711 }, { "epoch": 1.08, "learning_rate": 0.00046484094968389934, "loss": 3.109, "step": 3712 }, { "epoch": 1.08, "learning_rate": 0.00046460694699007116, "loss": 3.2007, "step": 3713 }, { "epoch": 1.08, "learning_rate": 0.0004643729520871293, "loss": 3.2807, "step": 3714 }, { "epoch": 1.08, "learning_rate": 0.00046413896502658214, "loss": 3.0879, "step": 3715 }, { "epoch": 1.08, "learning_rate": 0.0004639049858599357, "loss": 3.2074, "step": 3716 }, { "epoch": 1.08, "learning_rate": 0.0004636710146386947, "loss": 3.278, "step": 3717 }, { "epoch": 1.08, "learning_rate": 0.00046343705141436195, "loss": 3.2555, "step": 3718 }, { "epoch": 1.08, "learning_rate": 0.0004632030962384388, "loss": 3.1672, "step": 3719 }, { "epoch": 1.08, "learning_rate": 0.0004629691491624243, "loss": 3.2779, "step": 3720 }, { "epoch": 1.08, "learning_rate": 0.00046273521023781596, "loss": 3.172, "step": 3721 }, { "epoch": 1.08, "learning_rate": 0.0004625012795161097, "loss": 3.1836, "step": 3722 }, { "epoch": 1.08, "learning_rate": 0.0004622673570487992, "loss": 3.3293, "step": 3723 }, { "epoch": 1.08, "learning_rate": 0.000462033442887377, "loss": 3.2018, "step": 3724 }, { "epoch": 1.08, "learning_rate": 0.0004617995370833329, "loss": 3.3641, "step": 3725 }, { "epoch": 1.08, "learning_rate": 0.00046156563968815573, "loss": 3.2679, "step": 3726 }, { "epoch": 1.08, "learning_rate": 0.00046133175075333183, "loss": 3.191, "step": 3727 }, { "epoch": 1.08, "learning_rate": 0.00046109787033034636, "loss": 3.2499, "step": 3728 }, { "epoch": 1.08, "learning_rate": 0.00046086399847068184, "loss": 3.2342, "step": 3729 }, { "epoch": 1.08, "learning_rate": 0.00046063013522581916, "loss": 3.1732, "step": 3730 }, { "epoch": 1.08, "learning_rate": 0.0004603962806472376, "loss": 3.2129, "step": 3731 }, { "epoch": 1.08, "learning_rate": 0.00046016243478641425, "loss": 3.1685, "step": 3732 }, { "epoch": 1.08, "learning_rate": 0.00045992859769482453, "loss": 3.0929, "step": 3733 }, { "epoch": 1.08, "learning_rate": 0.0004596947694239415, "loss": 3.1812, "step": 3734 }, { "epoch": 1.08, "learning_rate": 0.0004594609500252368, "loss": 3.1994, "step": 3735 }, { "epoch": 1.08, "learning_rate": 0.00045922713955017967, "loss": 3.087, "step": 3736 }, { "epoch": 1.08, "learning_rate": 0.00045899333805023786, "loss": 3.1594, "step": 3737 }, { "epoch": 1.08, "learning_rate": 0.0004587595455768765, "loss": 3.1735, "step": 3738 }, { "epoch": 1.08, "learning_rate": 0.0004585257621815593, "loss": 3.1604, "step": 3739 }, { "epoch": 1.08, "learning_rate": 0.0004582919879157479, "loss": 3.2697, "step": 3740 }, { "epoch": 1.08, "learning_rate": 0.00045805822283090137, "loss": 3.2114, "step": 3741 }, { "epoch": 1.08, "learning_rate": 0.0004578244669784777, "loss": 3.0601, "step": 3742 }, { "epoch": 1.08, "learning_rate": 0.00045759072040993187, "loss": 3.1277, "step": 3743 }, { "epoch": 1.08, "learning_rate": 0.0004573569831767176, "loss": 3.2905, "step": 3744 }, { "epoch": 1.08, "learning_rate": 0.00045712325533028603, "loss": 3.2718, "step": 3745 }, { "epoch": 1.08, "learning_rate": 0.0004568895369220868, "loss": 3.278, "step": 3746 }, { "epoch": 1.09, "learning_rate": 0.0004566558280035667, "loss": 3.1588, "step": 3747 }, { "epoch": 1.09, "learning_rate": 0.00045642212862617086, "loss": 3.1912, "step": 3748 }, { "epoch": 1.09, "learning_rate": 0.00045618843884134277, "loss": 3.1865, "step": 3749 }, { "epoch": 1.09, "learning_rate": 0.0004559547587005227, "loss": 3.1859, "step": 3750 }, { "epoch": 1.09, "learning_rate": 0.00045572108825514996, "loss": 3.1617, "step": 3751 }, { "epoch": 1.09, "learning_rate": 0.00045548742755666073, "loss": 3.2566, "step": 3752 }, { "epoch": 1.09, "learning_rate": 0.00045525377665648985, "loss": 3.2713, "step": 3753 }, { "epoch": 1.09, "learning_rate": 0.0004550201356060695, "loss": 3.1732, "step": 3754 }, { "epoch": 1.09, "learning_rate": 0.00045478650445683014, "loss": 3.1581, "step": 3755 }, { "epoch": 1.09, "learning_rate": 0.00045455288326019935, "loss": 3.1202, "step": 3756 }, { "epoch": 1.09, "learning_rate": 0.00045431927206760305, "loss": 3.1627, "step": 3757 }, { "epoch": 1.09, "learning_rate": 0.0004540856709304651, "loss": 3.1361, "step": 3758 }, { "epoch": 1.09, "learning_rate": 0.0004538520799002065, "loss": 3.0894, "step": 3759 }, { "epoch": 1.09, "learning_rate": 0.00045361849902824676, "loss": 3.2309, "step": 3760 }, { "epoch": 1.09, "learning_rate": 0.0004533849283660025, "loss": 3.0817, "step": 3761 }, { "epoch": 1.09, "learning_rate": 0.00045315136796488876, "loss": 3.0932, "step": 3762 }, { "epoch": 1.09, "learning_rate": 0.00045291781787631756, "loss": 3.2852, "step": 3763 }, { "epoch": 1.09, "learning_rate": 0.00045268427815169955, "loss": 3.2032, "step": 3764 }, { "epoch": 1.09, "learning_rate": 0.0004524507488424421, "loss": 3.1599, "step": 3765 }, { "epoch": 1.09, "learning_rate": 0.000452217229999951, "loss": 3.2816, "step": 3766 }, { "epoch": 1.09, "learning_rate": 0.0004519837216756296, "loss": 3.2345, "step": 3767 }, { "epoch": 1.09, "learning_rate": 0.0004517502239208786, "loss": 3.3493, "step": 3768 }, { "epoch": 1.09, "learning_rate": 0.00045151673678709694, "loss": 3.1288, "step": 3769 }, { "epoch": 1.09, "learning_rate": 0.0004512832603256807, "loss": 3.2525, "step": 3770 }, { "epoch": 1.09, "learning_rate": 0.00045104979458802404, "loss": 3.2619, "step": 3771 }, { "epoch": 1.09, "learning_rate": 0.00045081633962551823, "loss": 3.2968, "step": 3772 }, { "epoch": 1.09, "learning_rate": 0.0004505828954895529, "loss": 3.0708, "step": 3773 }, { "epoch": 1.09, "learning_rate": 0.00045034946223151447, "loss": 3.2054, "step": 3774 }, { "epoch": 1.09, "learning_rate": 0.0004501160399027874, "loss": 3.1687, "step": 3775 }, { "epoch": 1.09, "learning_rate": 0.0004498826285547542, "loss": 3.3402, "step": 3776 }, { "epoch": 1.09, "learning_rate": 0.00044964922823879385, "loss": 3.2077, "step": 3777 }, { "epoch": 1.09, "learning_rate": 0.000449415839006284, "loss": 3.2226, "step": 3778 }, { "epoch": 1.09, "learning_rate": 0.00044918246090859906, "loss": 3.1346, "step": 3779 }, { "epoch": 1.09, "learning_rate": 0.00044894909399711166, "loss": 3.1409, "step": 3780 }, { "epoch": 1.1, "learning_rate": 0.00044871573832319124, "loss": 3.2565, "step": 3781 }, { "epoch": 1.1, "learning_rate": 0.0004484823939382056, "loss": 3.1807, "step": 3782 }, { "epoch": 1.1, "learning_rate": 0.0004482490608935193, "loss": 3.209, "step": 3783 }, { "epoch": 1.1, "learning_rate": 0.0004480157392404947, "loss": 3.2073, "step": 3784 }, { "epoch": 1.1, "learning_rate": 0.0004477824290304919, "loss": 3.1481, "step": 3785 }, { "epoch": 1.1, "learning_rate": 0.00044754913031486794, "loss": 3.1632, "step": 3786 }, { "epoch": 1.1, "learning_rate": 0.0004473158431449779, "loss": 3.0496, "step": 3787 }, { "epoch": 1.1, "learning_rate": 0.0004470825675721738, "loss": 3.2307, "step": 3788 }, { "epoch": 1.1, "learning_rate": 0.00044684930364780577, "loss": 3.2482, "step": 3789 }, { "epoch": 1.1, "learning_rate": 0.0004466160514232206, "loss": 3.1386, "step": 3790 }, { "epoch": 1.1, "learning_rate": 0.000446382810949763, "loss": 3.3337, "step": 3791 }, { "epoch": 1.1, "learning_rate": 0.0004461495822787751, "loss": 3.2076, "step": 3792 }, { "epoch": 1.1, "learning_rate": 0.00044591636546159597, "loss": 3.0852, "step": 3793 }, { "epoch": 1.1, "learning_rate": 0.00044568316054956284, "loss": 3.0665, "step": 3794 }, { "epoch": 1.1, "learning_rate": 0.00044544996759400936, "loss": 3.2644, "step": 3795 }, { "epoch": 1.1, "learning_rate": 0.00044521678664626745, "loss": 3.284, "step": 3796 }, { "epoch": 1.1, "learning_rate": 0.0004449836177576658, "loss": 3.2861, "step": 3797 }, { "epoch": 1.1, "learning_rate": 0.000444750460979531, "loss": 3.2307, "step": 3798 }, { "epoch": 1.1, "learning_rate": 0.00044451731636318615, "loss": 3.2224, "step": 3799 }, { "epoch": 1.1, "learning_rate": 0.00044428418395995245, "loss": 3.1316, "step": 3800 }, { "epoch": 1.1, "learning_rate": 0.00044405106382114796, "loss": 3.2926, "step": 3801 }, { "epoch": 1.1, "learning_rate": 0.0004438179559980884, "loss": 3.2539, "step": 3802 }, { "epoch": 1.1, "learning_rate": 0.0004435848605420865, "loss": 3.1522, "step": 3803 }, { "epoch": 1.1, "learning_rate": 0.0004433517775044521, "loss": 3.2026, "step": 3804 }, { "epoch": 1.1, "learning_rate": 0.00044311870693649263, "loss": 3.2937, "step": 3805 }, { "epoch": 1.1, "learning_rate": 0.00044288564888951277, "loss": 3.2174, "step": 3806 }, { "epoch": 1.1, "learning_rate": 0.00044265260341481456, "loss": 3.2023, "step": 3807 }, { "epoch": 1.1, "learning_rate": 0.00044241957056369665, "loss": 3.0363, "step": 3808 }, { "epoch": 1.1, "learning_rate": 0.00044218655038745557, "loss": 3.2262, "step": 3809 }, { "epoch": 1.1, "learning_rate": 0.0004419535429373848, "loss": 3.1189, "step": 3810 }, { "epoch": 1.1, "learning_rate": 0.00044172054826477526, "loss": 3.1864, "step": 3811 }, { "epoch": 1.1, "learning_rate": 0.00044148756642091447, "loss": 3.304, "step": 3812 }, { "epoch": 1.1, "learning_rate": 0.0004412545974570876, "loss": 3.2396, "step": 3813 }, { "epoch": 1.1, "learning_rate": 0.00044102164142457707, "loss": 3.3437, "step": 3814 }, { "epoch": 1.1, "learning_rate": 0.000440788698374662, "loss": 3.2134, "step": 3815 }, { "epoch": 1.11, "learning_rate": 0.0004405557683586192, "loss": 3.2442, "step": 3816 }, { "epoch": 1.11, "learning_rate": 0.000440322851427722, "loss": 3.2183, "step": 3817 }, { "epoch": 1.11, "learning_rate": 0.00044008994763324147, "loss": 3.405, "step": 3818 }, { "epoch": 1.11, "learning_rate": 0.00043985705702644524, "loss": 3.1572, "step": 3819 }, { "epoch": 1.11, "learning_rate": 0.00043962417965859866, "loss": 3.142, "step": 3820 }, { "epoch": 1.11, "learning_rate": 0.00043939131558096346, "loss": 3.1039, "step": 3821 }, { "epoch": 1.11, "learning_rate": 0.0004391584648447988, "loss": 3.2459, "step": 3822 }, { "epoch": 1.11, "learning_rate": 0.00043892562750136115, "loss": 3.1952, "step": 3823 }, { "epoch": 1.11, "learning_rate": 0.00043869280360190347, "loss": 3.0562, "step": 3824 }, { "epoch": 1.11, "learning_rate": 0.0004384599931976764, "loss": 3.1578, "step": 3825 }, { "epoch": 1.11, "learning_rate": 0.0004382271963399268, "loss": 3.323, "step": 3826 }, { "epoch": 1.11, "learning_rate": 0.0004379944130798994, "loss": 3.174, "step": 3827 }, { "epoch": 1.11, "learning_rate": 0.0004377616434688353, "loss": 3.1091, "step": 3828 }, { "epoch": 1.11, "learning_rate": 0.0004375288875579732, "loss": 3.0464, "step": 3829 }, { "epoch": 1.11, "learning_rate": 0.0004372961453985481, "loss": 3.3275, "step": 3830 }, { "epoch": 1.11, "learning_rate": 0.0004370634170417924, "loss": 3.2356, "step": 3831 }, { "epoch": 1.11, "learning_rate": 0.00043683070253893553, "loss": 3.1687, "step": 3832 }, { "epoch": 1.11, "learning_rate": 0.00043659800194120334, "loss": 3.1444, "step": 3833 }, { "epoch": 1.11, "learning_rate": 0.00043636531529981943, "loss": 3.1932, "step": 3834 }, { "epoch": 1.11, "learning_rate": 0.0004361326426660035, "loss": 3.4213, "step": 3835 }, { "epoch": 1.11, "learning_rate": 0.0004358999840909728, "loss": 3.0843, "step": 3836 }, { "epoch": 1.11, "learning_rate": 0.000435667339625941, "loss": 3.2171, "step": 3837 }, { "epoch": 1.11, "learning_rate": 0.0004354347093221194, "loss": 3.2208, "step": 3838 }, { "epoch": 1.11, "learning_rate": 0.00043520209323071507, "loss": 3.1032, "step": 3839 }, { "epoch": 1.11, "learning_rate": 0.00043496949140293283, "loss": 3.249, "step": 3840 }, { "epoch": 1.11, "learning_rate": 0.00043473690388997434, "loss": 3.2737, "step": 3841 }, { "epoch": 1.11, "learning_rate": 0.00043450433074303734, "loss": 3.2754, "step": 3842 }, { "epoch": 1.11, "learning_rate": 0.0004342717720133174, "loss": 3.2744, "step": 3843 }, { "epoch": 1.11, "learning_rate": 0.0004340392277520061, "loss": 3.3137, "step": 3844 }, { "epoch": 1.11, "learning_rate": 0.0004338066980102926, "loss": 3.2754, "step": 3845 }, { "epoch": 1.11, "learning_rate": 0.000433574182839362, "loss": 3.2811, "step": 3846 }, { "epoch": 1.11, "learning_rate": 0.00043334168229039704, "loss": 3.2642, "step": 3847 }, { "epoch": 1.11, "learning_rate": 0.0004331091964145766, "loss": 3.0624, "step": 3848 }, { "epoch": 1.11, "learning_rate": 0.00043287672526307644, "loss": 3.2067, "step": 3849 }, { "epoch": 1.11, "learning_rate": 0.00043264426888706966, "loss": 3.3398, "step": 3850 }, { "epoch": 1.12, "learning_rate": 0.0004324118273377251, "loss": 3.236, "step": 3851 }, { "epoch": 1.12, "learning_rate": 0.0004321794006662093, "loss": 3.1633, "step": 3852 }, { "epoch": 1.12, "learning_rate": 0.00043194698892368497, "loss": 3.1391, "step": 3853 }, { "epoch": 1.12, "learning_rate": 0.0004317145921613119, "loss": 3.0568, "step": 3854 }, { "epoch": 1.12, "learning_rate": 0.0004314822104302459, "loss": 3.2879, "step": 3855 }, { "epoch": 1.12, "learning_rate": 0.00043124984378164046, "loss": 3.1896, "step": 3856 }, { "epoch": 1.12, "learning_rate": 0.0004310174922666449, "loss": 3.2834, "step": 3857 }, { "epoch": 1.12, "learning_rate": 0.0004307851559364055, "loss": 3.2652, "step": 3858 }, { "epoch": 1.12, "learning_rate": 0.0004305528348420656, "loss": 3.1892, "step": 3859 }, { "epoch": 1.12, "learning_rate": 0.00043032052903476433, "loss": 3.229, "step": 3860 }, { "epoch": 1.12, "learning_rate": 0.0004300882385656383, "loss": 3.2305, "step": 3861 }, { "epoch": 1.12, "learning_rate": 0.00042985596348582014, "loss": 3.203, "step": 3862 }, { "epoch": 1.12, "learning_rate": 0.0004296237038464397, "loss": 3.2652, "step": 3863 }, { "epoch": 1.12, "learning_rate": 0.00042939145969862264, "loss": 3.1965, "step": 3864 }, { "epoch": 1.12, "learning_rate": 0.000429159231093492, "loss": 3.163, "step": 3865 }, { "epoch": 1.12, "learning_rate": 0.00042892701808216696, "loss": 3.1824, "step": 3866 }, { "epoch": 1.12, "learning_rate": 0.00042869482071576306, "loss": 3.0142, "step": 3867 }, { "epoch": 1.12, "learning_rate": 0.00042846263904539305, "loss": 3.2287, "step": 3868 }, { "epoch": 1.12, "learning_rate": 0.0004282304731221656, "loss": 3.2401, "step": 3869 }, { "epoch": 1.12, "learning_rate": 0.0004279983229971863, "loss": 3.1882, "step": 3870 }, { "epoch": 1.12, "learning_rate": 0.00042776618872155715, "loss": 3.3685, "step": 3871 }, { "epoch": 1.12, "learning_rate": 0.0004275340703463767, "loss": 3.2122, "step": 3872 }, { "epoch": 1.12, "learning_rate": 0.0004273019679227397, "loss": 3.232, "step": 3873 }, { "epoch": 1.12, "learning_rate": 0.00042706988150173784, "loss": 3.1538, "step": 3874 }, { "epoch": 1.12, "learning_rate": 0.00042683781113445925, "loss": 3.0751, "step": 3875 }, { "epoch": 1.12, "learning_rate": 0.00042660575687198787, "loss": 3.2469, "step": 3876 }, { "epoch": 1.12, "learning_rate": 0.00042637371876540507, "loss": 3.2119, "step": 3877 }, { "epoch": 1.12, "learning_rate": 0.00042614169686578773, "loss": 3.2187, "step": 3878 }, { "epoch": 1.12, "learning_rate": 0.00042590969122420995, "loss": 3.1348, "step": 3879 }, { "epoch": 1.12, "learning_rate": 0.0004256777018917418, "loss": 3.1702, "step": 3880 }, { "epoch": 1.12, "learning_rate": 0.00042544572891945, "loss": 3.0849, "step": 3881 }, { "epoch": 1.12, "learning_rate": 0.0004252137723583972, "loss": 3.167, "step": 3882 }, { "epoch": 1.12, "learning_rate": 0.0004249818322596431, "loss": 3.1909, "step": 3883 }, { "epoch": 1.12, "learning_rate": 0.00042474990867424347, "loss": 3.3824, "step": 3884 }, { "epoch": 1.13, "learning_rate": 0.00042451800165325005, "loss": 3.1535, "step": 3885 }, { "epoch": 1.13, "learning_rate": 0.00042428611124771184, "loss": 3.2029, "step": 3886 }, { "epoch": 1.13, "learning_rate": 0.0004240542375086731, "loss": 3.207, "step": 3887 }, { "epoch": 1.13, "learning_rate": 0.0004238223804871754, "loss": 3.1492, "step": 3888 }, { "epoch": 1.13, "learning_rate": 0.00042359054023425593, "loss": 3.1949, "step": 3889 }, { "epoch": 1.13, "learning_rate": 0.0004233587168009488, "loss": 3.0165, "step": 3890 }, { "epoch": 1.13, "learning_rate": 0.0004231269102382837, "loss": 3.0848, "step": 3891 }, { "epoch": 1.13, "learning_rate": 0.00042289512059728727, "loss": 3.1501, "step": 3892 }, { "epoch": 1.13, "learning_rate": 0.0004226633479289819, "loss": 3.0703, "step": 3893 }, { "epoch": 1.13, "learning_rate": 0.00042243159228438687, "loss": 3.2244, "step": 3894 }, { "epoch": 1.13, "learning_rate": 0.0004221998537145169, "loss": 3.0597, "step": 3895 }, { "epoch": 1.13, "learning_rate": 0.00042196813227038355, "loss": 3.1418, "step": 3896 }, { "epoch": 1.13, "learning_rate": 0.0004217364280029946, "loss": 3.0592, "step": 3897 }, { "epoch": 1.13, "learning_rate": 0.00042150474096335356, "loss": 3.3565, "step": 3898 }, { "epoch": 1.13, "learning_rate": 0.0004212730712024609, "loss": 3.2946, "step": 3899 }, { "epoch": 1.13, "learning_rate": 0.0004210414187713124, "loss": 3.2278, "step": 3900 }, { "epoch": 1.13, "learning_rate": 0.00042080978372090086, "loss": 3.3059, "step": 3901 }, { "epoch": 1.13, "learning_rate": 0.0004205781661022146, "loss": 3.2639, "step": 3902 }, { "epoch": 1.13, "learning_rate": 0.00042034656596623876, "loss": 3.2645, "step": 3903 }, { "epoch": 1.13, "learning_rate": 0.00042011498336395393, "loss": 3.2686, "step": 3904 }, { "epoch": 1.13, "learning_rate": 0.00041988341834633714, "loss": 3.2458, "step": 3905 }, { "epoch": 1.13, "learning_rate": 0.000419651870964362, "loss": 3.1302, "step": 3906 }, { "epoch": 1.13, "learning_rate": 0.0004194203412689972, "loss": 3.1789, "step": 3907 }, { "epoch": 1.13, "learning_rate": 0.0004191888293112087, "loss": 3.2982, "step": 3908 }, { "epoch": 1.13, "learning_rate": 0.00041895733514195766, "loss": 3.268, "step": 3909 }, { "epoch": 1.13, "learning_rate": 0.00041872585881220186, "loss": 3.1106, "step": 3910 }, { "epoch": 1.13, "learning_rate": 0.00041849440037289477, "loss": 3.1786, "step": 3911 }, { "epoch": 1.13, "learning_rate": 0.00041826295987498654, "loss": 3.2415, "step": 3912 }, { "epoch": 1.13, "learning_rate": 0.00041803153736942253, "loss": 3.2482, "step": 3913 }, { "epoch": 1.13, "learning_rate": 0.0004178001329071445, "loss": 3.1382, "step": 3914 }, { "epoch": 1.13, "learning_rate": 0.0004175687465390908, "loss": 3.2586, "step": 3915 }, { "epoch": 1.13, "learning_rate": 0.00041733737831619466, "loss": 3.0783, "step": 3916 }, { "epoch": 1.13, "learning_rate": 0.0004171060282893865, "loss": 3.0757, "step": 3917 }, { "epoch": 1.13, "learning_rate": 0.0004168746965095918, "loss": 3.1406, "step": 3918 }, { "epoch": 1.13, "learning_rate": 0.00041664338302773276, "loss": 3.2691, "step": 3919 }, { "epoch": 1.14, "learning_rate": 0.0004164120878947267, "loss": 3.1951, "step": 3920 }, { "epoch": 1.14, "learning_rate": 0.000416180811161488, "loss": 3.2827, "step": 3921 }, { "epoch": 1.14, "learning_rate": 0.000415949552878926, "loss": 3.2308, "step": 3922 }, { "epoch": 1.14, "learning_rate": 0.0004157183130979463, "loss": 3.3029, "step": 3923 }, { "epoch": 1.14, "learning_rate": 0.0004154870918694509, "loss": 3.2606, "step": 3924 }, { "epoch": 1.14, "learning_rate": 0.0004152558892443368, "loss": 3.2146, "step": 3925 }, { "epoch": 1.14, "learning_rate": 0.00041502470527349786, "loss": 3.2293, "step": 3926 }, { "epoch": 1.14, "learning_rate": 0.00041479354000782307, "loss": 3.161, "step": 3927 }, { "epoch": 1.14, "learning_rate": 0.0004145623934981979, "loss": 3.2265, "step": 3928 }, { "epoch": 1.14, "learning_rate": 0.00041433126579550316, "loss": 3.2566, "step": 3929 }, { "epoch": 1.14, "learning_rate": 0.0004141001569506161, "loss": 3.2367, "step": 3930 }, { "epoch": 1.14, "learning_rate": 0.00041386906701440914, "loss": 3.1939, "step": 3931 }, { "epoch": 1.14, "learning_rate": 0.00041363799603775093, "loss": 3.2132, "step": 3932 }, { "epoch": 1.14, "learning_rate": 0.00041340694407150625, "loss": 3.0105, "step": 3933 }, { "epoch": 1.14, "learning_rate": 0.00041317591116653486, "loss": 3.1917, "step": 3934 }, { "epoch": 1.14, "learning_rate": 0.00041294489737369325, "loss": 3.179, "step": 3935 }, { "epoch": 1.14, "learning_rate": 0.0004127139027438329, "loss": 3.1875, "step": 3936 }, { "epoch": 1.14, "learning_rate": 0.0004124829273278019, "loss": 3.2307, "step": 3937 }, { "epoch": 1.14, "learning_rate": 0.0004122519711764431, "loss": 3.1339, "step": 3938 }, { "epoch": 1.14, "learning_rate": 0.0004120210343405963, "loss": 3.131, "step": 3939 }, { "epoch": 1.14, "learning_rate": 0.0004117901168710959, "loss": 3.1833, "step": 3940 }, { "epoch": 1.14, "learning_rate": 0.00041155921881877254, "loss": 3.298, "step": 3941 }, { "epoch": 1.14, "learning_rate": 0.00041132834023445305, "loss": 3.2729, "step": 3942 }, { "epoch": 1.14, "learning_rate": 0.000411097481168959, "loss": 3.1716, "step": 3943 }, { "epoch": 1.14, "learning_rate": 0.0004108666416731085, "loss": 3.1331, "step": 3944 }, { "epoch": 1.14, "learning_rate": 0.0004106358217977148, "loss": 3.2906, "step": 3945 }, { "epoch": 1.14, "learning_rate": 0.00041040502159358746, "loss": 3.1609, "step": 3946 }, { "epoch": 1.14, "learning_rate": 0.00041017424111153077, "loss": 3.2043, "step": 3947 }, { "epoch": 1.14, "learning_rate": 0.00040994348040234574, "loss": 3.0853, "step": 3948 }, { "epoch": 1.14, "learning_rate": 0.0004097127395168283, "loss": 3.3268, "step": 3949 }, { "epoch": 1.14, "learning_rate": 0.00040948201850577007, "loss": 3.097, "step": 3950 }, { "epoch": 1.14, "learning_rate": 0.0004092513174199587, "loss": 3.21, "step": 3951 }, { "epoch": 1.14, "learning_rate": 0.0004090206363101769, "loss": 3.1494, "step": 3952 }, { "epoch": 1.14, "learning_rate": 0.0004087899752272037, "loss": 3.1524, "step": 3953 }, { "epoch": 1.15, "learning_rate": 0.0004085593342218129, "loss": 3.1724, "step": 3954 }, { "epoch": 1.15, "learning_rate": 0.00040832871334477465, "loss": 3.0849, "step": 3955 }, { "epoch": 1.15, "learning_rate": 0.00040809811264685405, "loss": 3.1859, "step": 3956 }, { "epoch": 1.15, "learning_rate": 0.0004078675321788121, "loss": 3.3641, "step": 3957 }, { "epoch": 1.15, "learning_rate": 0.0004076369719914055, "loss": 3.2084, "step": 3958 }, { "epoch": 1.15, "learning_rate": 0.0004074064321353857, "loss": 3.2282, "step": 3959 }, { "epoch": 1.15, "learning_rate": 0.00040717591266150086, "loss": 3.3031, "step": 3960 }, { "epoch": 1.15, "learning_rate": 0.0004069454136204934, "loss": 3.2404, "step": 3961 }, { "epoch": 1.15, "learning_rate": 0.00040671493506310233, "loss": 3.1711, "step": 3962 }, { "epoch": 1.15, "learning_rate": 0.0004064844770400614, "loss": 3.2755, "step": 3963 }, { "epoch": 1.15, "learning_rate": 0.0004062540396021005, "loss": 3.1002, "step": 3964 }, { "epoch": 1.15, "learning_rate": 0.0004060236227999441, "loss": 3.2025, "step": 3965 }, { "epoch": 1.15, "learning_rate": 0.00040579322668431297, "loss": 3.1782, "step": 3966 }, { "epoch": 1.15, "learning_rate": 0.00040556285130592304, "loss": 3.3421, "step": 3967 }, { "epoch": 1.15, "learning_rate": 0.00040533249671548526, "loss": 3.1477, "step": 3968 }, { "epoch": 1.15, "learning_rate": 0.0004051021629637068, "loss": 3.1892, "step": 3969 }, { "epoch": 1.15, "learning_rate": 0.00040487185010128945, "loss": 3.3576, "step": 3970 }, { "epoch": 1.15, "learning_rate": 0.0004046415581789312, "loss": 3.1145, "step": 3971 }, { "epoch": 1.15, "learning_rate": 0.0004044112872473246, "loss": 3.2326, "step": 3972 }, { "epoch": 1.15, "learning_rate": 0.0004041810373571582, "loss": 3.2882, "step": 3973 }, { "epoch": 1.15, "learning_rate": 0.00040395080855911553, "loss": 3.1865, "step": 3974 }, { "epoch": 1.15, "learning_rate": 0.0004037206009038758, "loss": 3.2908, "step": 3975 }, { "epoch": 1.15, "learning_rate": 0.00040349041444211344, "loss": 3.1152, "step": 3976 }, { "epoch": 1.15, "learning_rate": 0.0004032602492244979, "loss": 3.1498, "step": 3977 }, { "epoch": 1.15, "learning_rate": 0.00040303010530169447, "loss": 3.4095, "step": 3978 }, { "epoch": 1.15, "learning_rate": 0.0004027999827243635, "loss": 3.0859, "step": 3979 }, { "epoch": 1.15, "learning_rate": 0.0004025698815431607, "loss": 3.0825, "step": 3980 }, { "epoch": 1.15, "learning_rate": 0.00040233980180873685, "loss": 3.2347, "step": 3981 }, { "epoch": 1.15, "learning_rate": 0.0004021097435717385, "loss": 3.2511, "step": 3982 }, { "epoch": 1.15, "learning_rate": 0.0004018797068828068, "loss": 3.2395, "step": 3983 }, { "epoch": 1.15, "learning_rate": 0.0004016496917925787, "loss": 3.2503, "step": 3984 }, { "epoch": 1.15, "learning_rate": 0.0004014196983516862, "loss": 3.2179, "step": 3985 }, { "epoch": 1.15, "learning_rate": 0.0004011897266107567, "loss": 3.1806, "step": 3986 }, { "epoch": 1.15, "learning_rate": 0.0004009597766204124, "loss": 3.1287, "step": 3987 }, { "epoch": 1.15, "learning_rate": 0.000400729848431271, "loss": 3.284, "step": 3988 }, { "epoch": 1.16, "learning_rate": 0.0004004999420939456, "loss": 3.312, "step": 3989 }, { "epoch": 1.16, "learning_rate": 0.0004002700576590441, "loss": 3.3213, "step": 3990 }, { "epoch": 1.16, "learning_rate": 0.0004000401951771699, "loss": 3.3023, "step": 3991 }, { "epoch": 1.16, "learning_rate": 0.00039981035469892107, "loss": 3.2983, "step": 3992 }, { "epoch": 1.16, "learning_rate": 0.0003995805362748916, "loss": 3.236, "step": 3993 }, { "epoch": 1.16, "learning_rate": 0.0003993507399556699, "loss": 3.3492, "step": 3994 }, { "epoch": 1.16, "learning_rate": 0.0003991209657918402, "loss": 3.223, "step": 3995 }, { "epoch": 1.16, "learning_rate": 0.00039889121383398113, "loss": 3.1532, "step": 3996 }, { "epoch": 1.16, "learning_rate": 0.00039866148413266676, "loss": 3.1948, "step": 3997 }, { "epoch": 1.16, "learning_rate": 0.00039843177673846666, "loss": 3.2222, "step": 3998 }, { "epoch": 1.16, "learning_rate": 0.0003982020917019446, "loss": 3.2088, "step": 3999 }, { "epoch": 1.16, "learning_rate": 0.00039797242907366025, "loss": 3.1355, "step": 4000 }, { "epoch": 1.16, "learning_rate": 0.000397742788904168, "loss": 3.0896, "step": 4001 }, { "epoch": 1.16, "learning_rate": 0.00039751317124401745, "loss": 3.2837, "step": 4002 }, { "epoch": 1.16, "learning_rate": 0.0003972835761437528, "loss": 3.2005, "step": 4003 }, { "epoch": 1.16, "learning_rate": 0.00039705400365391396, "loss": 3.1852, "step": 4004 }, { "epoch": 1.16, "learning_rate": 0.0003968244538250353, "loss": 3.1617, "step": 4005 }, { "epoch": 1.16, "learning_rate": 0.0003965949267076464, "loss": 3.2276, "step": 4006 }, { "epoch": 1.16, "learning_rate": 0.00039636542235227206, "loss": 3.2348, "step": 4007 }, { "epoch": 1.16, "learning_rate": 0.00039613594080943154, "loss": 3.2677, "step": 4008 }, { "epoch": 1.16, "learning_rate": 0.0003959064821296398, "loss": 3.2916, "step": 4009 }, { "epoch": 1.16, "learning_rate": 0.0003956770463634059, "loss": 3.1391, "step": 4010 }, { "epoch": 1.16, "learning_rate": 0.0003954476335612349, "loss": 3.1435, "step": 4011 }, { "epoch": 1.16, "learning_rate": 0.00039521824377362566, "loss": 3.1791, "step": 4012 }, { "epoch": 1.16, "learning_rate": 0.0003949888770510731, "loss": 3.2232, "step": 4013 }, { "epoch": 1.16, "learning_rate": 0.000394759533444066, "loss": 3.1358, "step": 4014 }, { "epoch": 1.16, "learning_rate": 0.0003945302130030888, "loss": 3.1231, "step": 4015 }, { "epoch": 1.16, "learning_rate": 0.00039430091577862073, "loss": 3.2749, "step": 4016 }, { "epoch": 1.16, "learning_rate": 0.0003940716418211354, "loss": 3.2106, "step": 4017 }, { "epoch": 1.16, "learning_rate": 0.00039384239118110204, "loss": 3.2774, "step": 4018 }, { "epoch": 1.16, "learning_rate": 0.00039361316390898416, "loss": 3.2353, "step": 4019 }, { "epoch": 1.16, "learning_rate": 0.0003933839600552407, "loss": 3.2145, "step": 4020 }, { "epoch": 1.16, "learning_rate": 0.00039315477967032456, "loss": 3.2081, "step": 4021 }, { "epoch": 1.16, "learning_rate": 0.0003929256228046845, "loss": 3.2771, "step": 4022 }, { "epoch": 1.17, "learning_rate": 0.00039269648950876343, "loss": 3.1912, "step": 4023 }, { "epoch": 1.17, "learning_rate": 0.00039246737983299906, "loss": 3.2578, "step": 4024 }, { "epoch": 1.17, "learning_rate": 0.0003922382938278245, "loss": 3.3033, "step": 4025 }, { "epoch": 1.17, "learning_rate": 0.00039200923154366686, "loss": 3.1755, "step": 4026 }, { "epoch": 1.17, "learning_rate": 0.0003917801930309486, "loss": 3.1325, "step": 4027 }, { "epoch": 1.17, "learning_rate": 0.0003915511783400867, "loss": 3.1023, "step": 4028 }, { "epoch": 1.17, "learning_rate": 0.00039132218752149317, "loss": 3.2588, "step": 4029 }, { "epoch": 1.17, "learning_rate": 0.00039109322062557425, "loss": 3.1979, "step": 4030 }, { "epoch": 1.17, "learning_rate": 0.00039086427770273135, "loss": 3.1973, "step": 4031 }, { "epoch": 1.17, "learning_rate": 0.00039063535880336063, "loss": 3.298, "step": 4032 }, { "epoch": 1.17, "learning_rate": 0.00039040646397785244, "loss": 3.1923, "step": 4033 }, { "epoch": 1.17, "learning_rate": 0.00039017759327659264, "loss": 3.178, "step": 4034 }, { "epoch": 1.17, "learning_rate": 0.00038994874674996085, "loss": 3.3328, "step": 4035 }, { "epoch": 1.17, "learning_rate": 0.00038971992444833213, "loss": 3.0656, "step": 4036 }, { "epoch": 1.17, "learning_rate": 0.0003894911264220758, "loss": 3.1726, "step": 4037 }, { "epoch": 1.17, "learning_rate": 0.00038926235272155636, "loss": 3.1708, "step": 4038 }, { "epoch": 1.17, "learning_rate": 0.000389033603397132, "loss": 3.17, "step": 4039 }, { "epoch": 1.17, "learning_rate": 0.0003888048784991565, "loss": 3.2044, "step": 4040 }, { "epoch": 1.17, "learning_rate": 0.0003885761780779777, "loss": 3.2153, "step": 4041 }, { "epoch": 1.17, "learning_rate": 0.00038834750218393814, "loss": 3.1924, "step": 4042 }, { "epoch": 1.17, "learning_rate": 0.00038811885086737543, "loss": 3.1763, "step": 4043 }, { "epoch": 1.17, "learning_rate": 0.00038789022417862076, "loss": 3.1785, "step": 4044 }, { "epoch": 1.17, "learning_rate": 0.000387661622168001, "loss": 3.1306, "step": 4045 }, { "epoch": 1.17, "learning_rate": 0.00038743304488583687, "loss": 3.1417, "step": 4046 }, { "epoch": 1.17, "learning_rate": 0.00038720449238244416, "loss": 3.1771, "step": 4047 }, { "epoch": 1.17, "learning_rate": 0.00038697596470813257, "loss": 3.2698, "step": 4048 }, { "epoch": 1.17, "learning_rate": 0.00038674746191320696, "loss": 3.1484, "step": 4049 }, { "epoch": 1.17, "learning_rate": 0.00038651898404796646, "loss": 3.1724, "step": 4050 }, { "epoch": 1.17, "learning_rate": 0.0003862905311627044, "loss": 3.1, "step": 4051 }, { "epoch": 1.17, "learning_rate": 0.00038606210330770923, "loss": 3.0924, "step": 4052 }, { "epoch": 1.17, "learning_rate": 0.0003858337005332634, "loss": 3.2093, "step": 4053 }, { "epoch": 1.17, "learning_rate": 0.0003856053228896442, "loss": 3.2267, "step": 4054 }, { "epoch": 1.17, "learning_rate": 0.0003853769704271228, "loss": 3.2123, "step": 4055 }, { "epoch": 1.17, "learning_rate": 0.00038514864319596576, "loss": 3.2561, "step": 4056 }, { "epoch": 1.17, "learning_rate": 0.00038492034124643304, "loss": 3.1676, "step": 4057 }, { "epoch": 1.18, "learning_rate": 0.00038469206462877993, "loss": 3.1073, "step": 4058 }, { "epoch": 1.18, "learning_rate": 0.0003844638133932557, "loss": 3.2115, "step": 4059 }, { "epoch": 1.18, "learning_rate": 0.0003842355875901038, "loss": 3.2581, "step": 4060 }, { "epoch": 1.18, "learning_rate": 0.0003840073872695627, "loss": 3.2943, "step": 4061 }, { "epoch": 1.18, "learning_rate": 0.0003837792124818646, "loss": 3.2224, "step": 4062 }, { "epoch": 1.18, "learning_rate": 0.000383551063277237, "loss": 3.2126, "step": 4063 }, { "epoch": 1.18, "learning_rate": 0.0003833229397059005, "loss": 3.1695, "step": 4064 }, { "epoch": 1.18, "learning_rate": 0.0003830948418180713, "loss": 3.2221, "step": 4065 }, { "epoch": 1.18, "learning_rate": 0.00038286676966395897, "loss": 3.3321, "step": 4066 }, { "epoch": 1.18, "learning_rate": 0.000382638723293768, "loss": 3.3215, "step": 4067 }, { "epoch": 1.18, "learning_rate": 0.00038241070275769727, "loss": 3.2907, "step": 4068 }, { "epoch": 1.18, "learning_rate": 0.00038218270810593924, "loss": 3.1451, "step": 4069 }, { "epoch": 1.18, "learning_rate": 0.00038195473938868157, "loss": 3.1538, "step": 4070 }, { "epoch": 1.18, "learning_rate": 0.00038172679665610554, "loss": 3.1464, "step": 4071 }, { "epoch": 1.18, "learning_rate": 0.0003814988799583874, "loss": 3.1446, "step": 4072 }, { "epoch": 1.18, "learning_rate": 0.00038127098934569674, "loss": 3.1271, "step": 4073 }, { "epoch": 1.18, "learning_rate": 0.0003810431248681983, "loss": 3.1067, "step": 4074 }, { "epoch": 1.18, "learning_rate": 0.00038081528657605044, "loss": 3.2711, "step": 4075 }, { "epoch": 1.18, "learning_rate": 0.0003805874745194063, "loss": 3.3308, "step": 4076 }, { "epoch": 1.18, "learning_rate": 0.00038035968874841256, "loss": 3.1709, "step": 4077 }, { "epoch": 1.18, "learning_rate": 0.00038013192931321093, "loss": 3.2316, "step": 4078 }, { "epoch": 1.18, "learning_rate": 0.00037990419626393655, "loss": 3.1083, "step": 4079 }, { "epoch": 1.18, "learning_rate": 0.0003796764896507192, "loss": 3.1901, "step": 4080 }, { "epoch": 1.18, "learning_rate": 0.0003794488095236829, "loss": 3.1927, "step": 4081 }, { "epoch": 1.18, "learning_rate": 0.00037922115593294533, "loss": 3.114, "step": 4082 }, { "epoch": 1.18, "learning_rate": 0.000378993528928619, "loss": 3.2058, "step": 4083 }, { "epoch": 1.18, "learning_rate": 0.00037876592856081, "loss": 3.1745, "step": 4084 }, { "epoch": 1.18, "learning_rate": 0.0003785383548796191, "loss": 3.2863, "step": 4085 }, { "epoch": 1.18, "learning_rate": 0.0003783108079351406, "loss": 3.2427, "step": 4086 }, { "epoch": 1.18, "learning_rate": 0.0003780832877774635, "loss": 3.179, "step": 4087 }, { "epoch": 1.18, "learning_rate": 0.0003778557944566704, "loss": 3.1784, "step": 4088 }, { "epoch": 1.18, "learning_rate": 0.0003776283280228381, "loss": 3.1608, "step": 4089 }, { "epoch": 1.18, "learning_rate": 0.00037740088852603797, "loss": 3.1194, "step": 4090 }, { "epoch": 1.18, "learning_rate": 0.00037717347601633466, "loss": 3.2013, "step": 4091 }, { "epoch": 1.19, "learning_rate": 0.00037694609054378755, "loss": 3.1917, "step": 4092 }, { "epoch": 1.19, "learning_rate": 0.00037671873215844965, "loss": 3.1769, "step": 4093 }, { "epoch": 1.19, "learning_rate": 0.00037649140091036845, "loss": 3.2586, "step": 4094 }, { "epoch": 1.19, "learning_rate": 0.00037626409684958474, "loss": 3.2098, "step": 4095 }, { "epoch": 1.19, "learning_rate": 0.0003760368200261342, "loss": 3.1913, "step": 4096 }, { "epoch": 1.19, "learning_rate": 0.00037580957049004583, "loss": 3.1873, "step": 4097 }, { "epoch": 1.19, "learning_rate": 0.0003755823482913428, "loss": 3.17, "step": 4098 }, { "epoch": 1.19, "learning_rate": 0.00037535515348004265, "loss": 3.2382, "step": 4099 }, { "epoch": 1.19, "learning_rate": 0.0003751279861061562, "loss": 3.2544, "step": 4100 }, { "epoch": 1.19, "learning_rate": 0.0003749008462196889, "loss": 3.2093, "step": 4101 }, { "epoch": 1.19, "learning_rate": 0.00037467373387063967, "loss": 3.1262, "step": 4102 }, { "epoch": 1.19, "learning_rate": 0.00037444664910900186, "loss": 3.2361, "step": 4103 }, { "epoch": 1.19, "learning_rate": 0.00037421959198476195, "loss": 3.0531, "step": 4104 }, { "epoch": 1.19, "learning_rate": 0.0003739925625479013, "loss": 3.1786, "step": 4105 }, { "epoch": 1.19, "learning_rate": 0.00037376556084839466, "loss": 3.1437, "step": 4106 }, { "epoch": 1.19, "learning_rate": 0.0003735385869362102, "loss": 3.345, "step": 4107 }, { "epoch": 1.19, "learning_rate": 0.0003733116408613112, "loss": 3.1622, "step": 4108 }, { "epoch": 1.19, "learning_rate": 0.00037308472267365343, "loss": 3.1459, "step": 4109 }, { "epoch": 1.19, "learning_rate": 0.0003728578324231877, "loss": 3.2328, "step": 4110 }, { "epoch": 1.19, "learning_rate": 0.000372630970159858, "loss": 3.1024, "step": 4111 }, { "epoch": 1.19, "learning_rate": 0.00037240413593360244, "loss": 3.3138, "step": 4112 }, { "epoch": 1.19, "learning_rate": 0.0003721773297943525, "loss": 3.162, "step": 4113 }, { "epoch": 1.19, "learning_rate": 0.00037195055179203417, "loss": 3.289, "step": 4114 }, { "epoch": 1.19, "learning_rate": 0.00037172380197656693, "loss": 3.2632, "step": 4115 }, { "epoch": 1.19, "learning_rate": 0.0003714970803978636, "loss": 3.1751, "step": 4116 }, { "epoch": 1.19, "learning_rate": 0.0003712703871058317, "loss": 3.2248, "step": 4117 }, { "epoch": 1.19, "learning_rate": 0.0003710437221503716, "loss": 3.1523, "step": 4118 }, { "epoch": 1.19, "learning_rate": 0.0003708170855813782, "loss": 3.1638, "step": 4119 }, { "epoch": 1.19, "learning_rate": 0.0003705904774487396, "loss": 3.1285, "step": 4120 }, { "epoch": 1.19, "learning_rate": 0.00037036389780233813, "loss": 3.1076, "step": 4121 }, { "epoch": 1.19, "learning_rate": 0.0003701373466920492, "loss": 3.2363, "step": 4122 }, { "epoch": 1.19, "learning_rate": 0.00036991082416774256, "loss": 3.0701, "step": 4123 }, { "epoch": 1.19, "learning_rate": 0.0003696843302792815, "loss": 3.0883, "step": 4124 }, { "epoch": 1.19, "learning_rate": 0.0003694578650765226, "loss": 3.2974, "step": 4125 }, { "epoch": 1.19, "learning_rate": 0.0003692314286093167, "loss": 3.1886, "step": 4126 }, { "epoch": 1.2, "learning_rate": 0.0003690050209275079, "loss": 3.106, "step": 4127 }, { "epoch": 1.2, "learning_rate": 0.0003687786420809345, "loss": 3.2187, "step": 4128 }, { "epoch": 1.2, "learning_rate": 0.00036855229211942757, "loss": 3.1785, "step": 4129 }, { "epoch": 1.2, "learning_rate": 0.00036832597109281285, "loss": 3.0497, "step": 4130 }, { "epoch": 1.2, "learning_rate": 0.0003680996790509087, "loss": 3.0127, "step": 4131 }, { "epoch": 1.2, "learning_rate": 0.0003678734160435279, "loss": 3.0846, "step": 4132 }, { "epoch": 1.2, "learning_rate": 0.00036764718212047666, "loss": 3.1767, "step": 4133 }, { "epoch": 1.2, "learning_rate": 0.0003674209773315542, "loss": 3.102, "step": 4134 }, { "epoch": 1.2, "learning_rate": 0.00036719480172655417, "loss": 3.1547, "step": 4135 }, { "epoch": 1.2, "learning_rate": 0.0003669686553552632, "loss": 3.1923, "step": 4136 }, { "epoch": 1.2, "learning_rate": 0.000366742538267462, "loss": 3.3023, "step": 4137 }, { "epoch": 1.2, "learning_rate": 0.00036651645051292417, "loss": 3.1525, "step": 4138 }, { "epoch": 1.2, "learning_rate": 0.00036629039214141755, "loss": 3.2204, "step": 4139 }, { "epoch": 1.2, "learning_rate": 0.00036606436320270287, "loss": 3.1223, "step": 4140 }, { "epoch": 1.2, "learning_rate": 0.0003658383637465349, "loss": 3.3409, "step": 4141 }, { "epoch": 1.2, "learning_rate": 0.00036561239382266173, "loss": 3.1399, "step": 4142 }, { "epoch": 1.2, "learning_rate": 0.00036538645348082466, "loss": 3.2694, "step": 4143 }, { "epoch": 1.2, "learning_rate": 0.00036516054277075906, "loss": 3.268, "step": 4144 }, { "epoch": 1.2, "learning_rate": 0.00036493466174219313, "loss": 3.1023, "step": 4145 }, { "epoch": 1.2, "learning_rate": 0.0003647088104448494, "loss": 3.2836, "step": 4146 }, { "epoch": 1.2, "learning_rate": 0.0003644829889284428, "loss": 3.3069, "step": 4147 }, { "epoch": 1.2, "learning_rate": 0.0003642571972426826, "loss": 3.2055, "step": 4148 }, { "epoch": 1.2, "learning_rate": 0.0003640314354372708, "loss": 3.231, "step": 4149 }, { "epoch": 1.2, "learning_rate": 0.00036380570356190343, "loss": 3.177, "step": 4150 }, { "epoch": 1.2, "learning_rate": 0.0003635800016662696, "loss": 3.145, "step": 4151 }, { "epoch": 1.2, "learning_rate": 0.0003633543298000517, "loss": 3.1336, "step": 4152 }, { "epoch": 1.2, "learning_rate": 0.000363128688012926, "loss": 3.2226, "step": 4153 }, { "epoch": 1.2, "learning_rate": 0.00036290307635456143, "loss": 3.2366, "step": 4154 }, { "epoch": 1.2, "learning_rate": 0.00036267749487462126, "loss": 3.1169, "step": 4155 }, { "epoch": 1.2, "learning_rate": 0.00036245194362276095, "loss": 3.1966, "step": 4156 }, { "epoch": 1.2, "learning_rate": 0.0003622264226486304, "loss": 3.1723, "step": 4157 }, { "epoch": 1.2, "learning_rate": 0.0003620009320018719, "loss": 3.0829, "step": 4158 }, { "epoch": 1.2, "learning_rate": 0.000361775471732122, "loss": 3.2862, "step": 4159 }, { "epoch": 1.2, "learning_rate": 0.0003615500418890099, "loss": 3.1118, "step": 4160 }, { "epoch": 1.21, "learning_rate": 0.00036132464252215804, "loss": 3.1997, "step": 4161 }, { "epoch": 1.21, "learning_rate": 0.0003610992736811827, "loss": 3.2473, "step": 4162 }, { "epoch": 1.21, "learning_rate": 0.0003608739354156928, "loss": 3.0605, "step": 4163 }, { "epoch": 1.21, "learning_rate": 0.0003606486277752914, "loss": 3.2096, "step": 4164 }, { "epoch": 1.21, "learning_rate": 0.00036042335080957373, "loss": 3.1528, "step": 4165 }, { "epoch": 1.21, "learning_rate": 0.00036019810456812916, "loss": 3.2216, "step": 4166 }, { "epoch": 1.21, "learning_rate": 0.00035997288910053984, "loss": 3.1796, "step": 4167 }, { "epoch": 1.21, "learning_rate": 0.0003597477044563814, "loss": 3.2269, "step": 4168 }, { "epoch": 1.21, "learning_rate": 0.00035952255068522233, "loss": 3.3176, "step": 4169 }, { "epoch": 1.21, "learning_rate": 0.00035929742783662476, "loss": 3.2202, "step": 4170 }, { "epoch": 1.21, "learning_rate": 0.00035907233596014365, "loss": 3.241, "step": 4171 }, { "epoch": 1.21, "learning_rate": 0.0003588472751053272, "loss": 3.2567, "step": 4172 }, { "epoch": 1.21, "learning_rate": 0.00035862224532171727, "loss": 3.1592, "step": 4173 }, { "epoch": 1.21, "learning_rate": 0.00035839724665884796, "loss": 3.2479, "step": 4174 }, { "epoch": 1.21, "learning_rate": 0.00035817227916624744, "loss": 3.1329, "step": 4175 }, { "epoch": 1.21, "learning_rate": 0.0003579473428934364, "loss": 3.1178, "step": 4176 }, { "epoch": 1.21, "learning_rate": 0.0003577224378899291, "loss": 3.1493, "step": 4177 }, { "epoch": 1.21, "learning_rate": 0.00035749756420523236, "loss": 3.0414, "step": 4178 }, { "epoch": 1.21, "learning_rate": 0.0003572727218888467, "loss": 3.1915, "step": 4179 }, { "epoch": 1.21, "learning_rate": 0.0003570479109902656, "loss": 3.1355, "step": 4180 }, { "epoch": 1.21, "learning_rate": 0.00035682313155897507, "loss": 3.1475, "step": 4181 }, { "epoch": 1.21, "learning_rate": 0.00035659838364445503, "loss": 3.3453, "step": 4182 }, { "epoch": 1.21, "learning_rate": 0.0003563736672961777, "loss": 3.0462, "step": 4183 }, { "epoch": 1.21, "learning_rate": 0.00035614898256360896, "loss": 3.2848, "step": 4184 }, { "epoch": 1.21, "learning_rate": 0.0003559243294962073, "loss": 3.2593, "step": 4185 }, { "epoch": 1.21, "learning_rate": 0.00035569970814342477, "loss": 3.2589, "step": 4186 }, { "epoch": 1.21, "learning_rate": 0.0003554751185547056, "loss": 3.3488, "step": 4187 }, { "epoch": 1.21, "learning_rate": 0.00035525056077948794, "loss": 3.1833, "step": 4188 }, { "epoch": 1.21, "learning_rate": 0.0003550260348672023, "loss": 3.2401, "step": 4189 }, { "epoch": 1.21, "learning_rate": 0.00035480154086727227, "loss": 3.1203, "step": 4190 }, { "epoch": 1.21, "learning_rate": 0.00035457707882911497, "loss": 3.1669, "step": 4191 }, { "epoch": 1.21, "learning_rate": 0.0003543526488021394, "loss": 3.1721, "step": 4192 }, { "epoch": 1.21, "learning_rate": 0.00035412825083574865, "loss": 3.0992, "step": 4193 }, { "epoch": 1.21, "learning_rate": 0.0003539038849793381, "loss": 3.172, "step": 4194 }, { "epoch": 1.21, "learning_rate": 0.0003536795512822964, "loss": 3.1138, "step": 4195 }, { "epoch": 1.22, "learning_rate": 0.00035345524979400457, "loss": 3.151, "step": 4196 }, { "epoch": 1.22, "learning_rate": 0.00035323098056383727, "loss": 3.1909, "step": 4197 }, { "epoch": 1.22, "learning_rate": 0.0003530067436411617, "loss": 3.1957, "step": 4198 }, { "epoch": 1.22, "learning_rate": 0.00035278253907533764, "loss": 3.1767, "step": 4199 }, { "epoch": 1.22, "learning_rate": 0.0003525583669157184, "loss": 3.298, "step": 4200 }, { "epoch": 1.22, "learning_rate": 0.0003523342272116495, "loss": 3.1666, "step": 4201 }, { "epoch": 1.22, "learning_rate": 0.0003521101200124699, "loss": 3.0668, "step": 4202 }, { "epoch": 1.22, "learning_rate": 0.0003518860453675109, "loss": 3.1048, "step": 4203 }, { "epoch": 1.22, "learning_rate": 0.0003516620033260972, "loss": 3.0785, "step": 4204 }, { "epoch": 1.22, "learning_rate": 0.0003514379939375457, "loss": 3.2453, "step": 4205 }, { "epoch": 1.22, "learning_rate": 0.0003512140172511665, "loss": 3.1934, "step": 4206 }, { "epoch": 1.22, "learning_rate": 0.00035099007331626265, "loss": 3.1732, "step": 4207 }, { "epoch": 1.22, "learning_rate": 0.0003507661621821292, "loss": 3.1776, "step": 4208 }, { "epoch": 1.22, "learning_rate": 0.00035054228389805504, "loss": 3.1875, "step": 4209 }, { "epoch": 1.22, "learning_rate": 0.000350318438513321, "loss": 3.0945, "step": 4210 }, { "epoch": 1.22, "learning_rate": 0.0003500946260772013, "loss": 3.1285, "step": 4211 }, { "epoch": 1.22, "learning_rate": 0.0003498708466389622, "loss": 3.247, "step": 4212 }, { "epoch": 1.22, "learning_rate": 0.0003496471002478635, "loss": 3.1806, "step": 4213 }, { "epoch": 1.22, "learning_rate": 0.000349423386953157, "loss": 3.1901, "step": 4214 }, { "epoch": 1.22, "learning_rate": 0.00034919970680408766, "loss": 3.1186, "step": 4215 }, { "epoch": 1.22, "learning_rate": 0.00034897605984989314, "loss": 3.1894, "step": 4216 }, { "epoch": 1.22, "learning_rate": 0.0003487524461398033, "loss": 3.1216, "step": 4217 }, { "epoch": 1.22, "learning_rate": 0.00034852886572304134, "loss": 3.1955, "step": 4218 }, { "epoch": 1.22, "learning_rate": 0.0003483053186488227, "loss": 3.2126, "step": 4219 }, { "epoch": 1.22, "learning_rate": 0.00034808180496635586, "loss": 3.1729, "step": 4220 }, { "epoch": 1.22, "learning_rate": 0.0003478583247248414, "loss": 3.2972, "step": 4221 }, { "epoch": 1.22, "learning_rate": 0.0003476348779734732, "loss": 3.2511, "step": 4222 }, { "epoch": 1.22, "learning_rate": 0.00034741146476143696, "loss": 3.248, "step": 4223 }, { "epoch": 1.22, "learning_rate": 0.00034718808513791187, "loss": 3.1516, "step": 4224 }, { "epoch": 1.22, "learning_rate": 0.00034696473915206915, "loss": 3.2058, "step": 4225 }, { "epoch": 1.22, "learning_rate": 0.00034674142685307266, "loss": 3.2724, "step": 4226 }, { "epoch": 1.22, "learning_rate": 0.00034651814829007914, "loss": 3.1246, "step": 4227 }, { "epoch": 1.22, "learning_rate": 0.0003462949035122376, "loss": 3.195, "step": 4228 }, { "epoch": 1.22, "learning_rate": 0.00034607169256868994, "loss": 3.3511, "step": 4229 }, { "epoch": 1.23, "learning_rate": 0.0003458485155085701, "loss": 3.1965, "step": 4230 }, { "epoch": 1.23, "learning_rate": 0.00034562537238100506, "loss": 3.2688, "step": 4231 }, { "epoch": 1.23, "learning_rate": 0.000345402263235114, "loss": 3.1221, "step": 4232 }, { "epoch": 1.23, "learning_rate": 0.00034517918812000924, "loss": 3.2228, "step": 4233 }, { "epoch": 1.23, "learning_rate": 0.00034495614708479464, "loss": 3.184, "step": 4234 }, { "epoch": 1.23, "learning_rate": 0.00034473314017856706, "loss": 3.1453, "step": 4235 }, { "epoch": 1.23, "learning_rate": 0.00034451016745041597, "loss": 3.1863, "step": 4236 }, { "epoch": 1.23, "learning_rate": 0.0003442872289494231, "loss": 3.2089, "step": 4237 }, { "epoch": 1.23, "learning_rate": 0.00034406432472466306, "loss": 3.1962, "step": 4238 }, { "epoch": 1.23, "learning_rate": 0.000343841454825202, "loss": 3.1547, "step": 4239 }, { "epoch": 1.23, "learning_rate": 0.0003436186193000996, "loss": 3.3114, "step": 4240 }, { "epoch": 1.23, "learning_rate": 0.00034339581819840717, "loss": 3.2018, "step": 4241 }, { "epoch": 1.23, "learning_rate": 0.00034317305156916913, "loss": 3.0251, "step": 4242 }, { "epoch": 1.23, "learning_rate": 0.0003429503194614215, "loss": 3.1709, "step": 4243 }, { "epoch": 1.23, "learning_rate": 0.00034272762192419327, "loss": 3.1243, "step": 4244 }, { "epoch": 1.23, "learning_rate": 0.0003425049590065057, "loss": 3.2077, "step": 4245 }, { "epoch": 1.23, "learning_rate": 0.00034228233075737224, "loss": 3.1441, "step": 4246 }, { "epoch": 1.23, "learning_rate": 0.0003420597372257993, "loss": 3.2513, "step": 4247 }, { "epoch": 1.23, "learning_rate": 0.0003418371784607848, "loss": 3.0965, "step": 4248 }, { "epoch": 1.23, "learning_rate": 0.00034161465451131956, "loss": 3.2459, "step": 4249 }, { "epoch": 1.23, "learning_rate": 0.0003413921654263866, "loss": 3.1875, "step": 4250 }, { "epoch": 1.23, "learning_rate": 0.0003411697112549615, "loss": 3.1533, "step": 4251 }, { "epoch": 1.23, "learning_rate": 0.0003409472920460117, "loss": 3.1681, "step": 4252 }, { "epoch": 1.23, "learning_rate": 0.00034072490784849705, "loss": 3.2463, "step": 4253 }, { "epoch": 1.23, "learning_rate": 0.00034050255871137004, "loss": 3.137, "step": 4254 }, { "epoch": 1.23, "learning_rate": 0.000340280244683575, "loss": 3.2839, "step": 4255 }, { "epoch": 1.23, "learning_rate": 0.00034005796581404907, "loss": 3.1489, "step": 4256 }, { "epoch": 1.23, "learning_rate": 0.000339835722151721, "loss": 3.1893, "step": 4257 }, { "epoch": 1.23, "learning_rate": 0.0003396135137455123, "loss": 3.2583, "step": 4258 }, { "epoch": 1.23, "learning_rate": 0.00033939134064433634, "loss": 3.1624, "step": 4259 }, { "epoch": 1.23, "learning_rate": 0.00033916920289709935, "loss": 3.0282, "step": 4260 }, { "epoch": 1.23, "learning_rate": 0.00033894710055269885, "loss": 3.1511, "step": 4261 }, { "epoch": 1.23, "learning_rate": 0.0003387250336600254, "loss": 3.2936, "step": 4262 }, { "epoch": 1.23, "learning_rate": 0.00033850300226796124, "loss": 3.2264, "step": 4263 }, { "epoch": 1.23, "learning_rate": 0.00033828100642538096, "loss": 3.0628, "step": 4264 }, { "epoch": 1.24, "learning_rate": 0.00033805904618115155, "loss": 3.2084, "step": 4265 }, { "epoch": 1.24, "learning_rate": 0.0003378371215841316, "loss": 3.2241, "step": 4266 }, { "epoch": 1.24, "learning_rate": 0.0003376152326831725, "loss": 3.154, "step": 4267 }, { "epoch": 1.24, "learning_rate": 0.0003373933795271172, "loss": 3.1831, "step": 4268 }, { "epoch": 1.24, "learning_rate": 0.00033717156216480157, "loss": 3.1617, "step": 4269 }, { "epoch": 1.24, "learning_rate": 0.0003369497806450526, "loss": 3.2192, "step": 4270 }, { "epoch": 1.24, "learning_rate": 0.00033672803501669, "loss": 3.2653, "step": 4271 }, { "epoch": 1.24, "learning_rate": 0.00033650632532852576, "loss": 3.0386, "step": 4272 }, { "epoch": 1.24, "learning_rate": 0.00033628465162936316, "loss": 3.2432, "step": 4273 }, { "epoch": 1.24, "learning_rate": 0.0003360630139679986, "loss": 3.1577, "step": 4274 }, { "epoch": 1.24, "learning_rate": 0.00033584141239321953, "loss": 3.1083, "step": 4275 }, { "epoch": 1.24, "learning_rate": 0.00033561984695380624, "loss": 3.1469, "step": 4276 }, { "epoch": 1.24, "learning_rate": 0.0003353983176985306, "loss": 3.1334, "step": 4277 }, { "epoch": 1.24, "learning_rate": 0.0003351768246761571, "loss": 3.2648, "step": 4278 }, { "epoch": 1.24, "learning_rate": 0.00033495536793544113, "loss": 3.2822, "step": 4279 }, { "epoch": 1.24, "learning_rate": 0.0003347339475251313, "loss": 3.234, "step": 4280 }, { "epoch": 1.24, "learning_rate": 0.00033451256349396777, "loss": 3.1696, "step": 4281 }, { "epoch": 1.24, "learning_rate": 0.00033429121589068214, "loss": 3.1158, "step": 4282 }, { "epoch": 1.24, "learning_rate": 0.00033406990476399895, "loss": 3.3167, "step": 4283 }, { "epoch": 1.24, "learning_rate": 0.000333848630162634, "loss": 3.1526, "step": 4284 }, { "epoch": 1.24, "learning_rate": 0.00033362739213529567, "loss": 3.1677, "step": 4285 }, { "epoch": 1.24, "learning_rate": 0.00033340619073068343, "loss": 3.1768, "step": 4286 }, { "epoch": 1.24, "learning_rate": 0.0003331850259974897, "loss": 3.2288, "step": 4287 }, { "epoch": 1.24, "learning_rate": 0.0003329638979843977, "loss": 3.1919, "step": 4288 }, { "epoch": 1.24, "learning_rate": 0.0003327428067400836, "loss": 3.2671, "step": 4289 }, { "epoch": 1.24, "learning_rate": 0.00033252175231321515, "loss": 3.2077, "step": 4290 }, { "epoch": 1.24, "learning_rate": 0.0003323007347524515, "loss": 3.2547, "step": 4291 }, { "epoch": 1.24, "learning_rate": 0.0003320797541064443, "loss": 3.2235, "step": 4292 }, { "epoch": 1.24, "learning_rate": 0.0003318588104238368, "loss": 3.2003, "step": 4293 }, { "epoch": 1.24, "learning_rate": 0.0003316379037532644, "loss": 3.1254, "step": 4294 }, { "epoch": 1.24, "learning_rate": 0.00033141703414335365, "loss": 3.0023, "step": 4295 }, { "epoch": 1.24, "learning_rate": 0.000331196201642724, "loss": 3.0829, "step": 4296 }, { "epoch": 1.24, "learning_rate": 0.00033097540629998563, "loss": 3.2302, "step": 4297 }, { "epoch": 1.24, "learning_rate": 0.0003307546481637413, "loss": 3.1724, "step": 4298 }, { "epoch": 1.25, "learning_rate": 0.0003305339272825855, "loss": 3.1656, "step": 4299 }, { "epoch": 1.25, "learning_rate": 0.00033031324370510394, "loss": 3.1136, "step": 4300 }, { "epoch": 1.25, "learning_rate": 0.00033009259747987486, "loss": 3.1695, "step": 4301 }, { "epoch": 1.25, "learning_rate": 0.0003298719886554677, "loss": 3.2487, "step": 4302 }, { "epoch": 1.25, "learning_rate": 0.0003296514172804442, "loss": 3.1768, "step": 4303 }, { "epoch": 1.25, "learning_rate": 0.0003294308834033574, "loss": 3.2315, "step": 4304 }, { "epoch": 1.25, "learning_rate": 0.0003292103870727524, "loss": 3.2935, "step": 4305 }, { "epoch": 1.25, "learning_rate": 0.0003289899283371657, "loss": 3.16, "step": 4306 }, { "epoch": 1.25, "learning_rate": 0.00032876950724512575, "loss": 3.1158, "step": 4307 }, { "epoch": 1.25, "learning_rate": 0.000328549123845153, "loss": 3.1635, "step": 4308 }, { "epoch": 1.25, "learning_rate": 0.0003283287781857588, "loss": 3.1948, "step": 4309 }, { "epoch": 1.25, "learning_rate": 0.000328108470315447, "loss": 3.2165, "step": 4310 }, { "epoch": 1.25, "learning_rate": 0.0003278882002827127, "loss": 3.1096, "step": 4311 }, { "epoch": 1.25, "learning_rate": 0.0003276679681360429, "loss": 3.2466, "step": 4312 }, { "epoch": 1.25, "learning_rate": 0.000327447773923916, "loss": 3.1804, "step": 4313 }, { "epoch": 1.25, "learning_rate": 0.00032722761769480224, "loss": 3.1581, "step": 4314 }, { "epoch": 1.25, "learning_rate": 0.00032700749949716345, "loss": 3.2719, "step": 4315 }, { "epoch": 1.25, "learning_rate": 0.00032678741937945324, "loss": 3.0779, "step": 4316 }, { "epoch": 1.25, "learning_rate": 0.00032656737739011655, "loss": 3.2199, "step": 4317 }, { "epoch": 1.25, "learning_rate": 0.0003263473735775899, "loss": 3.1606, "step": 4318 }, { "epoch": 1.25, "learning_rate": 0.0003261274079903018, "loss": 3.1951, "step": 4319 }, { "epoch": 1.25, "learning_rate": 0.00032590748067667203, "loss": 3.2043, "step": 4320 }, { "epoch": 1.25, "learning_rate": 0.0003256875916851122, "loss": 3.0775, "step": 4321 }, { "epoch": 1.25, "learning_rate": 0.000325467741064025, "loss": 3.1817, "step": 4322 }, { "epoch": 1.25, "learning_rate": 0.00032524792886180526, "loss": 3.2576, "step": 4323 }, { "epoch": 1.25, "learning_rate": 0.0003250281551268389, "loss": 3.1821, "step": 4324 }, { "epoch": 1.25, "learning_rate": 0.0003248084199075038, "loss": 3.1587, "step": 4325 }, { "epoch": 1.25, "learning_rate": 0.00032458872325216893, "loss": 3.2003, "step": 4326 }, { "epoch": 1.25, "learning_rate": 0.00032436906520919484, "loss": 3.2474, "step": 4327 }, { "epoch": 1.25, "learning_rate": 0.00032414944582693394, "loss": 3.1876, "step": 4328 }, { "epoch": 1.25, "learning_rate": 0.00032392986515372967, "loss": 3.1252, "step": 4329 }, { "epoch": 1.25, "learning_rate": 0.00032371032323791755, "loss": 3.0773, "step": 4330 }, { "epoch": 1.25, "learning_rate": 0.00032349082012782373, "loss": 3.0792, "step": 4331 }, { "epoch": 1.25, "learning_rate": 0.0003232713558717665, "loss": 3.2049, "step": 4332 }, { "epoch": 1.25, "learning_rate": 0.0003230519305180553, "loss": 3.1554, "step": 4333 }, { "epoch": 1.26, "learning_rate": 0.00032283254411499124, "loss": 3.3109, "step": 4334 }, { "epoch": 1.26, "learning_rate": 0.00032261319671086653, "loss": 3.1282, "step": 4335 }, { "epoch": 1.26, "learning_rate": 0.0003223938883539649, "loss": 3.2183, "step": 4336 }, { "epoch": 1.26, "learning_rate": 0.0003221746190925618, "loss": 3.1729, "step": 4337 }, { "epoch": 1.26, "learning_rate": 0.0003219553889749235, "loss": 3.1048, "step": 4338 }, { "epoch": 1.26, "learning_rate": 0.00032173619804930844, "loss": 3.0901, "step": 4339 }, { "epoch": 1.26, "learning_rate": 0.00032151704636396535, "loss": 3.0831, "step": 4340 }, { "epoch": 1.26, "learning_rate": 0.00032129793396713536, "loss": 3.1518, "step": 4341 }, { "epoch": 1.26, "learning_rate": 0.0003210788609070504, "loss": 3.0302, "step": 4342 }, { "epoch": 1.26, "learning_rate": 0.00032085982723193394, "loss": 3.1618, "step": 4343 }, { "epoch": 1.26, "learning_rate": 0.00032064083299000067, "loss": 3.1068, "step": 4344 }, { "epoch": 1.26, "learning_rate": 0.00032042187822945643, "loss": 3.3139, "step": 4345 }, { "epoch": 1.26, "learning_rate": 0.0003202029629984991, "loss": 3.1033, "step": 4346 }, { "epoch": 1.26, "learning_rate": 0.00031998408734531673, "loss": 3.1995, "step": 4347 }, { "epoch": 1.26, "learning_rate": 0.0003197652513180898, "loss": 3.2368, "step": 4348 }, { "epoch": 1.26, "learning_rate": 0.000319546454964989, "loss": 3.2304, "step": 4349 }, { "epoch": 1.26, "learning_rate": 0.0003193276983341773, "loss": 3.081, "step": 4350 }, { "epoch": 1.26, "learning_rate": 0.00031910898147380806, "loss": 3.1436, "step": 4351 }, { "epoch": 1.26, "learning_rate": 0.00031889030443202674, "loss": 3.1729, "step": 4352 }, { "epoch": 1.26, "learning_rate": 0.000318671667256969, "loss": 3.2123, "step": 4353 }, { "epoch": 1.26, "learning_rate": 0.00031845306999676273, "loss": 3.2762, "step": 4354 }, { "epoch": 1.26, "learning_rate": 0.0003182345126995265, "loss": 3.1385, "step": 4355 }, { "epoch": 1.26, "learning_rate": 0.0003180159954133699, "loss": 3.1416, "step": 4356 }, { "epoch": 1.26, "learning_rate": 0.0003177975181863945, "loss": 3.2634, "step": 4357 }, { "epoch": 1.26, "learning_rate": 0.00031757908106669195, "loss": 3.3148, "step": 4358 }, { "epoch": 1.26, "learning_rate": 0.0003173606841023461, "loss": 3.2412, "step": 4359 }, { "epoch": 1.26, "learning_rate": 0.0003171423273414312, "loss": 3.0294, "step": 4360 }, { "epoch": 1.26, "learning_rate": 0.0003169240108320135, "loss": 3.0657, "step": 4361 }, { "epoch": 1.26, "learning_rate": 0.00031670573462214927, "loss": 3.2326, "step": 4362 }, { "epoch": 1.26, "learning_rate": 0.0003164874987598868, "loss": 3.2638, "step": 4363 }, { "epoch": 1.26, "learning_rate": 0.00031626930329326533, "loss": 3.1991, "step": 4364 }, { "epoch": 1.26, "learning_rate": 0.0003160511482703147, "loss": 3.0151, "step": 4365 }, { "epoch": 1.26, "learning_rate": 0.0003158330337390565, "loss": 3.2689, "step": 4366 }, { "epoch": 1.26, "learning_rate": 0.0003156149597475029, "loss": 3.2691, "step": 4367 }, { "epoch": 1.27, "learning_rate": 0.00031539692634365783, "loss": 3.1791, "step": 4368 }, { "epoch": 1.27, "learning_rate": 0.00031517893357551534, "loss": 3.1898, "step": 4369 }, { "epoch": 1.27, "learning_rate": 0.00031496098149106147, "loss": 3.1655, "step": 4370 }, { "epoch": 1.27, "learning_rate": 0.0003147430701382724, "loss": 3.2061, "step": 4371 }, { "epoch": 1.27, "learning_rate": 0.0003145251995651162, "loss": 3.1195, "step": 4372 }, { "epoch": 1.27, "learning_rate": 0.00031430736981955145, "loss": 3.2595, "step": 4373 }, { "epoch": 1.27, "learning_rate": 0.00031408958094952756, "loss": 3.0659, "step": 4374 }, { "epoch": 1.27, "learning_rate": 0.0003138718330029857, "loss": 3.0981, "step": 4375 }, { "epoch": 1.27, "learning_rate": 0.0003136541260278574, "loss": 3.0637, "step": 4376 }, { "epoch": 1.27, "learning_rate": 0.0003134364600720654, "loss": 3.1966, "step": 4377 }, { "epoch": 1.27, "learning_rate": 0.0003132188351835232, "loss": 3.0912, "step": 4378 }, { "epoch": 1.27, "learning_rate": 0.0003130012514101357, "loss": 3.2594, "step": 4379 }, { "epoch": 1.27, "learning_rate": 0.0003127837087997981, "loss": 3.0932, "step": 4380 }, { "epoch": 1.27, "learning_rate": 0.0003125662074003972, "loss": 3.3003, "step": 4381 }, { "epoch": 1.27, "learning_rate": 0.00031234874725981045, "loss": 3.1608, "step": 4382 }, { "epoch": 1.27, "learning_rate": 0.000312131328425906, "loss": 3.1476, "step": 4383 }, { "epoch": 1.27, "learning_rate": 0.00031191395094654317, "loss": 3.1319, "step": 4384 }, { "epoch": 1.27, "learning_rate": 0.0003116966148695721, "loss": 3.112, "step": 4385 }, { "epoch": 1.27, "learning_rate": 0.0003114793202428342, "loss": 3.2105, "step": 4386 }, { "epoch": 1.27, "learning_rate": 0.00031126206711416083, "loss": 3.1714, "step": 4387 }, { "epoch": 1.27, "learning_rate": 0.0003110448555313752, "loss": 3.1959, "step": 4388 }, { "epoch": 1.27, "learning_rate": 0.00031082768554229064, "loss": 3.1005, "step": 4389 }, { "epoch": 1.27, "learning_rate": 0.000310610557194712, "loss": 3.1843, "step": 4390 }, { "epoch": 1.27, "learning_rate": 0.0003103934705364344, "loss": 3.056, "step": 4391 }, { "epoch": 1.27, "learning_rate": 0.00031017642561524385, "loss": 3.1208, "step": 4392 }, { "epoch": 1.27, "learning_rate": 0.00030995942247891747, "loss": 3.0345, "step": 4393 }, { "epoch": 1.27, "learning_rate": 0.00030974246117522285, "loss": 3.1842, "step": 4394 }, { "epoch": 1.27, "learning_rate": 0.00030952554175191894, "loss": 3.1379, "step": 4395 }, { "epoch": 1.27, "learning_rate": 0.00030930866425675453, "loss": 3.2579, "step": 4396 }, { "epoch": 1.27, "learning_rate": 0.0003090918287374701, "loss": 3.0501, "step": 4397 }, { "epoch": 1.27, "learning_rate": 0.00030887503524179637, "loss": 3.2008, "step": 4398 }, { "epoch": 1.27, "learning_rate": 0.0003086582838174551, "loss": 3.1131, "step": 4399 }, { "epoch": 1.27, "learning_rate": 0.0003084415745121586, "loss": 3.2033, "step": 4400 }, { "epoch": 1.27, "learning_rate": 0.0003082249073736097, "loss": 3.0802, "step": 4401 }, { "epoch": 1.27, "learning_rate": 0.0003080082824495024, "loss": 3.2029, "step": 4402 }, { "epoch": 1.28, "learning_rate": 0.0003077916997875211, "loss": 3.2674, "step": 4403 }, { "epoch": 1.28, "learning_rate": 0.0003075751594353413, "loss": 3.1842, "step": 4404 }, { "epoch": 1.28, "learning_rate": 0.00030735866144062843, "loss": 3.1468, "step": 4405 }, { "epoch": 1.28, "learning_rate": 0.00030714220585103933, "loss": 3.118, "step": 4406 }, { "epoch": 1.28, "learning_rate": 0.00030692579271422124, "loss": 3.1837, "step": 4407 }, { "epoch": 1.28, "learning_rate": 0.00030670942207781205, "loss": 3.0979, "step": 4408 }, { "epoch": 1.28, "learning_rate": 0.00030649309398944033, "loss": 3.1452, "step": 4409 }, { "epoch": 1.28, "learning_rate": 0.0003062768084967249, "loss": 3.2495, "step": 4410 }, { "epoch": 1.28, "learning_rate": 0.0003060605656472759, "loss": 3.1266, "step": 4411 }, { "epoch": 1.28, "learning_rate": 0.0003058443654886935, "loss": 3.1904, "step": 4412 }, { "epoch": 1.28, "learning_rate": 0.00030562820806856906, "loss": 3.1553, "step": 4413 }, { "epoch": 1.28, "learning_rate": 0.00030541209343448375, "loss": 3.0798, "step": 4414 }, { "epoch": 1.28, "learning_rate": 0.00030519602163400994, "loss": 3.1815, "step": 4415 }, { "epoch": 1.28, "learning_rate": 0.00030497999271471033, "loss": 3.0954, "step": 4416 }, { "epoch": 1.28, "learning_rate": 0.0003047640067241385, "loss": 3.101, "step": 4417 }, { "epoch": 1.28, "learning_rate": 0.00030454806370983794, "loss": 3.1149, "step": 4418 }, { "epoch": 1.28, "learning_rate": 0.00030433216371934314, "loss": 3.2756, "step": 4419 }, { "epoch": 1.28, "learning_rate": 0.00030411630680017923, "loss": 3.2381, "step": 4420 }, { "epoch": 1.28, "learning_rate": 0.0003039004929998613, "loss": 3.1258, "step": 4421 }, { "epoch": 1.28, "learning_rate": 0.0003036847223658958, "loss": 3.2076, "step": 4422 }, { "epoch": 1.28, "learning_rate": 0.00030346899494577864, "loss": 3.2223, "step": 4423 }, { "epoch": 1.28, "learning_rate": 0.00030325331078699707, "loss": 3.1679, "step": 4424 }, { "epoch": 1.28, "learning_rate": 0.0003030376699370284, "loss": 3.1179, "step": 4425 }, { "epoch": 1.28, "learning_rate": 0.00030282207244334083, "loss": 3.1891, "step": 4426 }, { "epoch": 1.28, "learning_rate": 0.00030260651835339224, "loss": 3.1348, "step": 4427 }, { "epoch": 1.28, "learning_rate": 0.00030239100771463156, "loss": 3.1525, "step": 4428 }, { "epoch": 1.28, "learning_rate": 0.00030217554057449826, "loss": 3.1918, "step": 4429 }, { "epoch": 1.28, "learning_rate": 0.0003019601169804216, "loss": 3.0102, "step": 4430 }, { "epoch": 1.28, "learning_rate": 0.00030174473697982206, "loss": 3.1344, "step": 4431 }, { "epoch": 1.28, "learning_rate": 0.0003015294006201097, "loss": 3.1769, "step": 4432 }, { "epoch": 1.28, "learning_rate": 0.00030131410794868564, "loss": 3.1818, "step": 4433 }, { "epoch": 1.28, "learning_rate": 0.0003010988590129409, "loss": 3.0946, "step": 4434 }, { "epoch": 1.28, "learning_rate": 0.0003008836538602575, "loss": 3.1929, "step": 4435 }, { "epoch": 1.28, "learning_rate": 0.00030066849253800705, "loss": 3.2949, "step": 4436 }, { "epoch": 1.28, "learning_rate": 0.00030045337509355187, "loss": 3.136, "step": 4437 }, { "epoch": 1.29, "learning_rate": 0.00030023830157424505, "loss": 3.1546, "step": 4438 }, { "epoch": 1.29, "learning_rate": 0.00030002327202742897, "loss": 3.1719, "step": 4439 }, { "epoch": 1.29, "learning_rate": 0.0002998082865004374, "loss": 3.1127, "step": 4440 }, { "epoch": 1.29, "learning_rate": 0.00029959334504059377, "loss": 3.0611, "step": 4441 }, { "epoch": 1.29, "learning_rate": 0.0002993784476952123, "loss": 3.1246, "step": 4442 }, { "epoch": 1.29, "learning_rate": 0.00029916359451159677, "loss": 3.0966, "step": 4443 }, { "epoch": 1.29, "learning_rate": 0.0002989487855370421, "loss": 3.1859, "step": 4444 }, { "epoch": 1.29, "learning_rate": 0.00029873402081883276, "loss": 3.2014, "step": 4445 }, { "epoch": 1.29, "learning_rate": 0.0002985193004042438, "loss": 3.1886, "step": 4446 }, { "epoch": 1.29, "learning_rate": 0.00029830462434054085, "loss": 3.2714, "step": 4447 }, { "epoch": 1.29, "learning_rate": 0.0002980899926749788, "loss": 3.1641, "step": 4448 }, { "epoch": 1.29, "learning_rate": 0.00029787540545480397, "loss": 3.2276, "step": 4449 }, { "epoch": 1.29, "learning_rate": 0.00029766086272725194, "loss": 3.1944, "step": 4450 }, { "epoch": 1.29, "learning_rate": 0.0002974463645395492, "loss": 3.1607, "step": 4451 }, { "epoch": 1.29, "learning_rate": 0.0002972319109389118, "loss": 3.22, "step": 4452 }, { "epoch": 1.29, "learning_rate": 0.0002970175019725465, "loss": 3.1153, "step": 4453 }, { "epoch": 1.29, "learning_rate": 0.00029680313768764985, "loss": 3.079, "step": 4454 }, { "epoch": 1.29, "learning_rate": 0.0002965888181314088, "loss": 3.2596, "step": 4455 }, { "epoch": 1.29, "learning_rate": 0.0002963745433510005, "loss": 3.2678, "step": 4456 }, { "epoch": 1.29, "learning_rate": 0.00029616031339359185, "loss": 3.1018, "step": 4457 }, { "epoch": 1.29, "learning_rate": 0.0002959461283063404, "loss": 3.1338, "step": 4458 }, { "epoch": 1.29, "learning_rate": 0.00029573198813639343, "loss": 3.1867, "step": 4459 }, { "epoch": 1.29, "learning_rate": 0.0002955178929308888, "loss": 3.2485, "step": 4460 }, { "epoch": 1.29, "learning_rate": 0.0002953038427369537, "loss": 3.0609, "step": 4461 }, { "epoch": 1.29, "learning_rate": 0.00029508983760170637, "loss": 3.1474, "step": 4462 }, { "epoch": 1.29, "learning_rate": 0.00029487587757225407, "loss": 3.0304, "step": 4463 }, { "epoch": 1.29, "learning_rate": 0.0002946619626956952, "loss": 3.0586, "step": 4464 }, { "epoch": 1.29, "learning_rate": 0.0002944480930191176, "loss": 3.2352, "step": 4465 }, { "epoch": 1.29, "learning_rate": 0.0002942342685895989, "loss": 3.1061, "step": 4466 }, { "epoch": 1.29, "learning_rate": 0.00029402048945420756, "loss": 3.0077, "step": 4467 }, { "epoch": 1.29, "learning_rate": 0.0002938067556600015, "loss": 3.0736, "step": 4468 }, { "epoch": 1.29, "learning_rate": 0.000293593067254029, "loss": 3.1296, "step": 4469 }, { "epoch": 1.29, "learning_rate": 0.00029337942428332784, "loss": 3.1422, "step": 4470 }, { "epoch": 1.29, "learning_rate": 0.0002931658267949264, "loss": 3.3099, "step": 4471 }, { "epoch": 1.3, "learning_rate": 0.00029295227483584253, "loss": 3.0691, "step": 4472 }, { "epoch": 1.3, "learning_rate": 0.0002927387684530846, "loss": 3.2537, "step": 4473 }, { "epoch": 1.3, "learning_rate": 0.0002925253076936505, "loss": 3.2675, "step": 4474 }, { "epoch": 1.3, "learning_rate": 0.00029231189260452807, "loss": 3.202, "step": 4475 }, { "epoch": 1.3, "learning_rate": 0.00029209852323269536, "loss": 3.1449, "step": 4476 }, { "epoch": 1.3, "learning_rate": 0.0002918851996251203, "loss": 3.1791, "step": 4477 }, { "epoch": 1.3, "learning_rate": 0.00029167192182876075, "loss": 3.1496, "step": 4478 }, { "epoch": 1.3, "learning_rate": 0.0002914586898905642, "loss": 3.1588, "step": 4479 }, { "epoch": 1.3, "learning_rate": 0.0002912455038574686, "loss": 3.1174, "step": 4480 }, { "epoch": 1.3, "learning_rate": 0.000291032363776401, "loss": 3.2206, "step": 4481 }, { "epoch": 1.3, "learning_rate": 0.00029081926969427917, "loss": 3.1654, "step": 4482 }, { "epoch": 1.3, "learning_rate": 0.0002906062216580105, "loss": 3.1472, "step": 4483 }, { "epoch": 1.3, "learning_rate": 0.00029039321971449173, "loss": 3.168, "step": 4484 }, { "epoch": 1.3, "learning_rate": 0.00029018026391061023, "loss": 3.3089, "step": 4485 }, { "epoch": 1.3, "learning_rate": 0.00028996735429324255, "loss": 3.1458, "step": 4486 }, { "epoch": 1.3, "learning_rate": 0.00028975449090925566, "loss": 3.1569, "step": 4487 }, { "epoch": 1.3, "learning_rate": 0.0002895416738055057, "loss": 3.1133, "step": 4488 }, { "epoch": 1.3, "learning_rate": 0.0002893289030288393, "loss": 3.1959, "step": 4489 }, { "epoch": 1.3, "learning_rate": 0.00028911617862609223, "loss": 3.2427, "step": 4490 }, { "epoch": 1.3, "learning_rate": 0.00028890350064409066, "loss": 3.1281, "step": 4491 }, { "epoch": 1.3, "learning_rate": 0.0002886908691296504, "loss": 3.2941, "step": 4492 }, { "epoch": 1.3, "learning_rate": 0.0002884782841295764, "loss": 3.1643, "step": 4493 }, { "epoch": 1.3, "learning_rate": 0.00028826574569066455, "loss": 3.1858, "step": 4494 }, { "epoch": 1.3, "learning_rate": 0.0002880532538596992, "loss": 3.2509, "step": 4495 }, { "epoch": 1.3, "learning_rate": 0.00028784080868345547, "loss": 3.191, "step": 4496 }, { "epoch": 1.3, "learning_rate": 0.00028762841020869747, "loss": 3.0864, "step": 4497 }, { "epoch": 1.3, "learning_rate": 0.0002874160584821798, "loss": 3.1358, "step": 4498 }, { "epoch": 1.3, "learning_rate": 0.00028720375355064575, "loss": 3.2339, "step": 4499 }, { "epoch": 1.3, "learning_rate": 0.0002869914954608293, "loss": 3.144, "step": 4500 }, { "epoch": 1.3, "learning_rate": 0.0002867792842594538, "loss": 3.2085, "step": 4501 }, { "epoch": 1.3, "learning_rate": 0.00028656711999323176, "loss": 3.2408, "step": 4502 }, { "epoch": 1.3, "learning_rate": 0.0002863550027088662, "loss": 2.9579, "step": 4503 }, { "epoch": 1.3, "learning_rate": 0.000286142932453049, "loss": 3.2115, "step": 4504 }, { "epoch": 1.3, "learning_rate": 0.00028593090927246244, "loss": 3.1262, "step": 4505 }, { "epoch": 1.3, "learning_rate": 0.00028571893321377776, "loss": 3.2156, "step": 4506 }, { "epoch": 1.31, "learning_rate": 0.00028550700432365645, "loss": 3.2541, "step": 4507 }, { "epoch": 1.31, "learning_rate": 0.00028529512264874893, "loss": 3.2622, "step": 4508 }, { "epoch": 1.31, "learning_rate": 0.00028508328823569567, "loss": 3.2184, "step": 4509 }, { "epoch": 1.31, "learning_rate": 0.00028487150113112706, "loss": 3.0903, "step": 4510 }, { "epoch": 1.31, "learning_rate": 0.0002846597613816622, "loss": 3.2262, "step": 4511 }, { "epoch": 1.31, "learning_rate": 0.0002844480690339106, "loss": 3.2081, "step": 4512 }, { "epoch": 1.31, "learning_rate": 0.0002842364241344707, "loss": 3.1509, "step": 4513 }, { "epoch": 1.31, "learning_rate": 0.000284024826729931, "loss": 3.0985, "step": 4514 }, { "epoch": 1.31, "learning_rate": 0.00028381327686686917, "loss": 3.2135, "step": 4515 }, { "epoch": 1.31, "learning_rate": 0.00028360177459185265, "loss": 3.0244, "step": 4516 }, { "epoch": 1.31, "learning_rate": 0.0002833903199514383, "loss": 3.0926, "step": 4517 }, { "epoch": 1.31, "learning_rate": 0.00028317891299217235, "loss": 3.0954, "step": 4518 }, { "epoch": 1.31, "learning_rate": 0.00028296755376059106, "loss": 3.2463, "step": 4519 }, { "epoch": 1.31, "learning_rate": 0.0002827562423032195, "loss": 3.1249, "step": 4520 }, { "epoch": 1.31, "learning_rate": 0.0002825449786665729, "loss": 3.2133, "step": 4521 }, { "epoch": 1.31, "learning_rate": 0.00028233376289715507, "loss": 3.1463, "step": 4522 }, { "epoch": 1.31, "learning_rate": 0.00028212259504146043, "loss": 3.1391, "step": 4523 }, { "epoch": 1.31, "learning_rate": 0.0002819114751459718, "loss": 3.2367, "step": 4524 }, { "epoch": 1.31, "learning_rate": 0.00028170040325716216, "loss": 3.0864, "step": 4525 }, { "epoch": 1.31, "learning_rate": 0.00028148937942149347, "loss": 3.1327, "step": 4526 }, { "epoch": 1.31, "learning_rate": 0.0002812784036854172, "loss": 3.2885, "step": 4527 }, { "epoch": 1.31, "learning_rate": 0.00028106747609537485, "loss": 3.2478, "step": 4528 }, { "epoch": 1.31, "learning_rate": 0.00028085659669779616, "loss": 3.0744, "step": 4529 }, { "epoch": 1.31, "learning_rate": 0.0002806457655391015, "loss": 3.163, "step": 4530 }, { "epoch": 1.31, "learning_rate": 0.00028043498266569944, "loss": 3.2394, "step": 4531 }, { "epoch": 1.31, "learning_rate": 0.000280224248123989, "loss": 3.1724, "step": 4532 }, { "epoch": 1.31, "learning_rate": 0.00028001356196035766, "loss": 3.2768, "step": 4533 }, { "epoch": 1.31, "learning_rate": 0.0002798029242211828, "loss": 3.1896, "step": 4534 }, { "epoch": 1.31, "learning_rate": 0.0002795923349528311, "loss": 3.079, "step": 4535 }, { "epoch": 1.31, "learning_rate": 0.0002793817942016585, "loss": 3.1793, "step": 4536 }, { "epoch": 1.31, "learning_rate": 0.00027917130201401025, "loss": 3.1879, "step": 4537 }, { "epoch": 1.31, "learning_rate": 0.0002789608584362205, "loss": 3.2226, "step": 4538 }, { "epoch": 1.31, "learning_rate": 0.00027875046351461365, "loss": 3.0989, "step": 4539 }, { "epoch": 1.31, "learning_rate": 0.0002785401172955022, "loss": 3.2153, "step": 4540 }, { "epoch": 1.32, "learning_rate": 0.00027832981982518925, "loss": 3.2679, "step": 4541 }, { "epoch": 1.32, "learning_rate": 0.00027811957114996577, "loss": 3.2148, "step": 4542 }, { "epoch": 1.32, "learning_rate": 0.0002779093713161131, "loss": 3.3309, "step": 4543 }, { "epoch": 1.32, "learning_rate": 0.0002776992203699014, "loss": 3.2397, "step": 4544 }, { "epoch": 1.32, "learning_rate": 0.0002774891183575902, "loss": 3.2486, "step": 4545 }, { "epoch": 1.32, "learning_rate": 0.0002772790653254278, "loss": 3.1203, "step": 4546 }, { "epoch": 1.32, "learning_rate": 0.0002770690613196525, "loss": 3.2269, "step": 4547 }, { "epoch": 1.32, "learning_rate": 0.0002768591063864911, "loss": 3.1521, "step": 4548 }, { "epoch": 1.32, "learning_rate": 0.00027664920057215975, "loss": 3.2765, "step": 4549 }, { "epoch": 1.32, "learning_rate": 0.00027643934392286427, "loss": 3.1786, "step": 4550 }, { "epoch": 1.32, "learning_rate": 0.000276229536484799, "loss": 3.1451, "step": 4551 }, { "epoch": 1.32, "learning_rate": 0.0002760197783041477, "loss": 3.2161, "step": 4552 }, { "epoch": 1.32, "learning_rate": 0.00027581006942708363, "loss": 3.2235, "step": 4553 }, { "epoch": 1.32, "learning_rate": 0.00027560040989976894, "loss": 3.1718, "step": 4554 }, { "epoch": 1.32, "learning_rate": 0.00027539079976835444, "loss": 3.1093, "step": 4555 }, { "epoch": 1.32, "learning_rate": 0.00027518123907898105, "loss": 3.0281, "step": 4556 }, { "epoch": 1.32, "learning_rate": 0.0002749717278777779, "loss": 3.0718, "step": 4557 }, { "epoch": 1.32, "learning_rate": 0.00027476226621086353, "loss": 3.2629, "step": 4558 }, { "epoch": 1.32, "learning_rate": 0.00027455285412434597, "loss": 3.1003, "step": 4559 }, { "epoch": 1.32, "learning_rate": 0.0002743434916643216, "loss": 3.1979, "step": 4560 }, { "epoch": 1.32, "learning_rate": 0.0002741341788768764, "loss": 3.0718, "step": 4561 }, { "epoch": 1.32, "learning_rate": 0.0002739249158080855, "loss": 3.2098, "step": 4562 }, { "epoch": 1.32, "learning_rate": 0.0002737157025040129, "loss": 3.1909, "step": 4563 }, { "epoch": 1.32, "learning_rate": 0.0002735065390107112, "loss": 3.2881, "step": 4564 }, { "epoch": 1.32, "learning_rate": 0.00027329742537422297, "loss": 3.258, "step": 4565 }, { "epoch": 1.32, "learning_rate": 0.0002730883616405791, "loss": 3.1274, "step": 4566 }, { "epoch": 1.32, "learning_rate": 0.00027287934785579936, "loss": 3.2338, "step": 4567 }, { "epoch": 1.32, "learning_rate": 0.0002726703840658934, "loss": 3.2258, "step": 4568 }, { "epoch": 1.32, "learning_rate": 0.00027246147031685886, "loss": 3.2384, "step": 4569 }, { "epoch": 1.32, "learning_rate": 0.000272252606654683, "loss": 3.0686, "step": 4570 }, { "epoch": 1.32, "learning_rate": 0.0002720437931253419, "loss": 3.3101, "step": 4571 }, { "epoch": 1.32, "learning_rate": 0.0002718350297748008, "loss": 3.0962, "step": 4572 }, { "epoch": 1.32, "learning_rate": 0.00027162631664901323, "loss": 3.0311, "step": 4573 }, { "epoch": 1.32, "learning_rate": 0.0002714176537939225, "loss": 3.1796, "step": 4574 }, { "epoch": 1.32, "learning_rate": 0.0002712090412554604, "loss": 3.1838, "step": 4575 }, { "epoch": 1.33, "learning_rate": 0.00027100047907954733, "loss": 3.3009, "step": 4576 }, { "epoch": 1.33, "learning_rate": 0.00027079196731209353, "loss": 3.1507, "step": 4577 }, { "epoch": 1.33, "learning_rate": 0.00027058350599899706, "loss": 3.1066, "step": 4578 }, { "epoch": 1.33, "learning_rate": 0.0002703750951861458, "loss": 3.1002, "step": 4579 }, { "epoch": 1.33, "learning_rate": 0.0002701667349194159, "loss": 3.0537, "step": 4580 }, { "epoch": 1.33, "learning_rate": 0.000269958425244673, "loss": 3.2584, "step": 4581 }, { "epoch": 1.33, "learning_rate": 0.0002697501662077707, "loss": 3.0661, "step": 4582 }, { "epoch": 1.33, "learning_rate": 0.00026954195785455244, "loss": 3.1509, "step": 4583 }, { "epoch": 1.33, "learning_rate": 0.00026933380023084976, "loss": 3.0836, "step": 4584 }, { "epoch": 1.33, "learning_rate": 0.00026912569338248315, "loss": 3.1636, "step": 4585 }, { "epoch": 1.33, "learning_rate": 0.0002689176373552622, "loss": 3.2025, "step": 4586 }, { "epoch": 1.33, "learning_rate": 0.0002687096321949853, "loss": 3.2062, "step": 4587 }, { "epoch": 1.33, "learning_rate": 0.00026850167794743965, "loss": 3.0706, "step": 4588 }, { "epoch": 1.33, "learning_rate": 0.00026829377465840075, "loss": 3.14, "step": 4589 }, { "epoch": 1.33, "learning_rate": 0.0002680859223736336, "loss": 3.1051, "step": 4590 }, { "epoch": 1.33, "learning_rate": 0.00026787812113889144, "loss": 3.2444, "step": 4591 }, { "epoch": 1.33, "learning_rate": 0.0002676703709999165, "loss": 3.0053, "step": 4592 }, { "epoch": 1.33, "learning_rate": 0.0002674626720024399, "loss": 3.1174, "step": 4593 }, { "epoch": 1.33, "learning_rate": 0.00026725502419218085, "loss": 3.1158, "step": 4594 }, { "epoch": 1.33, "learning_rate": 0.0002670474276148481, "loss": 3.1198, "step": 4595 }, { "epoch": 1.33, "learning_rate": 0.0002668398823161389, "loss": 3.1602, "step": 4596 }, { "epoch": 1.33, "learning_rate": 0.00026663238834173897, "loss": 3.0769, "step": 4597 }, { "epoch": 1.33, "learning_rate": 0.0002664249457373228, "loss": 3.1638, "step": 4598 }, { "epoch": 1.33, "learning_rate": 0.00026621755454855385, "loss": 3.1523, "step": 4599 }, { "epoch": 1.33, "learning_rate": 0.00026601021482108373, "loss": 3.2395, "step": 4600 }, { "epoch": 1.33, "learning_rate": 0.0002658029266005534, "loss": 3.1136, "step": 4601 }, { "epoch": 1.33, "learning_rate": 0.000265595689932592, "loss": 3.0708, "step": 4602 }, { "epoch": 1.33, "learning_rate": 0.00026538850486281705, "loss": 3.2708, "step": 4603 }, { "epoch": 1.33, "learning_rate": 0.00026518137143683563, "loss": 3.1723, "step": 4604 }, { "epoch": 1.33, "learning_rate": 0.0002649742897002426, "loss": 3.272, "step": 4605 }, { "epoch": 1.33, "learning_rate": 0.00026476725969862226, "loss": 3.1536, "step": 4606 }, { "epoch": 1.33, "learning_rate": 0.0002645602814775464, "loss": 3.1652, "step": 4607 }, { "epoch": 1.33, "learning_rate": 0.0002643533550825766, "loss": 3.2234, "step": 4608 }, { "epoch": 1.33, "learning_rate": 0.00026414648055926197, "loss": 3.0745, "step": 4609 }, { "epoch": 1.34, "learning_rate": 0.0002639396579531412, "loss": 3.1498, "step": 4610 }, { "epoch": 1.34, "learning_rate": 0.00026373288730974085, "loss": 3.2241, "step": 4611 }, { "epoch": 1.34, "learning_rate": 0.00026352616867457597, "loss": 3.1241, "step": 4612 }, { "epoch": 1.34, "learning_rate": 0.0002633195020931507, "loss": 3.1765, "step": 4613 }, { "epoch": 1.34, "learning_rate": 0.0002631128876109575, "loss": 3.1491, "step": 4614 }, { "epoch": 1.34, "learning_rate": 0.00026290632527347746, "loss": 3.1721, "step": 4615 }, { "epoch": 1.34, "learning_rate": 0.0002626998151261798, "loss": 3.3024, "step": 4616 }, { "epoch": 1.34, "learning_rate": 0.0002624933572145227, "loss": 3.1746, "step": 4617 }, { "epoch": 1.34, "learning_rate": 0.0002622869515839524, "loss": 3.0736, "step": 4618 }, { "epoch": 1.34, "learning_rate": 0.0002620805982799042, "loss": 3.1496, "step": 4619 }, { "epoch": 1.34, "learning_rate": 0.00026187429734780143, "loss": 3.1626, "step": 4620 }, { "epoch": 1.34, "learning_rate": 0.0002616680488330558, "loss": 3.1653, "step": 4621 }, { "epoch": 1.34, "learning_rate": 0.00026146185278106803, "loss": 3.1045, "step": 4622 }, { "epoch": 1.34, "learning_rate": 0.00026125570923722673, "loss": 3.1786, "step": 4623 }, { "epoch": 1.34, "learning_rate": 0.00026104961824690963, "loss": 3.3294, "step": 4624 }, { "epoch": 1.34, "learning_rate": 0.0002608435798554819, "loss": 3.1639, "step": 4625 }, { "epoch": 1.34, "learning_rate": 0.0002606375941082981, "loss": 3.2586, "step": 4626 }, { "epoch": 1.34, "learning_rate": 0.0002604316610507005, "loss": 3.1593, "step": 4627 }, { "epoch": 1.34, "learning_rate": 0.0002602257807280203, "loss": 3.1559, "step": 4628 }, { "epoch": 1.34, "learning_rate": 0.0002600199531855768, "loss": 3.1758, "step": 4629 }, { "epoch": 1.34, "learning_rate": 0.0002598141784686775, "loss": 3.3047, "step": 4630 }, { "epoch": 1.34, "learning_rate": 0.00025960845662261855, "loss": 3.2599, "step": 4631 }, { "epoch": 1.34, "learning_rate": 0.00025940278769268467, "loss": 3.1531, "step": 4632 }, { "epoch": 1.34, "learning_rate": 0.0002591971717241487, "loss": 3.2921, "step": 4633 }, { "epoch": 1.34, "learning_rate": 0.00025899160876227147, "loss": 3.0657, "step": 4634 }, { "epoch": 1.34, "learning_rate": 0.00025878609885230275, "loss": 3.1355, "step": 4635 }, { "epoch": 1.34, "learning_rate": 0.0002585806420394802, "loss": 3.149, "step": 4636 }, { "epoch": 1.34, "learning_rate": 0.0002583752383690301, "loss": 3.1341, "step": 4637 }, { "epoch": 1.34, "learning_rate": 0.00025816988788616665, "loss": 3.116, "step": 4638 }, { "epoch": 1.34, "learning_rate": 0.00025796459063609267, "loss": 3.1596, "step": 4639 }, { "epoch": 1.34, "learning_rate": 0.00025775934666399936, "loss": 3.2647, "step": 4640 }, { "epoch": 1.34, "learning_rate": 0.00025755415601506573, "loss": 3.2792, "step": 4641 }, { "epoch": 1.34, "learning_rate": 0.0002573490187344596, "loss": 3.3075, "step": 4642 }, { "epoch": 1.34, "learning_rate": 0.00025714393486733636, "loss": 3.2197, "step": 4643 }, { "epoch": 1.34, "learning_rate": 0.0002569389044588404, "loss": 3.1239, "step": 4644 }, { "epoch": 1.35, "learning_rate": 0.0002567339275541037, "loss": 3.0941, "step": 4645 }, { "epoch": 1.35, "learning_rate": 0.0002565290041982471, "loss": 3.1523, "step": 4646 }, { "epoch": 1.35, "learning_rate": 0.00025632413443637885, "loss": 3.1606, "step": 4647 }, { "epoch": 1.35, "learning_rate": 0.00025611931831359615, "loss": 3.1903, "step": 4648 }, { "epoch": 1.35, "learning_rate": 0.0002559145558749842, "loss": 3.0856, "step": 4649 }, { "epoch": 1.35, "learning_rate": 0.00025570984716561594, "loss": 3.0458, "step": 4650 }, { "epoch": 1.35, "learning_rate": 0.0002555051922305532, "loss": 3.185, "step": 4651 }, { "epoch": 1.35, "learning_rate": 0.00025530059111484514, "loss": 3.1185, "step": 4652 }, { "epoch": 1.35, "learning_rate": 0.00025509604386353, "loss": 3.1567, "step": 4653 }, { "epoch": 1.35, "learning_rate": 0.0002548915505216333, "loss": 3.2298, "step": 4654 }, { "epoch": 1.35, "learning_rate": 0.00025468711113416944, "loss": 3.0566, "step": 4655 }, { "epoch": 1.35, "learning_rate": 0.00025448272574614015, "loss": 3.2068, "step": 4656 }, { "epoch": 1.35, "learning_rate": 0.0002542783944025361, "loss": 3.1994, "step": 4657 }, { "epoch": 1.35, "learning_rate": 0.0002540741171483356, "loss": 3.1709, "step": 4658 }, { "epoch": 1.35, "learning_rate": 0.00025386989402850486, "loss": 3.1098, "step": 4659 }, { "epoch": 1.35, "learning_rate": 0.00025366572508799884, "loss": 3.0496, "step": 4660 }, { "epoch": 1.35, "learning_rate": 0.00025346161037175977, "loss": 3.1481, "step": 4661 }, { "epoch": 1.35, "learning_rate": 0.00025325754992471883, "loss": 3.2612, "step": 4662 }, { "epoch": 1.35, "learning_rate": 0.0002530535437917942, "loss": 3.2384, "step": 4663 }, { "epoch": 1.35, "learning_rate": 0.00025284959201789313, "loss": 3.1182, "step": 4664 }, { "epoch": 1.35, "learning_rate": 0.00025264569464791023, "loss": 3.2053, "step": 4665 }, { "epoch": 1.35, "learning_rate": 0.00025244185172672827, "loss": 3.1141, "step": 4666 }, { "epoch": 1.35, "learning_rate": 0.0002522380632992185, "loss": 3.3139, "step": 4667 }, { "epoch": 1.35, "learning_rate": 0.0002520343294102394, "loss": 3.0521, "step": 4668 }, { "epoch": 1.35, "learning_rate": 0.0002518306501046381, "loss": 3.1451, "step": 4669 }, { "epoch": 1.35, "learning_rate": 0.00025162702542724926, "loss": 3.1, "step": 4670 }, { "epoch": 1.35, "learning_rate": 0.0002514234554228959, "loss": 3.2367, "step": 4671 }, { "epoch": 1.35, "learning_rate": 0.0002512199401363886, "loss": 3.0206, "step": 4672 }, { "epoch": 1.35, "learning_rate": 0.00025101647961252635, "loss": 3.147, "step": 4673 }, { "epoch": 1.35, "learning_rate": 0.0002508130738960955, "loss": 3.2224, "step": 4674 }, { "epoch": 1.35, "learning_rate": 0.0002506097230318709, "loss": 3.2643, "step": 4675 }, { "epoch": 1.35, "learning_rate": 0.00025040642706461533, "loss": 3.1765, "step": 4676 }, { "epoch": 1.35, "learning_rate": 0.0002502031860390788, "loss": 3.1892, "step": 4677 }, { "epoch": 1.35, "learning_rate": 0.0002500000000000001, "loss": 3.3372, "step": 4678 }, { "epoch": 1.36, "learning_rate": 0.00024979686899210515, "loss": 3.2664, "step": 4679 }, { "epoch": 1.36, "learning_rate": 0.0002495937930601084, "loss": 3.1426, "step": 4680 }, { "epoch": 1.36, "learning_rate": 0.00024939077224871164, "loss": 3.1751, "step": 4681 }, { "epoch": 1.36, "learning_rate": 0.00024918780660260507, "loss": 3.1972, "step": 4682 }, { "epoch": 1.36, "learning_rate": 0.00024898489616646607, "loss": 3.1065, "step": 4683 }, { "epoch": 1.36, "learning_rate": 0.00024878204098496045, "loss": 3.1524, "step": 4684 }, { "epoch": 1.36, "learning_rate": 0.00024857924110274185, "loss": 3.1291, "step": 4685 }, { "epoch": 1.36, "learning_rate": 0.00024837649656445115, "loss": 3.3093, "step": 4686 }, { "epoch": 1.36, "learning_rate": 0.0002481738074147178, "loss": 3.2211, "step": 4687 }, { "epoch": 1.36, "learning_rate": 0.0002479711736981584, "loss": 3.03, "step": 4688 }, { "epoch": 1.36, "learning_rate": 0.00024776859545937787, "loss": 3.2136, "step": 4689 }, { "epoch": 1.36, "learning_rate": 0.00024756607274296845, "loss": 3.0667, "step": 4690 }, { "epoch": 1.36, "learning_rate": 0.0002473636055935105, "loss": 3.1628, "step": 4691 }, { "epoch": 1.36, "learning_rate": 0.00024716119405557215, "loss": 3.2409, "step": 4692 }, { "epoch": 1.36, "learning_rate": 0.0002469588381737093, "loss": 3.1614, "step": 4693 }, { "epoch": 1.36, "learning_rate": 0.0002467565379924653, "loss": 3.1096, "step": 4694 }, { "epoch": 1.36, "learning_rate": 0.0002465542935563712, "loss": 3.1729, "step": 4695 }, { "epoch": 1.36, "learning_rate": 0.0002463521049099465, "loss": 3.2557, "step": 4696 }, { "epoch": 1.36, "learning_rate": 0.0002461499720976974, "loss": 3.1858, "step": 4697 }, { "epoch": 1.36, "learning_rate": 0.0002459478951641188, "loss": 3.2453, "step": 4698 }, { "epoch": 1.36, "learning_rate": 0.0002457458741536924, "loss": 3.1961, "step": 4699 }, { "epoch": 1.36, "learning_rate": 0.0002455439091108883, "loss": 3.1637, "step": 4700 }, { "epoch": 1.36, "learning_rate": 0.00024534200008016397, "loss": 3.1196, "step": 4701 }, { "epoch": 1.36, "learning_rate": 0.00024514014710596464, "loss": 3.2624, "step": 4702 }, { "epoch": 1.36, "learning_rate": 0.00024493835023272313, "loss": 3.0744, "step": 4703 }, { "epoch": 1.36, "learning_rate": 0.0002447366095048597, "loss": 3.2355, "step": 4704 }, { "epoch": 1.36, "learning_rate": 0.0002445349249667828, "loss": 3.1455, "step": 4705 }, { "epoch": 1.36, "learning_rate": 0.00024433329666288773, "loss": 3.2215, "step": 4706 }, { "epoch": 1.36, "learning_rate": 0.0002441317246375584, "loss": 3.1803, "step": 4707 }, { "epoch": 1.36, "learning_rate": 0.00024393020893516537, "loss": 3.2079, "step": 4708 }, { "epoch": 1.36, "learning_rate": 0.00024372874960006742, "loss": 3.0395, "step": 4709 }, { "epoch": 1.36, "learning_rate": 0.0002435273466766107, "loss": 3.2556, "step": 4710 }, { "epoch": 1.36, "learning_rate": 0.00024332600020912916, "loss": 3.0952, "step": 4711 }, { "epoch": 1.36, "learning_rate": 0.000243124710241944, "loss": 3.0976, "step": 4712 }, { "epoch": 1.36, "learning_rate": 0.00024292347681936395, "loss": 3.1068, "step": 4713 }, { "epoch": 1.37, "learning_rate": 0.00024272229998568574, "loss": 3.0871, "step": 4714 }, { "epoch": 1.37, "learning_rate": 0.00024252117978519304, "loss": 3.1564, "step": 4715 }, { "epoch": 1.37, "learning_rate": 0.00024232011626215783, "loss": 3.0593, "step": 4716 }, { "epoch": 1.37, "learning_rate": 0.0002421191094608387, "loss": 3.1595, "step": 4717 }, { "epoch": 1.37, "learning_rate": 0.0002419181594254824, "loss": 3.1244, "step": 4718 }, { "epoch": 1.37, "learning_rate": 0.00024171726620032309, "loss": 3.2051, "step": 4719 }, { "epoch": 1.37, "learning_rate": 0.00024151642982958243, "loss": 3.1475, "step": 4720 }, { "epoch": 1.37, "learning_rate": 0.00024131565035746932, "loss": 3.1166, "step": 4721 }, { "epoch": 1.37, "learning_rate": 0.00024111492782818013, "loss": 3.2291, "step": 4722 }, { "epoch": 1.37, "learning_rate": 0.0002409142622858992, "loss": 3.1165, "step": 4723 }, { "epoch": 1.37, "learning_rate": 0.00024071365377479766, "loss": 3.1898, "step": 4724 }, { "epoch": 1.37, "learning_rate": 0.00024051310233903466, "loss": 3.1328, "step": 4725 }, { "epoch": 1.37, "learning_rate": 0.00024031260802275623, "loss": 3.177, "step": 4726 }, { "epoch": 1.37, "learning_rate": 0.00024011217087009635, "loss": 3.0635, "step": 4727 }, { "epoch": 1.37, "learning_rate": 0.00023991179092517612, "loss": 3.2857, "step": 4728 }, { "epoch": 1.37, "learning_rate": 0.00023971146823210438, "loss": 3.2692, "step": 4729 }, { "epoch": 1.37, "learning_rate": 0.00023951120283497668, "loss": 3.1513, "step": 4730 }, { "epoch": 1.37, "learning_rate": 0.0002393109947778768, "loss": 3.1462, "step": 4731 }, { "epoch": 1.37, "learning_rate": 0.00023911084410487533, "loss": 3.0955, "step": 4732 }, { "epoch": 1.37, "learning_rate": 0.0002389107508600301, "loss": 3.2386, "step": 4733 }, { "epoch": 1.37, "learning_rate": 0.000238710715087387, "loss": 3.0783, "step": 4734 }, { "epoch": 1.37, "learning_rate": 0.00023851073683097852, "loss": 3.2868, "step": 4735 }, { "epoch": 1.37, "learning_rate": 0.00023831081613482496, "loss": 3.2019, "step": 4736 }, { "epoch": 1.37, "learning_rate": 0.00023811095304293383, "loss": 3.0669, "step": 4737 }, { "epoch": 1.37, "learning_rate": 0.0002379111475993001, "loss": 3.0716, "step": 4738 }, { "epoch": 1.37, "learning_rate": 0.0002377113998479056, "loss": 3.2232, "step": 4739 }, { "epoch": 1.37, "learning_rate": 0.00023751170983271996, "loss": 3.1723, "step": 4740 }, { "epoch": 1.37, "learning_rate": 0.00023731207759769985, "loss": 3.2145, "step": 4741 }, { "epoch": 1.37, "learning_rate": 0.00023711250318678907, "loss": 3.1971, "step": 4742 }, { "epoch": 1.37, "learning_rate": 0.00023691298664391903, "loss": 3.2075, "step": 4743 }, { "epoch": 1.37, "learning_rate": 0.00023671352801300823, "loss": 3.0842, "step": 4744 }, { "epoch": 1.37, "learning_rate": 0.0002365141273379627, "loss": 3.0985, "step": 4745 }, { "epoch": 1.37, "learning_rate": 0.00023631478466267498, "loss": 3.1266, "step": 4746 }, { "epoch": 1.37, "learning_rate": 0.00023611550003102582, "loss": 3.2164, "step": 4747 }, { "epoch": 1.38, "learning_rate": 0.00023591627348688228, "loss": 3.2108, "step": 4748 }, { "epoch": 1.38, "learning_rate": 0.00023571710507409944, "loss": 3.1917, "step": 4749 }, { "epoch": 1.38, "learning_rate": 0.0002355179948365189, "loss": 3.177, "step": 4750 }, { "epoch": 1.38, "learning_rate": 0.00023531894281796973, "loss": 3.2635, "step": 4751 }, { "epoch": 1.38, "learning_rate": 0.0002351199490622683, "loss": 3.066, "step": 4752 }, { "epoch": 1.38, "learning_rate": 0.00023492101361321805, "loss": 3.2129, "step": 4753 }, { "epoch": 1.38, "learning_rate": 0.00023472213651460976, "loss": 3.209, "step": 4754 }, { "epoch": 1.38, "learning_rate": 0.0002345233178102209, "loss": 3.3059, "step": 4755 }, { "epoch": 1.38, "learning_rate": 0.00023432455754381675, "loss": 3.1397, "step": 4756 }, { "epoch": 1.38, "learning_rate": 0.0002341258557591489, "loss": 3.1549, "step": 4757 }, { "epoch": 1.38, "learning_rate": 0.00023392721249995696, "loss": 3.1978, "step": 4758 }, { "epoch": 1.38, "learning_rate": 0.00023372862780996707, "loss": 3.2501, "step": 4759 }, { "epoch": 1.38, "learning_rate": 0.0002335301017328924, "loss": 3.1446, "step": 4760 }, { "epoch": 1.38, "learning_rate": 0.00023333163431243364, "loss": 3.137, "step": 4761 }, { "epoch": 1.38, "learning_rate": 0.0002331332255922784, "loss": 3.1342, "step": 4762 }, { "epoch": 1.38, "learning_rate": 0.0002329348756161015, "loss": 3.1787, "step": 4763 }, { "epoch": 1.38, "learning_rate": 0.0002327365844275643, "loss": 3.2193, "step": 4764 }, { "epoch": 1.38, "learning_rate": 0.00023253835207031603, "loss": 3.2207, "step": 4765 }, { "epoch": 1.38, "learning_rate": 0.00023234017858799212, "loss": 3.1829, "step": 4766 }, { "epoch": 1.38, "learning_rate": 0.00023214206402421585, "loss": 3.0381, "step": 4767 }, { "epoch": 1.38, "learning_rate": 0.0002319440084225969, "loss": 3.0953, "step": 4768 }, { "epoch": 1.38, "learning_rate": 0.00023174601182673204, "loss": 3.0973, "step": 4769 }, { "epoch": 1.38, "learning_rate": 0.0002315480742802054, "loss": 3.2272, "step": 4770 }, { "epoch": 1.38, "learning_rate": 0.000231350195826588, "loss": 3.1671, "step": 4771 }, { "epoch": 1.38, "learning_rate": 0.00023115237650943792, "loss": 3.1089, "step": 4772 }, { "epoch": 1.38, "learning_rate": 0.0002309546163722997, "loss": 3.2299, "step": 4773 }, { "epoch": 1.38, "learning_rate": 0.0002307569154587056, "loss": 3.3094, "step": 4774 }, { "epoch": 1.38, "learning_rate": 0.00023055927381217413, "loss": 3.1787, "step": 4775 }, { "epoch": 1.38, "learning_rate": 0.0002303616914762115, "loss": 3.2224, "step": 4776 }, { "epoch": 1.38, "learning_rate": 0.0002301641684943102, "loss": 3.2235, "step": 4777 }, { "epoch": 1.38, "learning_rate": 0.00022996670490994987, "loss": 3.1893, "step": 4778 }, { "epoch": 1.38, "learning_rate": 0.00022976930076659718, "loss": 3.1835, "step": 4779 }, { "epoch": 1.38, "learning_rate": 0.00022957195610770576, "loss": 3.0309, "step": 4780 }, { "epoch": 1.38, "learning_rate": 0.0002293746709767162, "loss": 3.0467, "step": 4781 }, { "epoch": 1.38, "learning_rate": 0.00022917744541705543, "loss": 3.2067, "step": 4782 }, { "epoch": 1.39, "learning_rate": 0.00022898027947213813, "loss": 3.1588, "step": 4783 }, { "epoch": 1.39, "learning_rate": 0.000228783173185365, "loss": 3.1309, "step": 4784 }, { "epoch": 1.39, "learning_rate": 0.00022858612660012434, "loss": 3.2307, "step": 4785 }, { "epoch": 1.39, "learning_rate": 0.0002283891397597908, "loss": 3.055, "step": 4786 }, { "epoch": 1.39, "learning_rate": 0.0002281922127077259, "loss": 3.2264, "step": 4787 }, { "epoch": 1.39, "learning_rate": 0.00022799534548727834, "loss": 3.1983, "step": 4788 }, { "epoch": 1.39, "learning_rate": 0.00022779853814178347, "loss": 3.1529, "step": 4789 }, { "epoch": 1.39, "learning_rate": 0.00022760179071456356, "loss": 3.0728, "step": 4790 }, { "epoch": 1.39, "learning_rate": 0.00022740510324892733, "loss": 3.2164, "step": 4791 }, { "epoch": 1.39, "learning_rate": 0.0002272084757881709, "loss": 3.1414, "step": 4792 }, { "epoch": 1.39, "learning_rate": 0.0002270119083755764, "loss": 3.1329, "step": 4793 }, { "epoch": 1.39, "learning_rate": 0.00022681540105441356, "loss": 3.082, "step": 4794 }, { "epoch": 1.39, "learning_rate": 0.00022661895386793824, "loss": 3.0959, "step": 4795 }, { "epoch": 1.39, "learning_rate": 0.00022642256685939338, "loss": 3.199, "step": 4796 }, { "epoch": 1.39, "learning_rate": 0.00022622624007200887, "loss": 3.1239, "step": 4797 }, { "epoch": 1.39, "learning_rate": 0.00022602997354900073, "loss": 3.2793, "step": 4798 }, { "epoch": 1.39, "learning_rate": 0.0002258337673335724, "loss": 3.1101, "step": 4799 }, { "epoch": 1.39, "learning_rate": 0.00022563762146891344, "loss": 3.139, "step": 4800 }, { "epoch": 1.39, "learning_rate": 0.0002254415359982006, "loss": 3.1757, "step": 4801 }, { "epoch": 1.39, "learning_rate": 0.000225245510964597, "loss": 3.1062, "step": 4802 }, { "epoch": 1.39, "learning_rate": 0.0002250495464112528, "loss": 3.2204, "step": 4803 }, { "epoch": 1.39, "learning_rate": 0.00022485364238130436, "loss": 3.2032, "step": 4804 }, { "epoch": 1.39, "learning_rate": 0.00022465779891787512, "loss": 3.2247, "step": 4805 }, { "epoch": 1.39, "learning_rate": 0.00022446201606407534, "loss": 3.2433, "step": 4806 }, { "epoch": 1.39, "learning_rate": 0.00022426629386300124, "loss": 3.0789, "step": 4807 }, { "epoch": 1.39, "learning_rate": 0.00022407063235773645, "loss": 3.2201, "step": 4808 }, { "epoch": 1.39, "learning_rate": 0.00022387503159135064, "loss": 3.087, "step": 4809 }, { "epoch": 1.39, "learning_rate": 0.0002236794916069007, "loss": 3.1565, "step": 4810 }, { "epoch": 1.39, "learning_rate": 0.00022348401244742943, "loss": 3.1342, "step": 4811 }, { "epoch": 1.39, "learning_rate": 0.00022328859415596703, "loss": 3.2827, "step": 4812 }, { "epoch": 1.39, "learning_rate": 0.00022309323677552944, "loss": 3.1224, "step": 4813 }, { "epoch": 1.39, "learning_rate": 0.00022289794034911997, "loss": 3.2083, "step": 4814 }, { "epoch": 1.39, "learning_rate": 0.00022270270491972833, "loss": 3.0468, "step": 4815 }, { "epoch": 1.39, "learning_rate": 0.00022250753053033024, "loss": 3.082, "step": 4816 }, { "epoch": 1.4, "learning_rate": 0.00022231241722388883, "loss": 3.1906, "step": 4817 }, { "epoch": 1.4, "learning_rate": 0.00022211736504335305, "loss": 3.3461, "step": 4818 }, { "epoch": 1.4, "learning_rate": 0.000221922374031659, "loss": 3.2171, "step": 4819 }, { "epoch": 1.4, "learning_rate": 0.00022172744423172865, "loss": 3.2047, "step": 4820 }, { "epoch": 1.4, "learning_rate": 0.00022153257568647124, "loss": 3.15, "step": 4821 }, { "epoch": 1.4, "learning_rate": 0.00022133776843878184, "loss": 3.1991, "step": 4822 }, { "epoch": 1.4, "learning_rate": 0.0002211430225315425, "loss": 3.1327, "step": 4823 }, { "epoch": 1.4, "learning_rate": 0.0002209483380076218, "loss": 3.1133, "step": 4824 }, { "epoch": 1.4, "learning_rate": 0.00022075371490987424, "loss": 3.0977, "step": 4825 }, { "epoch": 1.4, "learning_rate": 0.00022055915328114158, "loss": 3.1069, "step": 4826 }, { "epoch": 1.4, "learning_rate": 0.0002203646531642512, "loss": 3.1309, "step": 4827 }, { "epoch": 1.4, "learning_rate": 0.00022017021460201785, "loss": 3.0878, "step": 4828 }, { "epoch": 1.4, "learning_rate": 0.00021997583763724187, "loss": 3.1742, "step": 4829 }, { "epoch": 1.4, "learning_rate": 0.00021978152231271077, "loss": 3.0155, "step": 4830 }, { "epoch": 1.4, "learning_rate": 0.00021958726867119782, "loss": 3.2554, "step": 4831 }, { "epoch": 1.4, "learning_rate": 0.00021939307675546322, "loss": 3.2394, "step": 4832 }, { "epoch": 1.4, "learning_rate": 0.00021919894660825363, "loss": 3.0463, "step": 4833 }, { "epoch": 1.4, "learning_rate": 0.00021900487827230148, "loss": 3.134, "step": 4834 }, { "epoch": 1.4, "learning_rate": 0.00021881087179032643, "loss": 3.1376, "step": 4835 }, { "epoch": 1.4, "learning_rate": 0.00021861692720503368, "loss": 3.1092, "step": 4836 }, { "epoch": 1.4, "learning_rate": 0.00021842304455911565, "loss": 3.1688, "step": 4837 }, { "epoch": 1.4, "learning_rate": 0.00021822922389525036, "loss": 3.1174, "step": 4838 }, { "epoch": 1.4, "learning_rate": 0.0002180354652561028, "loss": 3.147, "step": 4839 }, { "epoch": 1.4, "learning_rate": 0.00021784176868432377, "loss": 3.1493, "step": 4840 }, { "epoch": 1.4, "learning_rate": 0.0002176481342225508, "loss": 3.0886, "step": 4841 }, { "epoch": 1.4, "learning_rate": 0.0002174545619134079, "loss": 3.1193, "step": 4842 }, { "epoch": 1.4, "learning_rate": 0.0002172610517995046, "loss": 3.0278, "step": 4843 }, { "epoch": 1.4, "learning_rate": 0.00021706760392343778, "loss": 3.2115, "step": 4844 }, { "epoch": 1.4, "learning_rate": 0.00021687421832778964, "loss": 3.1312, "step": 4845 }, { "epoch": 1.4, "learning_rate": 0.00021668089505512956, "loss": 3.0539, "step": 4846 }, { "epoch": 1.4, "learning_rate": 0.00021668089505512956, "loss": 3.236, "step": 4847 }, { "epoch": 1.4, "learning_rate": 0.0002164876341480123, "loss": 3.1148, "step": 4848 }, { "epoch": 1.4, "learning_rate": 0.0002162944356489797, "loss": 3.144, "step": 4849 }, { "epoch": 1.4, "learning_rate": 0.00021610129960055946, "loss": 2.9799, "step": 4850 }, { "epoch": 1.4, "learning_rate": 0.00021590822604526576, "loss": 3.0892, "step": 4851 }, { "epoch": 1.41, "learning_rate": 0.00021571521502559876, "loss": 3.0612, "step": 4852 }, { "epoch": 1.41, "learning_rate": 0.00021552226658404466, "loss": 3.1319, "step": 4853 }, { "epoch": 1.41, "learning_rate": 0.0002153293807630766, "loss": 3.1463, "step": 4854 }, { "epoch": 1.41, "learning_rate": 0.0002151365576051532, "loss": 3.0752, "step": 4855 }, { "epoch": 1.41, "learning_rate": 0.00021494379715272, "loss": 3.2536, "step": 4856 }, { "epoch": 1.41, "learning_rate": 0.0002147510994482078, "loss": 3.1966, "step": 4857 }, { "epoch": 1.41, "learning_rate": 0.00021455846453403456, "loss": 3.1548, "step": 4858 }, { "epoch": 1.41, "learning_rate": 0.00021436589245260373, "loss": 3.2177, "step": 4859 }, { "epoch": 1.41, "learning_rate": 0.00021417338324630558, "loss": 3.1875, "step": 4860 }, { "epoch": 1.41, "learning_rate": 0.00021398093695751587, "loss": 3.1237, "step": 4861 }, { "epoch": 1.41, "learning_rate": 0.00021378855362859663, "loss": 3.1576, "step": 4862 }, { "epoch": 1.41, "learning_rate": 0.00021359623330189653, "loss": 3.186, "step": 4863 }, { "epoch": 1.41, "learning_rate": 0.0002134039760197497, "loss": 3.1699, "step": 4864 }, { "epoch": 1.41, "learning_rate": 0.00021321178182447708, "loss": 3.1358, "step": 4865 }, { "epoch": 1.41, "learning_rate": 0.00021301965075838498, "loss": 3.1838, "step": 4866 }, { "epoch": 1.41, "learning_rate": 0.0002128275828637664, "loss": 3.0968, "step": 4867 }, { "epoch": 1.41, "learning_rate": 0.00021263557818290024, "loss": 3.1526, "step": 4868 }, { "epoch": 1.41, "learning_rate": 0.00021244363675805168, "loss": 3.0926, "step": 4869 }, { "epoch": 1.41, "learning_rate": 0.0002122517586314715, "loss": 3.1612, "step": 4870 }, { "epoch": 1.41, "learning_rate": 0.0002120599438453968, "loss": 3.0952, "step": 4871 }, { "epoch": 1.41, "learning_rate": 0.0002118681924420509, "loss": 3.124, "step": 4872 }, { "epoch": 1.41, "learning_rate": 0.0002116765044636429, "loss": 3.2084, "step": 4873 }, { "epoch": 1.41, "learning_rate": 0.0002114848799523683, "loss": 3.3064, "step": 4874 }, { "epoch": 1.41, "learning_rate": 0.00021129331895040803, "loss": 3.1184, "step": 4875 }, { "epoch": 1.41, "learning_rate": 0.00021110182149992963, "loss": 3.047, "step": 4876 }, { "epoch": 1.41, "learning_rate": 0.0002109103876430864, "loss": 3.1922, "step": 4877 }, { "epoch": 1.41, "learning_rate": 0.00021071901742201782, "loss": 3.1065, "step": 4878 }, { "epoch": 1.41, "learning_rate": 0.0002105277108788491, "loss": 3.1321, "step": 4879 }, { "epoch": 1.41, "learning_rate": 0.00021033646805569133, "loss": 3.1027, "step": 4880 }, { "epoch": 1.41, "learning_rate": 0.00021014528899464214, "loss": 3.148, "step": 4881 }, { "epoch": 1.41, "learning_rate": 0.00020995417373778435, "loss": 3.1258, "step": 4882 }, { "epoch": 1.41, "learning_rate": 0.00020976312232718765, "loss": 3.115, "step": 4883 }, { "epoch": 1.41, "learning_rate": 0.00020957213480490673, "loss": 3.2527, "step": 4884 }, { "epoch": 1.41, "learning_rate": 0.0002093812112129828, "loss": 3.0708, "step": 4885 }, { "epoch": 1.42, "learning_rate": 0.00020919035159344302, "loss": 3.1619, "step": 4886 }, { "epoch": 1.42, "learning_rate": 0.0002089995559883004, "loss": 3.0303, "step": 4887 }, { "epoch": 1.42, "learning_rate": 0.00020880882443955352, "loss": 3.0762, "step": 4888 }, { "epoch": 1.42, "learning_rate": 0.00020861815698918707, "loss": 3.2248, "step": 4889 }, { "epoch": 1.42, "learning_rate": 0.00020842755367917194, "loss": 3.1526, "step": 4890 }, { "epoch": 1.42, "learning_rate": 0.00020823701455146431, "loss": 3.1506, "step": 4891 }, { "epoch": 1.42, "learning_rate": 0.0002080465396480069, "loss": 3.2438, "step": 4892 }, { "epoch": 1.42, "learning_rate": 0.00020785612901072763, "loss": 3.2399, "step": 4893 }, { "epoch": 1.42, "learning_rate": 0.00020766578268154072, "loss": 3.1575, "step": 4894 }, { "epoch": 1.42, "learning_rate": 0.0002074755007023461, "loss": 3.1926, "step": 4895 }, { "epoch": 1.42, "learning_rate": 0.00020728528311502974, "loss": 3.0671, "step": 4896 }, { "epoch": 1.42, "learning_rate": 0.00020709512996146314, "loss": 3.05, "step": 4897 }, { "epoch": 1.42, "learning_rate": 0.00020690504128350335, "loss": 3.2301, "step": 4898 }, { "epoch": 1.42, "learning_rate": 0.00020671501712299413, "loss": 3.1826, "step": 4899 }, { "epoch": 1.42, "learning_rate": 0.00020652505752176398, "loss": 3.0602, "step": 4900 }, { "epoch": 1.42, "learning_rate": 0.00020633516252162803, "loss": 3.1879, "step": 4901 }, { "epoch": 1.42, "learning_rate": 0.00020614533216438673, "loss": 3.1159, "step": 4902 }, { "epoch": 1.42, "learning_rate": 0.00020595556649182677, "loss": 3.2745, "step": 4903 }, { "epoch": 1.42, "learning_rate": 0.0002057658655457198, "loss": 3.1159, "step": 4904 }, { "epoch": 1.42, "learning_rate": 0.00020557622936782412, "loss": 3.2713, "step": 4905 }, { "epoch": 1.42, "learning_rate": 0.00020538665799988304, "loss": 3.1568, "step": 4906 }, { "epoch": 1.42, "learning_rate": 0.00020519715148362584, "loss": 3.0868, "step": 4907 }, { "epoch": 1.42, "learning_rate": 0.00020500770986076796, "loss": 3.1231, "step": 4908 }, { "epoch": 1.42, "learning_rate": 0.00020481833317300975, "loss": 3.0659, "step": 4909 }, { "epoch": 1.42, "learning_rate": 0.00020462902146203794, "loss": 3.2051, "step": 4910 }, { "epoch": 1.42, "learning_rate": 0.00020443977476952469, "loss": 3.2312, "step": 4911 }, { "epoch": 1.42, "learning_rate": 0.00020425059313712807, "loss": 3.0831, "step": 4912 }, { "epoch": 1.42, "learning_rate": 0.00020406147660649132, "loss": 3.1583, "step": 4913 }, { "epoch": 1.42, "learning_rate": 0.00020387242521924392, "loss": 3.03, "step": 4914 }, { "epoch": 1.42, "learning_rate": 0.00020368343901700054, "loss": 3.1212, "step": 4915 }, { "epoch": 1.42, "learning_rate": 0.00020349451804136192, "loss": 3.0455, "step": 4916 }, { "epoch": 1.42, "learning_rate": 0.00020330566233391417, "loss": 3.1794, "step": 4917 }, { "epoch": 1.42, "learning_rate": 0.0002031168719362289, "loss": 2.9961, "step": 4918 }, { "epoch": 1.42, "learning_rate": 0.00020292814688986372, "loss": 3.1809, "step": 4919 }, { "epoch": 1.42, "learning_rate": 0.00020273948723636165, "loss": 3.2329, "step": 4920 }, { "epoch": 1.43, "learning_rate": 0.0002025508930172516, "loss": 3.2684, "step": 4921 }, { "epoch": 1.43, "learning_rate": 0.00020236236427404735, "loss": 3.1181, "step": 4922 }, { "epoch": 1.43, "learning_rate": 0.00020217390104824924, "loss": 3.2072, "step": 4923 }, { "epoch": 1.43, "learning_rate": 0.00020198550338134218, "loss": 3.2338, "step": 4924 }, { "epoch": 1.43, "learning_rate": 0.0002017971713147977, "loss": 3.1974, "step": 4925 }, { "epoch": 1.43, "learning_rate": 0.00020160890489007193, "loss": 3.1437, "step": 4926 }, { "epoch": 1.43, "learning_rate": 0.00020142070414860702, "loss": 3.1719, "step": 4927 }, { "epoch": 1.43, "learning_rate": 0.00020123256913183064, "loss": 3.1549, "step": 4928 }, { "epoch": 1.43, "learning_rate": 0.00020104449988115597, "loss": 3.0832, "step": 4929 }, { "epoch": 1.43, "learning_rate": 0.000200856496437982, "loss": 3.2204, "step": 4930 }, { "epoch": 1.43, "learning_rate": 0.00020066855884369244, "loss": 3.2044, "step": 4931 }, { "epoch": 1.43, "learning_rate": 0.00020048068713965743, "loss": 3.3526, "step": 4932 }, { "epoch": 1.43, "learning_rate": 0.00020029288136723177, "loss": 3.0929, "step": 4933 }, { "epoch": 1.43, "learning_rate": 0.0002001051415677566, "loss": 3.2433, "step": 4934 }, { "epoch": 1.43, "learning_rate": 0.00019991746778255786, "loss": 3.1197, "step": 4935 }, { "epoch": 1.43, "learning_rate": 0.00019972986005294707, "loss": 3.0751, "step": 4936 }, { "epoch": 1.43, "learning_rate": 0.0001995423184202214, "loss": 3.1883, "step": 4937 }, { "epoch": 1.43, "learning_rate": 0.00019935484292566346, "loss": 3.0802, "step": 4938 }, { "epoch": 1.43, "learning_rate": 0.00019916743361054145, "loss": 3.0523, "step": 4939 }, { "epoch": 1.43, "learning_rate": 0.00019898009051610844, "loss": 3.1174, "step": 4940 }, { "epoch": 1.43, "learning_rate": 0.00019879281368360359, "loss": 3.15, "step": 4941 }, { "epoch": 1.43, "learning_rate": 0.00019860560315425086, "loss": 3.2163, "step": 4942 }, { "epoch": 1.43, "learning_rate": 0.0001984184589692602, "loss": 3.1737, "step": 4943 }, { "epoch": 1.43, "learning_rate": 0.00019823138116982652, "loss": 3.1692, "step": 4944 }, { "epoch": 1.43, "learning_rate": 0.0001980443697971302, "loss": 3.146, "step": 4945 }, { "epoch": 1.43, "learning_rate": 0.0001978574248923371, "loss": 3.1343, "step": 4946 }, { "epoch": 1.43, "learning_rate": 0.00019767054649659849, "loss": 3.1502, "step": 4947 }, { "epoch": 1.43, "learning_rate": 0.00019748373465105107, "loss": 3.0044, "step": 4948 }, { "epoch": 1.43, "learning_rate": 0.00019729698939681645, "loss": 3.2191, "step": 4949 }, { "epoch": 1.43, "learning_rate": 0.00019711031077500218, "loss": 3.2671, "step": 4950 }, { "epoch": 1.43, "learning_rate": 0.00019692369882670053, "loss": 3.0781, "step": 4951 }, { "epoch": 1.43, "learning_rate": 0.00019673715359298972, "loss": 3.1761, "step": 4952 }, { "epoch": 1.43, "learning_rate": 0.00019655067511493263, "loss": 3.2169, "step": 4953 }, { "epoch": 1.43, "learning_rate": 0.00019636426343357794, "loss": 3.1785, "step": 4954 }, { "epoch": 1.44, "learning_rate": 0.0001961779185899597, "loss": 3.2021, "step": 4955 }, { "epoch": 1.44, "learning_rate": 0.00019599164062509666, "loss": 3.2001, "step": 4956 }, { "epoch": 1.44, "learning_rate": 0.00019580542957999353, "loss": 3.1405, "step": 4957 }, { "epoch": 1.44, "learning_rate": 0.00019561928549563967, "loss": 3.2605, "step": 4958 }, { "epoch": 1.44, "learning_rate": 0.00019543320841301028, "loss": 3.0594, "step": 4959 }, { "epoch": 1.44, "learning_rate": 0.00019524719837306514, "loss": 3.1016, "step": 4960 }, { "epoch": 1.44, "learning_rate": 0.00019506125541675018, "loss": 3.0993, "step": 4961 }, { "epoch": 1.44, "learning_rate": 0.00019487537958499552, "loss": 3.1634, "step": 4962 }, { "epoch": 1.44, "learning_rate": 0.0001946895709187173, "loss": 3.1763, "step": 4963 }, { "epoch": 1.44, "learning_rate": 0.00019450382945881674, "loss": 3.2498, "step": 4964 }, { "epoch": 1.44, "learning_rate": 0.0001943181552461798, "loss": 3.1754, "step": 4965 }, { "epoch": 1.44, "learning_rate": 0.0001941325483216783, "loss": 3.099, "step": 4966 }, { "epoch": 1.44, "learning_rate": 0.00019394700872616856, "loss": 3.1702, "step": 4967 }, { "epoch": 1.44, "learning_rate": 0.00019376153650049283, "loss": 3.1546, "step": 4968 }, { "epoch": 1.44, "learning_rate": 0.00019357613168547773, "loss": 2.973, "step": 4969 }, { "epoch": 1.44, "learning_rate": 0.0001933907943219358, "loss": 3.1945, "step": 4970 }, { "epoch": 1.44, "learning_rate": 0.00019320552445066408, "loss": 3.1176, "step": 4971 }, { "epoch": 1.44, "learning_rate": 0.0001930203221124452, "loss": 3.2118, "step": 4972 }, { "epoch": 1.44, "learning_rate": 0.00019283518734804694, "loss": 3.2346, "step": 4973 }, { "epoch": 1.44, "learning_rate": 0.00019265012019822165, "loss": 3.1857, "step": 4974 }, { "epoch": 1.44, "learning_rate": 0.00019246512070370764, "loss": 3.3577, "step": 4975 }, { "epoch": 1.44, "learning_rate": 0.00019228018890522742, "loss": 3.162, "step": 4976 }, { "epoch": 1.44, "learning_rate": 0.00019209532484348947, "loss": 3.1579, "step": 4977 }, { "epoch": 1.44, "learning_rate": 0.00019191052855918657, "loss": 3.116, "step": 4978 }, { "epoch": 1.44, "learning_rate": 0.00019172580009299735, "loss": 3.0441, "step": 4979 }, { "epoch": 1.44, "learning_rate": 0.00019154113948558466, "loss": 3.2342, "step": 4980 }, { "epoch": 1.44, "learning_rate": 0.0001913565467775971, "loss": 3.1608, "step": 4981 }, { "epoch": 1.44, "learning_rate": 0.00019117202200966832, "loss": 3.2481, "step": 4982 }, { "epoch": 1.44, "learning_rate": 0.00019098756522241634, "loss": 3.0637, "step": 4983 }, { "epoch": 1.44, "learning_rate": 0.00019080317645644507, "loss": 3.1949, "step": 4984 }, { "epoch": 1.44, "learning_rate": 0.00019061885575234272, "loss": 3.0564, "step": 4985 }, { "epoch": 1.44, "learning_rate": 0.00019043460315068306, "loss": 3.1937, "step": 4986 }, { "epoch": 1.44, "learning_rate": 0.00019025041869202448, "loss": 3.218, "step": 4987 }, { "epoch": 1.44, "learning_rate": 0.0001900663024169107, "loss": 2.9805, "step": 4988 }, { "epoch": 1.44, "learning_rate": 0.00018988225436587002, "loss": 3.0926, "step": 4989 }, { "epoch": 1.45, "learning_rate": 0.00018969827457941614, "loss": 3.0241, "step": 4990 }, { "epoch": 1.45, "learning_rate": 0.00018951436309804764, "loss": 3.1611, "step": 4991 }, { "epoch": 1.45, "learning_rate": 0.00018933051996224782, "loss": 3.2516, "step": 4992 }, { "epoch": 1.45, "learning_rate": 0.00018914674521248532, "loss": 3.1694, "step": 4993 }, { "epoch": 1.45, "learning_rate": 0.0001889630388892131, "loss": 3.0599, "step": 4994 }, { "epoch": 1.45, "learning_rate": 0.00018877940103287, "loss": 3.0429, "step": 4995 }, { "epoch": 1.45, "learning_rate": 0.00018859583168387878, "loss": 3.1682, "step": 4996 }, { "epoch": 1.45, "learning_rate": 0.00018841233088264797, "loss": 2.9952, "step": 4997 }, { "epoch": 1.45, "learning_rate": 0.00018822889866957027, "loss": 3.1523, "step": 4998 }, { "epoch": 1.45, "learning_rate": 0.00018804553508502386, "loss": 3.1523, "step": 4999 }, { "epoch": 1.45, "learning_rate": 0.0001878622401693717, "loss": 3.0525, "step": 5000 }, { "epoch": 1.45, "learning_rate": 0.00018767901396296127, "loss": 3.3394, "step": 5001 }, { "epoch": 1.45, "learning_rate": 0.00018749585650612555, "loss": 3.1009, "step": 5002 }, { "epoch": 1.45, "learning_rate": 0.0001873127678391816, "loss": 3.0961, "step": 5003 }, { "epoch": 1.45, "learning_rate": 0.00018712974800243215, "loss": 3.0347, "step": 5004 }, { "epoch": 1.45, "learning_rate": 0.00018694679703616402, "loss": 3.1738, "step": 5005 }, { "epoch": 1.45, "learning_rate": 0.0001867639149806495, "loss": 3.2247, "step": 5006 }, { "epoch": 1.45, "learning_rate": 0.00018658110187614536, "loss": 3.1574, "step": 5007 }, { "epoch": 1.45, "learning_rate": 0.0001863983577628935, "loss": 3.1298, "step": 5008 }, { "epoch": 1.45, "learning_rate": 0.0001862156826811202, "loss": 3.1571, "step": 5009 }, { "epoch": 1.45, "learning_rate": 0.0001860330766710367, "loss": 3.2516, "step": 5010 }, { "epoch": 1.45, "learning_rate": 0.00018585053977283934, "loss": 3.0246, "step": 5011 }, { "epoch": 1.45, "learning_rate": 0.00018566807202670865, "loss": 3.2718, "step": 5012 }, { "epoch": 1.45, "learning_rate": 0.00018548567347281077, "loss": 3.3197, "step": 5013 }, { "epoch": 1.45, "learning_rate": 0.00018530334415129568, "loss": 3.2324, "step": 5014 }, { "epoch": 1.45, "learning_rate": 0.00018512108410229878, "loss": 3.178, "step": 5015 }, { "epoch": 1.45, "learning_rate": 0.00018493889336593995, "loss": 3.091, "step": 5016 }, { "epoch": 1.45, "learning_rate": 0.00018475677198232415, "loss": 3.0006, "step": 5017 }, { "epoch": 1.45, "learning_rate": 0.00018457471999154063, "loss": 3.2251, "step": 5018 }, { "epoch": 1.45, "learning_rate": 0.00018439273743366325, "loss": 3.1379, "step": 5019 }, { "epoch": 1.45, "learning_rate": 0.00018421082434875132, "loss": 3.1103, "step": 5020 }, { "epoch": 1.45, "learning_rate": 0.00018402898077684804, "loss": 3.1625, "step": 5021 }, { "epoch": 1.45, "learning_rate": 0.000183847206757982, "loss": 3.0827, "step": 5022 }, { "epoch": 1.45, "learning_rate": 0.00018366550233216584, "loss": 3.1919, "step": 5023 }, { "epoch": 1.45, "learning_rate": 0.00018348386753939733, "loss": 3.2462, "step": 5024 }, { "epoch": 1.46, "learning_rate": 0.00018330230241965883, "loss": 3.1634, "step": 5025 }, { "epoch": 1.46, "learning_rate": 0.0001831208070129175, "loss": 3.1601, "step": 5026 }, { "epoch": 1.46, "learning_rate": 0.00018293938135912474, "loss": 3.1203, "step": 5027 }, { "epoch": 1.46, "learning_rate": 0.0001827580254982167, "loss": 3.1295, "step": 5028 }, { "epoch": 1.46, "learning_rate": 0.00018257673947011456, "loss": 3.1419, "step": 5029 }, { "epoch": 1.46, "learning_rate": 0.00018239552331472358, "loss": 3.1273, "step": 5030 }, { "epoch": 1.46, "learning_rate": 0.0001822143770719342, "loss": 3.2123, "step": 5031 }, { "epoch": 1.46, "learning_rate": 0.000182033300781621, "loss": 3.2782, "step": 5032 }, { "epoch": 1.46, "learning_rate": 0.00018185229448364337, "loss": 3.0794, "step": 5033 }, { "epoch": 1.46, "learning_rate": 0.0001816713582178453, "loss": 3.2066, "step": 5034 }, { "epoch": 1.46, "learning_rate": 0.00018149049202405553, "loss": 3.2021, "step": 5035 }, { "epoch": 1.46, "learning_rate": 0.00018130969594208702, "loss": 3.1035, "step": 5036 }, { "epoch": 1.46, "learning_rate": 0.0001811289700117373, "loss": 3.2057, "step": 5037 }, { "epoch": 1.46, "learning_rate": 0.0001809483142727889, "loss": 3.0486, "step": 5038 }, { "epoch": 1.46, "learning_rate": 0.0001807677287650083, "loss": 3.0078, "step": 5039 }, { "epoch": 1.46, "learning_rate": 0.00018058721352814721, "loss": 3.0885, "step": 5040 }, { "epoch": 1.46, "learning_rate": 0.00018040676860194116, "loss": 2.9607, "step": 5041 }, { "epoch": 1.46, "learning_rate": 0.0001802263940261107, "loss": 3.0293, "step": 5042 }, { "epoch": 1.46, "learning_rate": 0.00018004608984036069, "loss": 3.1549, "step": 5043 }, { "epoch": 1.46, "learning_rate": 0.0001798658560843807, "loss": 3.1456, "step": 5044 }, { "epoch": 1.46, "learning_rate": 0.00017968569279784463, "loss": 3.0543, "step": 5045 }, { "epoch": 1.46, "learning_rate": 0.0001795056000204105, "loss": 3.1443, "step": 5046 }, { "epoch": 1.46, "learning_rate": 0.0001793255777917217, "loss": 3.0805, "step": 5047 }, { "epoch": 1.46, "learning_rate": 0.00017914562615140505, "loss": 3.1715, "step": 5048 }, { "epoch": 1.46, "learning_rate": 0.00017896574513907287, "loss": 3.1215, "step": 5049 }, { "epoch": 1.46, "learning_rate": 0.00017878593479432104, "loss": 3.2094, "step": 5050 }, { "epoch": 1.46, "learning_rate": 0.0001786061951567303, "loss": 3.1217, "step": 5051 }, { "epoch": 1.46, "learning_rate": 0.00017842652626586598, "loss": 3.2132, "step": 5052 }, { "epoch": 1.46, "learning_rate": 0.00017824692816127775, "loss": 3.1405, "step": 5053 }, { "epoch": 1.46, "learning_rate": 0.00017806740088249933, "loss": 3.1858, "step": 5054 }, { "epoch": 1.46, "learning_rate": 0.00017788794446904906, "loss": 3.1055, "step": 5055 }, { "epoch": 1.46, "learning_rate": 0.00017770855896043002, "loss": 3.0878, "step": 5056 }, { "epoch": 1.46, "learning_rate": 0.00017752924439612912, "loss": 3.104, "step": 5057 }, { "epoch": 1.46, "learning_rate": 0.00017735000081561797, "loss": 3.2313, "step": 5058 }, { "epoch": 1.47, "learning_rate": 0.00017717082825835268, "loss": 3.1515, "step": 5059 }, { "epoch": 1.47, "learning_rate": 0.00017699172676377363, "loss": 2.9477, "step": 5060 }, { "epoch": 1.47, "learning_rate": 0.0001768126963713052, "loss": 3.1655, "step": 5061 }, { "epoch": 1.47, "learning_rate": 0.00017663373712035668, "loss": 3.1422, "step": 5062 }, { "epoch": 1.47, "learning_rate": 0.00017645484905032128, "loss": 3.131, "step": 5063 }, { "epoch": 1.47, "learning_rate": 0.00017627603220057657, "loss": 3.0871, "step": 5064 }, { "epoch": 1.47, "learning_rate": 0.00017609728661048492, "loss": 3.0861, "step": 5065 }, { "epoch": 1.47, "learning_rate": 0.00017591861231939216, "loss": 3.2191, "step": 5066 }, { "epoch": 1.47, "learning_rate": 0.00017574000936662927, "loss": 3.0206, "step": 5067 }, { "epoch": 1.47, "learning_rate": 0.0001755614777915111, "loss": 3.1692, "step": 5068 }, { "epoch": 1.47, "learning_rate": 0.00017538301763333702, "loss": 3.0007, "step": 5069 }, { "epoch": 1.47, "learning_rate": 0.00017520462893139022, "loss": 3.0352, "step": 5070 }, { "epoch": 1.47, "learning_rate": 0.00017502631172493878, "loss": 3.1687, "step": 5071 }, { "epoch": 1.47, "learning_rate": 0.00017484806605323457, "loss": 3.1945, "step": 5072 }, { "epoch": 1.47, "learning_rate": 0.0001746698919555137, "loss": 3.1269, "step": 5073 }, { "epoch": 1.47, "learning_rate": 0.00017449178947099702, "loss": 3.2012, "step": 5074 }, { "epoch": 1.47, "learning_rate": 0.00017431375863888898, "loss": 3.0409, "step": 5075 }, { "epoch": 1.47, "learning_rate": 0.0001741357994983787, "loss": 3.1089, "step": 5076 }, { "epoch": 1.47, "learning_rate": 0.00017395791208863948, "loss": 3.2255, "step": 5077 }, { "epoch": 1.47, "learning_rate": 0.0001737800964488288, "loss": 3.0954, "step": 5078 }, { "epoch": 1.47, "learning_rate": 0.000173602352618088, "loss": 3.0814, "step": 5079 }, { "epoch": 1.47, "learning_rate": 0.00017342468063554322, "loss": 3.1429, "step": 5080 }, { "epoch": 1.47, "learning_rate": 0.00017324708054030431, "loss": 3.0447, "step": 5081 }, { "epoch": 1.47, "learning_rate": 0.00017306955237146522, "loss": 3.0767, "step": 5082 }, { "epoch": 1.47, "learning_rate": 0.00017289209616810469, "loss": 3.1377, "step": 5083 }, { "epoch": 1.47, "learning_rate": 0.00017271471196928485, "loss": 3.1865, "step": 5084 }, { "epoch": 1.47, "learning_rate": 0.00017253739981405253, "loss": 3.1529, "step": 5085 }, { "epoch": 1.47, "learning_rate": 0.00017236015974143848, "loss": 3.1991, "step": 5086 }, { "epoch": 1.47, "learning_rate": 0.00017218299179045787, "loss": 3.1605, "step": 5087 }, { "epoch": 1.47, "learning_rate": 0.00017200589600010934, "loss": 3.0345, "step": 5088 }, { "epoch": 1.47, "learning_rate": 0.00017182887240937644, "loss": 3.0794, "step": 5089 }, { "epoch": 1.47, "learning_rate": 0.00017165192105722622, "loss": 3.2268, "step": 5090 }, { "epoch": 1.47, "learning_rate": 0.00017147504198260994, "loss": 3.1163, "step": 5091 }, { "epoch": 1.47, "learning_rate": 0.00017129823522446337, "loss": 3.0638, "step": 5092 }, { "epoch": 1.47, "learning_rate": 0.00017112150082170568, "loss": 3.1321, "step": 5093 }, { "epoch": 1.48, "learning_rate": 0.00017094483881324073, "loss": 3.2566, "step": 5094 }, { "epoch": 1.48, "learning_rate": 0.00017076824923795614, "loss": 3.2599, "step": 5095 }, { "epoch": 1.48, "learning_rate": 0.00017059173213472385, "loss": 3.1829, "step": 5096 }, { "epoch": 1.48, "learning_rate": 0.00017041528754239927, "loss": 3.0713, "step": 5097 }, { "epoch": 1.48, "learning_rate": 0.00017023891549982256, "loss": 3.1495, "step": 5098 }, { "epoch": 1.48, "learning_rate": 0.00017006261604581724, "loss": 3.2171, "step": 5099 }, { "epoch": 1.48, "learning_rate": 0.00016988638921919147, "loss": 3.2043, "step": 5100 }, { "epoch": 1.48, "learning_rate": 0.00016971023505873707, "loss": 3.1246, "step": 5101 }, { "epoch": 1.48, "learning_rate": 0.0001695341536032297, "loss": 3.2132, "step": 5102 }, { "epoch": 1.48, "learning_rate": 0.00016935814489142935, "loss": 3.0695, "step": 5103 }, { "epoch": 1.48, "learning_rate": 0.00016918220896208004, "loss": 3.1494, "step": 5104 }, { "epoch": 1.48, "learning_rate": 0.00016900634585390967, "loss": 3.2162, "step": 5105 }, { "epoch": 1.48, "learning_rate": 0.00016883055560562978, "loss": 3.0525, "step": 5106 }, { "epoch": 1.48, "learning_rate": 0.0001686548382559364, "loss": 3.0624, "step": 5107 }, { "epoch": 1.48, "learning_rate": 0.000168479193843509, "loss": 3.1044, "step": 5108 }, { "epoch": 1.48, "learning_rate": 0.00016830362240701164, "loss": 3.066, "step": 5109 }, { "epoch": 1.48, "learning_rate": 0.00016812812398509154, "loss": 3.0577, "step": 5110 }, { "epoch": 1.48, "learning_rate": 0.0001679526986163804, "loss": 3.0099, "step": 5111 }, { "epoch": 1.48, "learning_rate": 0.00016777734633949393, "loss": 3.1244, "step": 5112 }, { "epoch": 1.48, "learning_rate": 0.00016760206719303107, "loss": 3.3101, "step": 5113 }, { "epoch": 1.48, "learning_rate": 0.00016742686121557542, "loss": 3.0983, "step": 5114 }, { "epoch": 1.48, "learning_rate": 0.00016725172844569392, "loss": 3.1898, "step": 5115 }, { "epoch": 1.48, "learning_rate": 0.0001670766689219379, "loss": 3.192, "step": 5116 }, { "epoch": 1.48, "learning_rate": 0.00016690168268284196, "loss": 3.2193, "step": 5117 }, { "epoch": 1.48, "learning_rate": 0.00016672676976692524, "loss": 3.1499, "step": 5118 }, { "epoch": 1.48, "learning_rate": 0.00016655193021269005, "loss": 3.0394, "step": 5119 }, { "epoch": 1.48, "learning_rate": 0.00016637716405862307, "loss": 3.2218, "step": 5120 }, { "epoch": 1.48, "learning_rate": 0.0001662024713431949, "loss": 3.0087, "step": 5121 }, { "epoch": 1.48, "learning_rate": 0.00016602785210485926, "loss": 3.0303, "step": 5122 }, { "epoch": 1.48, "learning_rate": 0.00016585330638205453, "loss": 3.2245, "step": 5123 }, { "epoch": 1.48, "learning_rate": 0.00016567883421320228, "loss": 3.2196, "step": 5124 }, { "epoch": 1.48, "learning_rate": 0.00016550443563670848, "loss": 3.2211, "step": 5125 }, { "epoch": 1.48, "learning_rate": 0.00016533011069096217, "loss": 3.1984, "step": 5126 }, { "epoch": 1.48, "learning_rate": 0.00016515585941433692, "loss": 3.2623, "step": 5127 }, { "epoch": 1.49, "learning_rate": 0.0001649816818451894, "loss": 3.1305, "step": 5128 }, { "epoch": 1.49, "learning_rate": 0.0001648075780218607, "loss": 3.1474, "step": 5129 }, { "epoch": 1.49, "learning_rate": 0.00016463354798267537, "loss": 3.1244, "step": 5130 }, { "epoch": 1.49, "learning_rate": 0.00016445959176594155, "loss": 3.1782, "step": 5131 }, { "epoch": 1.49, "learning_rate": 0.00016428570940995154, "loss": 3.2158, "step": 5132 }, { "epoch": 1.49, "learning_rate": 0.00016411190095298077, "loss": 3.0847, "step": 5133 }, { "epoch": 1.49, "learning_rate": 0.00016393816643328924, "loss": 3.1619, "step": 5134 }, { "epoch": 1.49, "learning_rate": 0.00016376450588911985, "loss": 3.1613, "step": 5135 }, { "epoch": 1.49, "learning_rate": 0.00016359091935869984, "loss": 3.2884, "step": 5136 }, { "epoch": 1.49, "learning_rate": 0.00016341740688023966, "loss": 3.1123, "step": 5137 }, { "epoch": 1.49, "learning_rate": 0.00016324396849193374, "loss": 3.1471, "step": 5138 }, { "epoch": 1.49, "learning_rate": 0.00016307060423196047, "loss": 3.1733, "step": 5139 }, { "epoch": 1.49, "learning_rate": 0.0001628973141384812, "loss": 3.1481, "step": 5140 }, { "epoch": 1.49, "learning_rate": 0.00016272409824964168, "loss": 3.1135, "step": 5141 }, { "epoch": 1.49, "learning_rate": 0.00016255095660357076, "loss": 3.1053, "step": 5142 }, { "epoch": 1.49, "learning_rate": 0.00016237788923838148, "loss": 3.3294, "step": 5143 }, { "epoch": 1.49, "learning_rate": 0.0001622048961921699, "loss": 3.1823, "step": 5144 }, { "epoch": 1.49, "learning_rate": 0.0001620319775030164, "loss": 3.2064, "step": 5145 }, { "epoch": 1.49, "learning_rate": 0.00016185913320898438, "loss": 3.1228, "step": 5146 }, { "epoch": 1.49, "learning_rate": 0.00016168636334812125, "loss": 3.2693, "step": 5147 }, { "epoch": 1.49, "learning_rate": 0.00016151366795845817, "loss": 3.1474, "step": 5148 }, { "epoch": 1.49, "learning_rate": 0.00016134104707800928, "loss": 3.2661, "step": 5149 }, { "epoch": 1.49, "learning_rate": 0.000161168500744773, "loss": 3.0609, "step": 5150 }, { "epoch": 1.49, "learning_rate": 0.00016099602899673083, "loss": 3.0265, "step": 5151 }, { "epoch": 1.49, "learning_rate": 0.0001608236318718483, "loss": 3.1289, "step": 5152 }, { "epoch": 1.49, "learning_rate": 0.000160651309408074, "loss": 3.0658, "step": 5153 }, { "epoch": 1.49, "learning_rate": 0.00016047906164334065, "loss": 3.2073, "step": 5154 }, { "epoch": 1.49, "learning_rate": 0.00016030688861556392, "loss": 3.0244, "step": 5155 }, { "epoch": 1.49, "learning_rate": 0.00016013479036264356, "loss": 3.2324, "step": 5156 }, { "epoch": 1.49, "learning_rate": 0.0001599627669224628, "loss": 3.1937, "step": 5157 }, { "epoch": 1.49, "learning_rate": 0.00015979081833288796, "loss": 3.1276, "step": 5158 }, { "epoch": 1.49, "learning_rate": 0.00015961894463176941, "loss": 3.1273, "step": 5159 }, { "epoch": 1.49, "learning_rate": 0.0001594471458569406, "loss": 3.108, "step": 5160 }, { "epoch": 1.49, "learning_rate": 0.00015927542204621886, "loss": 3.1275, "step": 5161 }, { "epoch": 1.49, "learning_rate": 0.00015910377323740473, "loss": 3.1144, "step": 5162 }, { "epoch": 1.5, "learning_rate": 0.0001589321994682823, "loss": 3.2121, "step": 5163 }, { "epoch": 1.5, "learning_rate": 0.00015876070077661952, "loss": 3.17, "step": 5164 }, { "epoch": 1.5, "learning_rate": 0.00015858927720016709, "loss": 3.1923, "step": 5165 }, { "epoch": 1.5, "learning_rate": 0.00015841792877665994, "loss": 3.182, "step": 5166 }, { "epoch": 1.5, "learning_rate": 0.00015824665554381578, "loss": 2.9975, "step": 5167 }, { "epoch": 1.5, "learning_rate": 0.0001580754575393364, "loss": 3.1815, "step": 5168 }, { "epoch": 1.5, "learning_rate": 0.00015790433480090632, "loss": 3.1293, "step": 5169 }, { "epoch": 1.5, "learning_rate": 0.0001577332873661943, "loss": 3.1856, "step": 5170 }, { "epoch": 1.5, "learning_rate": 0.0001575623152728518, "loss": 3.1244, "step": 5171 }, { "epoch": 1.5, "learning_rate": 0.00015739141855851413, "loss": 3.1271, "step": 5172 }, { "epoch": 1.5, "learning_rate": 0.00015722059726080002, "loss": 3.2803, "step": 5173 }, { "epoch": 1.5, "learning_rate": 0.00015704985141731115, "loss": 3.0374, "step": 5174 }, { "epoch": 1.5, "learning_rate": 0.00015687918106563326, "loss": 3.2125, "step": 5175 }, { "epoch": 1.5, "learning_rate": 0.00015670858624333478, "loss": 3.234, "step": 5176 }, { "epoch": 1.5, "learning_rate": 0.0001565380669879682, "loss": 3.1614, "step": 5177 }, { "epoch": 1.5, "learning_rate": 0.00015636762333706868, "loss": 3.1564, "step": 5178 }, { "epoch": 1.5, "learning_rate": 0.00015619725532815533, "loss": 3.1641, "step": 5179 }, { "epoch": 1.5, "learning_rate": 0.00015602696299873016, "loss": 3.0884, "step": 5180 }, { "epoch": 1.5, "learning_rate": 0.00015585674638627883, "loss": 3.1131, "step": 5181 }, { "epoch": 1.5, "learning_rate": 0.00015568660552827036, "loss": 3.1779, "step": 5182 }, { "epoch": 1.5, "learning_rate": 0.00015551654046215668, "loss": 3.0806, "step": 5183 }, { "epoch": 1.5, "learning_rate": 0.00015534655122537367, "loss": 3.0105, "step": 5184 }, { "epoch": 1.5, "learning_rate": 0.00015517663785533982, "loss": 3.0174, "step": 5185 }, { "epoch": 1.5, "learning_rate": 0.00015500680038945752, "loss": 3.0993, "step": 5186 }, { "epoch": 1.5, "learning_rate": 0.0001548370388651119, "loss": 3.3187, "step": 5187 }, { "epoch": 1.5, "learning_rate": 0.00015466735331967206, "loss": 3.094, "step": 5188 }, { "epoch": 1.5, "learning_rate": 0.0001544977437904897, "loss": 3.1944, "step": 5189 }, { "epoch": 1.5, "learning_rate": 0.0001543282103149001, "loss": 3.2544, "step": 5190 }, { "epoch": 1.5, "learning_rate": 0.0001541587529302218, "loss": 3.1072, "step": 5191 }, { "epoch": 1.5, "learning_rate": 0.00015398937167375676, "loss": 3.1428, "step": 5192 }, { "epoch": 1.5, "learning_rate": 0.00015382006658278986, "loss": 3.0977, "step": 5193 }, { "epoch": 1.5, "learning_rate": 0.0001536508376945891, "loss": 3.0319, "step": 5194 }, { "epoch": 1.5, "learning_rate": 0.0001534816850464063, "loss": 3.1062, "step": 5195 }, { "epoch": 1.5, "learning_rate": 0.0001533126086754758, "loss": 3.1383, "step": 5196 }, { "epoch": 1.51, "learning_rate": 0.0001531436086190159, "loss": 3.2505, "step": 5197 }, { "epoch": 1.51, "learning_rate": 0.00015297468491422722, "loss": 3.1718, "step": 5198 }, { "epoch": 1.51, "learning_rate": 0.00015280583759829435, "loss": 3.265, "step": 5199 }, { "epoch": 1.51, "learning_rate": 0.00015263706670838467, "loss": 3.1504, "step": 5200 }, { "epoch": 1.51, "learning_rate": 0.00015246837228164906, "loss": 3.1196, "step": 5201 }, { "epoch": 1.51, "learning_rate": 0.00015229975435522108, "loss": 3.0123, "step": 5202 }, { "epoch": 1.51, "learning_rate": 0.00015213121296621767, "loss": 3.0216, "step": 5203 }, { "epoch": 1.51, "learning_rate": 0.00015196274815173916, "loss": 3.1087, "step": 5204 }, { "epoch": 1.51, "learning_rate": 0.00015179435994886858, "loss": 3.2677, "step": 5205 }, { "epoch": 1.51, "learning_rate": 0.00015162604839467266, "loss": 3.2803, "step": 5206 }, { "epoch": 1.51, "learning_rate": 0.00015145781352620053, "loss": 3.0644, "step": 5207 }, { "epoch": 1.51, "learning_rate": 0.00015128965538048505, "loss": 3.3286, "step": 5208 }, { "epoch": 1.51, "learning_rate": 0.00015112157399454207, "loss": 3.2309, "step": 5209 }, { "epoch": 1.51, "learning_rate": 0.00015095356940537054, "loss": 3.03, "step": 5210 }, { "epoch": 1.51, "learning_rate": 0.00015078564164995223, "loss": 3.0838, "step": 5211 }, { "epoch": 1.51, "learning_rate": 0.0001506177907652521, "loss": 3.1295, "step": 5212 }, { "epoch": 1.51, "learning_rate": 0.00015045001678821863, "loss": 3.182, "step": 5213 }, { "epoch": 1.51, "learning_rate": 0.0001502823197557826, "loss": 3.2146, "step": 5214 }, { "epoch": 1.51, "learning_rate": 0.00015011469970485853, "loss": 3.2906, "step": 5215 }, { "epoch": 1.51, "learning_rate": 0.00014994715667234366, "loss": 3.1762, "step": 5216 }, { "epoch": 1.51, "learning_rate": 0.0001497796906951186, "loss": 3.1908, "step": 5217 }, { "epoch": 1.51, "learning_rate": 0.0001496123018100463, "loss": 3.0245, "step": 5218 }, { "epoch": 1.51, "learning_rate": 0.00014944499005397371, "loss": 3.162, "step": 5219 }, { "epoch": 1.51, "learning_rate": 0.00014927775546372994, "loss": 3.0931, "step": 5220 }, { "epoch": 1.51, "learning_rate": 0.00014911059807612743, "loss": 3.0925, "step": 5221 }, { "epoch": 1.51, "learning_rate": 0.0001489435179279619, "loss": 3.0957, "step": 5222 }, { "epoch": 1.51, "learning_rate": 0.00014877651505601158, "loss": 3.0923, "step": 5223 }, { "epoch": 1.51, "learning_rate": 0.000148609589497038, "loss": 3.1732, "step": 5224 }, { "epoch": 1.51, "learning_rate": 0.00014844274128778562, "loss": 3.0558, "step": 5225 }, { "epoch": 1.51, "learning_rate": 0.0001482759704649821, "loss": 3.292, "step": 5226 }, { "epoch": 1.51, "learning_rate": 0.0001481092770653374, "loss": 3.1974, "step": 5227 }, { "epoch": 1.51, "learning_rate": 0.0001479426611255453, "loss": 3.253, "step": 5228 }, { "epoch": 1.51, "learning_rate": 0.00014777612268228175, "loss": 3.1121, "step": 5229 }, { "epoch": 1.51, "learning_rate": 0.00014760966177220598, "loss": 3.0561, "step": 5230 }, { "epoch": 1.51, "learning_rate": 0.00014744327843196043, "loss": 3.1365, "step": 5231 }, { "epoch": 1.52, "learning_rate": 0.00014727697269816987, "loss": 3.2947, "step": 5232 }, { "epoch": 1.52, "learning_rate": 0.00014711074460744246, "loss": 3.1352, "step": 5233 }, { "epoch": 1.52, "learning_rate": 0.00014694459419636906, "loss": 3.3585, "step": 5234 }, { "epoch": 1.52, "learning_rate": 0.00014677852150152376, "loss": 3.2597, "step": 5235 }, { "epoch": 1.52, "learning_rate": 0.00014661252655946285, "loss": 3.0481, "step": 5236 }, { "epoch": 1.52, "learning_rate": 0.00014644660940672628, "loss": 3.0324, "step": 5237 }, { "epoch": 1.52, "learning_rate": 0.00014628077007983637, "loss": 3.287, "step": 5238 }, { "epoch": 1.52, "learning_rate": 0.00014611500861529825, "loss": 3.145, "step": 5239 }, { "epoch": 1.52, "learning_rate": 0.00014594932504960053, "loss": 3.1928, "step": 5240 }, { "epoch": 1.52, "learning_rate": 0.00014578371941921381, "loss": 3.1074, "step": 5241 }, { "epoch": 1.52, "learning_rate": 0.00014561819176059227, "loss": 3.1368, "step": 5242 }, { "epoch": 1.52, "learning_rate": 0.00014545274211017262, "loss": 3.1952, "step": 5243 }, { "epoch": 1.52, "learning_rate": 0.00014528737050437452, "loss": 2.9415, "step": 5244 }, { "epoch": 1.52, "learning_rate": 0.00014512207697960005, "loss": 3.1715, "step": 5245 }, { "epoch": 1.52, "learning_rate": 0.00014495686157223477, "loss": 3.1022, "step": 5246 }, { "epoch": 1.52, "learning_rate": 0.00014479172431864645, "loss": 3.1012, "step": 5247 }, { "epoch": 1.52, "learning_rate": 0.00014462666525518576, "loss": 3.1904, "step": 5248 }, { "epoch": 1.52, "learning_rate": 0.00014446168441818665, "loss": 3.1091, "step": 5249 }, { "epoch": 1.52, "learning_rate": 0.00014429678184396506, "loss": 3.257, "step": 5250 }, { "epoch": 1.52, "learning_rate": 0.00014413195756882036, "loss": 3.1502, "step": 5251 }, { "epoch": 1.52, "learning_rate": 0.00014396721162903442, "loss": 3.169, "step": 5252 }, { "epoch": 1.52, "learning_rate": 0.00014380254406087207, "loss": 3.1429, "step": 5253 }, { "epoch": 1.52, "learning_rate": 0.0001436379549005804, "loss": 3.1733, "step": 5254 }, { "epoch": 1.52, "learning_rate": 0.0001434734441843899, "loss": 3.2021, "step": 5255 }, { "epoch": 1.52, "learning_rate": 0.0001433090119485132, "loss": 3.1254, "step": 5256 }, { "epoch": 1.52, "learning_rate": 0.00014314465822914585, "loss": 3.1646, "step": 5257 }, { "epoch": 1.52, "learning_rate": 0.00014298038306246642, "loss": 3.0537, "step": 5258 }, { "epoch": 1.52, "learning_rate": 0.00014281618648463563, "loss": 3.2739, "step": 5259 }, { "epoch": 1.52, "learning_rate": 0.00014265206853179735, "loss": 3.1048, "step": 5260 }, { "epoch": 1.52, "learning_rate": 0.00014248802924007802, "loss": 3.1322, "step": 5261 }, { "epoch": 1.52, "learning_rate": 0.00014232406864558695, "loss": 3.2404, "step": 5262 }, { "epoch": 1.52, "learning_rate": 0.00014216018678441555, "loss": 3.1883, "step": 5263 }, { "epoch": 1.52, "learning_rate": 0.00014199638369263856, "loss": 3.0812, "step": 5264 }, { "epoch": 1.52, "learning_rate": 0.00014183265940631302, "loss": 3.0961, "step": 5265 }, { "epoch": 1.53, "learning_rate": 0.00014166901396147851, "loss": 3.1166, "step": 5266 }, { "epoch": 1.53, "learning_rate": 0.00014150544739415755, "loss": 3.1536, "step": 5267 }, { "epoch": 1.53, "learning_rate": 0.00014134195974035524, "loss": 3.0609, "step": 5268 }, { "epoch": 1.53, "learning_rate": 0.00014117855103605936, "loss": 3.0596, "step": 5269 }, { "epoch": 1.53, "learning_rate": 0.00014101522131723993, "loss": 3.2519, "step": 5270 }, { "epoch": 1.53, "learning_rate": 0.00014085197061985022, "loss": 3.3013, "step": 5271 }, { "epoch": 1.53, "learning_rate": 0.00014068879897982534, "loss": 3.1412, "step": 5272 }, { "epoch": 1.53, "learning_rate": 0.00014052570643308376, "loss": 3.1466, "step": 5273 }, { "epoch": 1.53, "learning_rate": 0.00014036269301552606, "loss": 3.2268, "step": 5274 }, { "epoch": 1.53, "learning_rate": 0.00014019975876303532, "loss": 3.2264, "step": 5275 }, { "epoch": 1.53, "learning_rate": 0.00014003690371147754, "loss": 3.1332, "step": 5276 }, { "epoch": 1.53, "learning_rate": 0.00013987412789670124, "loss": 3.1265, "step": 5277 }, { "epoch": 1.53, "learning_rate": 0.00013971143135453745, "loss": 3.1487, "step": 5278 }, { "epoch": 1.53, "learning_rate": 0.00013954881412079945, "loss": 3.0509, "step": 5279 }, { "epoch": 1.53, "learning_rate": 0.00013938627623128364, "loss": 3.1947, "step": 5280 }, { "epoch": 1.53, "learning_rate": 0.0001392238177217683, "loss": 3.2064, "step": 5281 }, { "epoch": 1.53, "learning_rate": 0.00013906143862801485, "loss": 3.1461, "step": 5282 }, { "epoch": 1.53, "learning_rate": 0.00013889913898576677, "loss": 3.1508, "step": 5283 }, { "epoch": 1.53, "learning_rate": 0.00013873691883075018, "loss": 2.9904, "step": 5284 }, { "epoch": 1.53, "learning_rate": 0.0001385747781986738, "loss": 3.0992, "step": 5285 }, { "epoch": 1.53, "learning_rate": 0.00013841271712522886, "loss": 3.2263, "step": 5286 }, { "epoch": 1.53, "learning_rate": 0.00013825073564608907, "loss": 3.1719, "step": 5287 }, { "epoch": 1.53, "learning_rate": 0.0001380888337969104, "loss": 3.0805, "step": 5288 }, { "epoch": 1.53, "learning_rate": 0.00013792701161333164, "loss": 3.124, "step": 5289 }, { "epoch": 1.53, "learning_rate": 0.0001377652691309736, "loss": 3.1584, "step": 5290 }, { "epoch": 1.53, "learning_rate": 0.00013760360638544013, "loss": 3.0419, "step": 5291 }, { "epoch": 1.53, "learning_rate": 0.00013744202341231675, "loss": 3.0817, "step": 5292 }, { "epoch": 1.53, "learning_rate": 0.00013728052024717237, "loss": 3.176, "step": 5293 }, { "epoch": 1.53, "learning_rate": 0.0001371190969255574, "loss": 3.2068, "step": 5294 }, { "epoch": 1.53, "learning_rate": 0.00013695775348300532, "loss": 3.1377, "step": 5295 }, { "epoch": 1.53, "learning_rate": 0.0001367964899550319, "loss": 3.1348, "step": 5296 }, { "epoch": 1.53, "learning_rate": 0.000136635306377135, "loss": 3.2, "step": 5297 }, { "epoch": 1.53, "learning_rate": 0.0001364742027847954, "loss": 3.1097, "step": 5298 }, { "epoch": 1.53, "learning_rate": 0.00013631317921347562, "loss": 3.1914, "step": 5299 }, { "epoch": 1.53, "learning_rate": 0.00013615223569862135, "loss": 3.2135, "step": 5300 }, { "epoch": 1.54, "learning_rate": 0.00013599137227565984, "loss": 3.1536, "step": 5301 }, { "epoch": 1.54, "learning_rate": 0.00013583058898000139, "loss": 3.0547, "step": 5302 }, { "epoch": 1.54, "learning_rate": 0.00013566988584703816, "loss": 3.1536, "step": 5303 }, { "epoch": 1.54, "learning_rate": 0.00013550926291214498, "loss": 3.1743, "step": 5304 }, { "epoch": 1.54, "learning_rate": 0.00013534872021067913, "loss": 3.1049, "step": 5305 }, { "epoch": 1.54, "learning_rate": 0.00013518825777797971, "loss": 3.0741, "step": 5306 }, { "epoch": 1.54, "learning_rate": 0.00013502787564936874, "loss": 3.2195, "step": 5307 }, { "epoch": 1.54, "learning_rate": 0.00013486757386015004, "loss": 3.0371, "step": 5308 }, { "epoch": 1.54, "learning_rate": 0.00013470735244561026, "loss": 3.129, "step": 5309 }, { "epoch": 1.54, "learning_rate": 0.00013454721144101782, "loss": 3.0907, "step": 5310 }, { "epoch": 1.54, "learning_rate": 0.000134387150881624, "loss": 3.2764, "step": 5311 }, { "epoch": 1.54, "learning_rate": 0.00013422717080266178, "loss": 3.1031, "step": 5312 }, { "epoch": 1.54, "learning_rate": 0.00013406727123934693, "loss": 3.288, "step": 5313 }, { "epoch": 1.54, "learning_rate": 0.0001339074522268774, "loss": 3.2172, "step": 5314 }, { "epoch": 1.54, "learning_rate": 0.00013374771380043305, "loss": 3.1777, "step": 5315 }, { "epoch": 1.54, "learning_rate": 0.00013358805599517654, "loss": 3.1742, "step": 5316 }, { "epoch": 1.54, "learning_rate": 0.00013342847884625214, "loss": 2.9946, "step": 5317 }, { "epoch": 1.54, "learning_rate": 0.00013326898238878715, "loss": 3.1068, "step": 5318 }, { "epoch": 1.54, "learning_rate": 0.00013310956665789025, "loss": 3.1322, "step": 5319 }, { "epoch": 1.54, "learning_rate": 0.00013295023168865305, "loss": 3.0843, "step": 5320 }, { "epoch": 1.54, "learning_rate": 0.00013279097751614928, "loss": 3.0899, "step": 5321 }, { "epoch": 1.54, "learning_rate": 0.0001326318041754343, "loss": 3.1408, "step": 5322 }, { "epoch": 1.54, "learning_rate": 0.00013247271170154645, "loss": 3.166, "step": 5323 }, { "epoch": 1.54, "learning_rate": 0.00013231370012950567, "loss": 3.1185, "step": 5324 }, { "epoch": 1.54, "learning_rate": 0.0001321547694943146, "loss": 3.1159, "step": 5325 }, { "epoch": 1.54, "learning_rate": 0.00013199591983095748, "loss": 3.2484, "step": 5326 }, { "epoch": 1.54, "learning_rate": 0.00013183715117440144, "loss": 3.2348, "step": 5327 }, { "epoch": 1.54, "learning_rate": 0.00013167846355959502, "loss": 3.1438, "step": 5328 }, { "epoch": 1.54, "learning_rate": 0.00013151985702146942, "loss": 3.1333, "step": 5329 }, { "epoch": 1.54, "learning_rate": 0.000131361331594938, "loss": 3.0621, "step": 5330 }, { "epoch": 1.54, "learning_rate": 0.00013120288731489592, "loss": 3.2499, "step": 5331 }, { "epoch": 1.54, "learning_rate": 0.00013104452421622092, "loss": 3.0724, "step": 5332 }, { "epoch": 1.54, "learning_rate": 0.00013088624233377239, "loss": 3.0089, "step": 5333 }, { "epoch": 1.54, "learning_rate": 0.00013072804170239228, "loss": 3.1741, "step": 5334 }, { "epoch": 1.55, "learning_rate": 0.0001305699223569043, "loss": 3.0532, "step": 5335 }, { "epoch": 1.55, "learning_rate": 0.0001304118843321146, "loss": 3.2063, "step": 5336 }, { "epoch": 1.55, "learning_rate": 0.00013025392766281103, "loss": 3.1054, "step": 5337 }, { "epoch": 1.55, "learning_rate": 0.00013009605238376387, "loss": 3.1746, "step": 5338 }, { "epoch": 1.55, "learning_rate": 0.00012993825852972558, "loss": 3.2553, "step": 5339 }, { "epoch": 1.55, "learning_rate": 0.00012978054613543017, "loss": 3.2672, "step": 5340 }, { "epoch": 1.55, "learning_rate": 0.00012962291523559426, "loss": 3.104, "step": 5341 }, { "epoch": 1.55, "learning_rate": 0.00012946536586491618, "loss": 3.1966, "step": 5342 }, { "epoch": 1.55, "learning_rate": 0.00012930789805807658, "loss": 3.1915, "step": 5343 }, { "epoch": 1.55, "learning_rate": 0.0001291505118497378, "loss": 3.1312, "step": 5344 }, { "epoch": 1.55, "learning_rate": 0.0001289932072745447, "loss": 3.1569, "step": 5345 }, { "epoch": 1.55, "learning_rate": 0.00012883598436712367, "loss": 3.0799, "step": 5346 }, { "epoch": 1.55, "learning_rate": 0.00012867884316208344, "loss": 3.0748, "step": 5347 }, { "epoch": 1.55, "learning_rate": 0.00012852178369401486, "loss": 3.1858, "step": 5348 }, { "epoch": 1.55, "learning_rate": 0.00012836480599749027, "loss": 3.1225, "step": 5349 }, { "epoch": 1.55, "learning_rate": 0.00012820791010706472, "loss": 3.0755, "step": 5350 }, { "epoch": 1.55, "learning_rate": 0.0001280510960572745, "loss": 3.0321, "step": 5351 }, { "epoch": 1.55, "learning_rate": 0.0001278943638826386, "loss": 3.1258, "step": 5352 }, { "epoch": 1.55, "learning_rate": 0.00012773771361765734, "loss": 3.0121, "step": 5353 }, { "epoch": 1.55, "learning_rate": 0.00012758114529681357, "loss": 3.2094, "step": 5354 }, { "epoch": 1.55, "learning_rate": 0.00012742465895457166, "loss": 3.1055, "step": 5355 }, { "epoch": 1.55, "learning_rate": 0.00012726825462537822, "loss": 3.2514, "step": 5356 }, { "epoch": 1.55, "learning_rate": 0.0001271119323436618, "loss": 3.0504, "step": 5357 }, { "epoch": 1.55, "learning_rate": 0.00012695569214383256, "loss": 3.0434, "step": 5358 }, { "epoch": 1.55, "learning_rate": 0.00012679953406028317, "loss": 3.1434, "step": 5359 }, { "epoch": 1.55, "learning_rate": 0.00012664345812738752, "loss": 3.1578, "step": 5360 }, { "epoch": 1.55, "learning_rate": 0.00012648746437950208, "loss": 3.0658, "step": 5361 }, { "epoch": 1.55, "learning_rate": 0.0001263315528509647, "loss": 3.0547, "step": 5362 }, { "epoch": 1.55, "learning_rate": 0.00012617572357609562, "loss": 3.1036, "step": 5363 }, { "epoch": 1.55, "learning_rate": 0.00012601997658919645, "loss": 3.1853, "step": 5364 }, { "epoch": 1.55, "learning_rate": 0.000125864311924551, "loss": 3.1244, "step": 5365 }, { "epoch": 1.55, "learning_rate": 0.0001257087296164252, "loss": 3.211, "step": 5366 }, { "epoch": 1.55, "learning_rate": 0.00012555322969906617, "loss": 2.9349, "step": 5367 }, { "epoch": 1.55, "learning_rate": 0.00012539781220670366, "loss": 3.3625, "step": 5368 }, { "epoch": 1.55, "learning_rate": 0.00012524247717354852, "loss": 3.1634, "step": 5369 }, { "epoch": 1.56, "learning_rate": 0.00012508722463379425, "loss": 3.0458, "step": 5370 }, { "epoch": 1.56, "learning_rate": 0.00012493205462161533, "loss": 3.073, "step": 5371 }, { "epoch": 1.56, "learning_rate": 0.00012477696717116875, "loss": 3.1837, "step": 5372 }, { "epoch": 1.56, "learning_rate": 0.0001246219623165931, "loss": 3.2856, "step": 5373 }, { "epoch": 1.56, "learning_rate": 0.00012446704009200894, "loss": 3.2806, "step": 5374 }, { "epoch": 1.56, "learning_rate": 0.00012431220053151832, "loss": 3.0416, "step": 5375 }, { "epoch": 1.56, "learning_rate": 0.00012415744366920506, "loss": 3.0895, "step": 5376 }, { "epoch": 1.56, "learning_rate": 0.0001240027695391353, "loss": 3.1812, "step": 5377 }, { "epoch": 1.56, "learning_rate": 0.00012384817817535638, "loss": 3.1306, "step": 5378 }, { "epoch": 1.56, "learning_rate": 0.000123693669611898, "loss": 2.9844, "step": 5379 }, { "epoch": 1.56, "learning_rate": 0.00012353924388277092, "loss": 3.1007, "step": 5380 }, { "epoch": 1.56, "learning_rate": 0.00012338490102196825, "loss": 3.0952, "step": 5381 }, { "epoch": 1.56, "learning_rate": 0.00012323064106346476, "loss": 3.2096, "step": 5382 }, { "epoch": 1.56, "learning_rate": 0.00012307646404121692, "loss": 3.05, "step": 5383 }, { "epoch": 1.56, "learning_rate": 0.00012292236998916263, "loss": 3.1101, "step": 5384 }, { "epoch": 1.56, "learning_rate": 0.00012276835894122214, "loss": 3.0353, "step": 5385 }, { "epoch": 1.56, "learning_rate": 0.00012261443093129691, "loss": 3.0664, "step": 5386 }, { "epoch": 1.56, "learning_rate": 0.0001224605859932702, "loss": 3.0404, "step": 5387 }, { "epoch": 1.56, "learning_rate": 0.0001224605859932702, "loss": 3.0942, "step": 5388 }, { "epoch": 1.56, "learning_rate": 0.0001223068241610073, "loss": 3.2222, "step": 5389 }, { "epoch": 1.56, "learning_rate": 0.00012215314546835482, "loss": 3.1991, "step": 5390 }, { "epoch": 1.56, "learning_rate": 0.00012199954994914125, "loss": 3.2577, "step": 5391 }, { "epoch": 1.56, "learning_rate": 0.00012184603763717683, "loss": 3.1187, "step": 5392 }, { "epoch": 1.56, "learning_rate": 0.00012169260856625358, "loss": 3.0335, "step": 5393 }, { "epoch": 1.56, "learning_rate": 0.0001215392627701447, "loss": 3.0217, "step": 5394 }, { "epoch": 1.56, "learning_rate": 0.00012138600028260577, "loss": 3.2296, "step": 5395 }, { "epoch": 1.56, "learning_rate": 0.00012123282113737338, "loss": 3.0723, "step": 5396 }, { "epoch": 1.56, "learning_rate": 0.00012107972536816597, "loss": 3.1711, "step": 5397 }, { "epoch": 1.56, "learning_rate": 0.00012092671300868402, "loss": 3.0709, "step": 5398 }, { "epoch": 1.56, "learning_rate": 0.00012077378409260903, "loss": 3.1338, "step": 5399 }, { "epoch": 1.56, "learning_rate": 0.00012062093865360457, "loss": 3.1171, "step": 5400 }, { "epoch": 1.56, "learning_rate": 0.00012046817672531568, "loss": 3.1967, "step": 5401 }, { "epoch": 1.56, "learning_rate": 0.00012031549834136923, "loss": 3.0787, "step": 5402 }, { "epoch": 1.56, "learning_rate": 0.00012016290353537318, "loss": 2.9692, "step": 5403 }, { "epoch": 1.57, "learning_rate": 0.0001200103923409177, "loss": 3.1746, "step": 5404 }, { "epoch": 1.57, "learning_rate": 0.00011985796479157424, "loss": 3.1496, "step": 5405 }, { "epoch": 1.57, "learning_rate": 0.0001197056209208956, "loss": 3.1575, "step": 5406 }, { "epoch": 1.57, "learning_rate": 0.00011955336076241679, "loss": 3.1932, "step": 5407 }, { "epoch": 1.57, "learning_rate": 0.0001194011843496537, "loss": 3.0708, "step": 5408 }, { "epoch": 1.57, "learning_rate": 0.00011924909171610433, "loss": 3.0858, "step": 5409 }, { "epoch": 1.57, "learning_rate": 0.00011909708289524801, "loss": 3.1443, "step": 5410 }, { "epoch": 1.57, "learning_rate": 0.00011894515792054572, "loss": 3.0889, "step": 5411 }, { "epoch": 1.57, "learning_rate": 0.0001187933168254397, "loss": 3.1304, "step": 5412 }, { "epoch": 1.57, "learning_rate": 0.00011864155964335422, "loss": 3.2459, "step": 5413 }, { "epoch": 1.57, "learning_rate": 0.0001184898864076946, "loss": 3.1676, "step": 5414 }, { "epoch": 1.57, "learning_rate": 0.00011833829715184774, "loss": 3.1648, "step": 5415 }, { "epoch": 1.57, "learning_rate": 0.00011818679190918247, "loss": 3.0778, "step": 5416 }, { "epoch": 1.57, "learning_rate": 0.00011803537071304855, "loss": 3.0615, "step": 5417 }, { "epoch": 1.57, "learning_rate": 0.00011788403359677769, "loss": 3.3043, "step": 5418 }, { "epoch": 1.57, "learning_rate": 0.00011773278059368292, "loss": 3.3182, "step": 5419 }, { "epoch": 1.57, "learning_rate": 0.00011758161173705894, "loss": 3.0587, "step": 5420 }, { "epoch": 1.57, "learning_rate": 0.00011743052706018142, "loss": 3.1818, "step": 5421 }, { "epoch": 1.57, "learning_rate": 0.0001172795265963082, "loss": 3.0737, "step": 5422 }, { "epoch": 1.57, "learning_rate": 0.00011712861037867795, "loss": 3.0625, "step": 5423 }, { "epoch": 1.57, "learning_rate": 0.00011697777844051105, "loss": 3.1656, "step": 5424 }, { "epoch": 1.57, "learning_rate": 0.0001168270308150094, "loss": 3.0614, "step": 5425 }, { "epoch": 1.57, "learning_rate": 0.00011667636753535632, "loss": 3.2706, "step": 5426 }, { "epoch": 1.57, "learning_rate": 0.00011652578863471664, "loss": 3.0948, "step": 5427 }, { "epoch": 1.57, "learning_rate": 0.00011637529414623621, "loss": 3.1239, "step": 5428 }, { "epoch": 1.57, "learning_rate": 0.00011622488410304288, "loss": 3.0686, "step": 5429 }, { "epoch": 1.57, "learning_rate": 0.00011607455853824539, "loss": 3.053, "step": 5430 }, { "epoch": 1.57, "learning_rate": 0.00011592431748493432, "loss": 3.1994, "step": 5431 }, { "epoch": 1.57, "learning_rate": 0.00011577416097618138, "loss": 3.2508, "step": 5432 }, { "epoch": 1.57, "learning_rate": 0.00011562408904503952, "loss": 3.2567, "step": 5433 }, { "epoch": 1.57, "learning_rate": 0.00011547410172454342, "loss": 3.2176, "step": 5434 }, { "epoch": 1.57, "learning_rate": 0.00011532419904770908, "loss": 3.2951, "step": 5435 }, { "epoch": 1.57, "learning_rate": 0.00011517438104753386, "loss": 3.0762, "step": 5436 }, { "epoch": 1.57, "learning_rate": 0.00011502464775699617, "loss": 3.2401, "step": 5437 }, { "epoch": 1.57, "learning_rate": 0.00011487499920905625, "loss": 3.0554, "step": 5438 }, { "epoch": 1.58, "learning_rate": 0.0001147254354366552, "loss": 3.1294, "step": 5439 }, { "epoch": 1.58, "learning_rate": 0.00011457595647271602, "loss": 3.0859, "step": 5440 }, { "epoch": 1.58, "learning_rate": 0.00011442656235014248, "loss": 3.1748, "step": 5441 }, { "epoch": 1.58, "learning_rate": 0.00011427725310181981, "loss": 3.0671, "step": 5442 }, { "epoch": 1.58, "learning_rate": 0.0001141280287606149, "loss": 3.0693, "step": 5443 }, { "epoch": 1.58, "learning_rate": 0.00011397888935937561, "loss": 3.2975, "step": 5444 }, { "epoch": 1.58, "learning_rate": 0.00011382983493093146, "loss": 3.0962, "step": 5445 }, { "epoch": 1.58, "learning_rate": 0.00011368086550809264, "loss": 3.0998, "step": 5446 }, { "epoch": 1.58, "learning_rate": 0.00011353198112365132, "loss": 3.2048, "step": 5447 }, { "epoch": 1.58, "learning_rate": 0.00011338318181038037, "loss": 3.1093, "step": 5448 }, { "epoch": 1.58, "learning_rate": 0.00011323446760103445, "loss": 2.993, "step": 5449 }, { "epoch": 1.58, "learning_rate": 0.00011308583852834914, "loss": 3.2219, "step": 5450 }, { "epoch": 1.58, "learning_rate": 0.00011293729462504115, "loss": 3.0682, "step": 5451 }, { "epoch": 1.58, "learning_rate": 0.0001127888359238089, "loss": 3.1374, "step": 5452 }, { "epoch": 1.58, "learning_rate": 0.00011264046245733178, "loss": 3.1466, "step": 5453 }, { "epoch": 1.58, "learning_rate": 0.00011249217425827063, "loss": 3.083, "step": 5454 }, { "epoch": 1.58, "learning_rate": 0.00011234397135926705, "loss": 3.1164, "step": 5455 }, { "epoch": 1.58, "learning_rate": 0.00011219585379294444, "loss": 3.1787, "step": 5456 }, { "epoch": 1.58, "learning_rate": 0.00011204782159190685, "loss": 3.2134, "step": 5457 }, { "epoch": 1.58, "learning_rate": 0.00011189987478874014, "loss": 3.0724, "step": 5458 }, { "epoch": 1.58, "learning_rate": 0.00011175201341601088, "loss": 3.1622, "step": 5459 }, { "epoch": 1.58, "learning_rate": 0.00011160423750626692, "loss": 3.2536, "step": 5460 }, { "epoch": 1.58, "learning_rate": 0.00011145654709203746, "loss": 3.1974, "step": 5461 }, { "epoch": 1.58, "learning_rate": 0.00011130894220583293, "loss": 3.0959, "step": 5462 }, { "epoch": 1.58, "learning_rate": 0.00011116142288014486, "loss": 3.1825, "step": 5463 }, { "epoch": 1.58, "learning_rate": 0.00011101398914744565, "loss": 3.0801, "step": 5464 }, { "epoch": 1.58, "learning_rate": 0.0001108666410401894, "loss": 3.0428, "step": 5465 }, { "epoch": 1.58, "learning_rate": 0.00011071937859081077, "loss": 3.0085, "step": 5466 }, { "epoch": 1.58, "learning_rate": 0.00011057220183172623, "loss": 2.9627, "step": 5467 }, { "epoch": 1.58, "learning_rate": 0.00011042511079533273, "loss": 3.2802, "step": 5468 }, { "epoch": 1.58, "learning_rate": 0.00011027810551400868, "loss": 3.1886, "step": 5469 }, { "epoch": 1.58, "learning_rate": 0.00011013118602011357, "loss": 3.0375, "step": 5470 }, { "epoch": 1.58, "learning_rate": 0.00010998435234598814, "loss": 3.2455, "step": 5471 }, { "epoch": 1.58, "learning_rate": 0.00010983760452395414, "loss": 3.0744, "step": 5472 }, { "epoch": 1.59, "learning_rate": 0.00010969094258631418, "loss": 3.283, "step": 5473 }, { "epoch": 1.59, "learning_rate": 0.00010954436656535249, "loss": 3.1899, "step": 5474 }, { "epoch": 1.59, "learning_rate": 0.00010939787649333372, "loss": 3.2315, "step": 5475 }, { "epoch": 1.59, "learning_rate": 0.00010925147240250427, "loss": 3.1046, "step": 5476 }, { "epoch": 1.59, "learning_rate": 0.00010910515432509104, "loss": 3.1273, "step": 5477 }, { "epoch": 1.59, "learning_rate": 0.0001089589222933024, "loss": 3.1227, "step": 5478 }, { "epoch": 1.59, "learning_rate": 0.00010881277633932779, "loss": 3.1894, "step": 5479 }, { "epoch": 1.59, "learning_rate": 0.00010866671649533722, "loss": 3.1886, "step": 5480 }, { "epoch": 1.59, "learning_rate": 0.00010852074279348235, "loss": 3.1275, "step": 5481 }, { "epoch": 1.59, "learning_rate": 0.00010837485526589535, "loss": 3.0769, "step": 5482 }, { "epoch": 1.59, "learning_rate": 0.00010822905394468995, "loss": 3.0361, "step": 5483 }, { "epoch": 1.59, "learning_rate": 0.00010808333886196037, "loss": 3.137, "step": 5484 }, { "epoch": 1.59, "learning_rate": 0.00010793771004978236, "loss": 3.1749, "step": 5485 }, { "epoch": 1.59, "learning_rate": 0.00010779216754021215, "loss": 3.1162, "step": 5486 }, { "epoch": 1.59, "learning_rate": 0.00010764671136528741, "loss": 3.1732, "step": 5487 }, { "epoch": 1.59, "learning_rate": 0.00010750134155702673, "loss": 3.1083, "step": 5488 }, { "epoch": 1.59, "learning_rate": 0.00010735605814742933, "loss": 3.1178, "step": 5489 }, { "epoch": 1.59, "learning_rate": 0.000107210861168476, "loss": 3.1654, "step": 5490 }, { "epoch": 1.59, "learning_rate": 0.00010706575065212792, "loss": 3.0944, "step": 5491 }, { "epoch": 1.59, "learning_rate": 0.0001069207266303277, "loss": 3.1974, "step": 5492 }, { "epoch": 1.59, "learning_rate": 0.00010677578913499852, "loss": 3.0679, "step": 5493 }, { "epoch": 1.59, "learning_rate": 0.00010663093819804493, "loss": 3.1568, "step": 5494 }, { "epoch": 1.59, "learning_rate": 0.00010648617385135196, "loss": 3.1797, "step": 5495 }, { "epoch": 1.59, "learning_rate": 0.0001063414961267859, "loss": 3.0465, "step": 5496 }, { "epoch": 1.59, "learning_rate": 0.00010619690505619417, "loss": 3.1455, "step": 5497 }, { "epoch": 1.59, "learning_rate": 0.00010605240067140443, "loss": 3.1708, "step": 5498 }, { "epoch": 1.59, "learning_rate": 0.00010590798300422599, "loss": 3.1423, "step": 5499 }, { "epoch": 1.59, "learning_rate": 0.0001057636520864485, "loss": 3.0129, "step": 5500 }, { "epoch": 1.59, "learning_rate": 0.00010561940794984298, "loss": 3.0769, "step": 5501 }, { "epoch": 1.59, "learning_rate": 0.00010547525062616093, "loss": 3.2102, "step": 5502 }, { "epoch": 1.59, "learning_rate": 0.00010533118014713522, "loss": 3.1989, "step": 5503 }, { "epoch": 1.59, "learning_rate": 0.00010518719654447894, "loss": 3.2395, "step": 5504 }, { "epoch": 1.59, "learning_rate": 0.00010504329984988676, "loss": 3.0653, "step": 5505 }, { "epoch": 1.59, "learning_rate": 0.00010489949009503385, "loss": 3.1303, "step": 5506 }, { "epoch": 1.59, "learning_rate": 0.0001047557673115761, "loss": 3.1334, "step": 5507 }, { "epoch": 1.6, "learning_rate": 0.00010461213153115079, "loss": 3.02, "step": 5508 }, { "epoch": 1.6, "learning_rate": 0.00010446858278537535, "loss": 3.1165, "step": 5509 }, { "epoch": 1.6, "learning_rate": 0.00010432512110584869, "loss": 2.9439, "step": 5510 }, { "epoch": 1.6, "learning_rate": 0.00010418174652415002, "loss": 3.1927, "step": 5511 }, { "epoch": 1.6, "learning_rate": 0.00010403845907183989, "loss": 3.1363, "step": 5512 }, { "epoch": 1.6, "learning_rate": 0.00010389525878045919, "loss": 3.0676, "step": 5513 }, { "epoch": 1.6, "learning_rate": 0.00010375214568152997, "loss": 3.1188, "step": 5514 }, { "epoch": 1.6, "learning_rate": 0.00010360911980655502, "loss": 3.055, "step": 5515 }, { "epoch": 1.6, "learning_rate": 0.00010346618118701768, "loss": 3.0288, "step": 5516 }, { "epoch": 1.6, "learning_rate": 0.00010332332985438247, "loss": 3.2292, "step": 5517 }, { "epoch": 1.6, "learning_rate": 0.00010318056584009433, "loss": 3.0528, "step": 5518 }, { "epoch": 1.6, "learning_rate": 0.00010303788917557938, "loss": 3.0817, "step": 5519 }, { "epoch": 1.6, "learning_rate": 0.000102895299892244, "loss": 3.1629, "step": 5520 }, { "epoch": 1.6, "learning_rate": 0.00010275279802147591, "loss": 3.1764, "step": 5521 }, { "epoch": 1.6, "learning_rate": 0.00010261038359464304, "loss": 3.17, "step": 5522 }, { "epoch": 1.6, "learning_rate": 0.00010246805664309439, "loss": 3.0638, "step": 5523 }, { "epoch": 1.6, "learning_rate": 0.00010232581719815982, "loss": 3.0679, "step": 5524 }, { "epoch": 1.6, "learning_rate": 0.00010218366529114948, "loss": 3.1037, "step": 5525 }, { "epoch": 1.6, "learning_rate": 0.0001020416009533548, "loss": 3.0619, "step": 5526 }, { "epoch": 1.6, "learning_rate": 0.00010189962421604738, "loss": 3.1645, "step": 5527 }, { "epoch": 1.6, "learning_rate": 0.00010175773511048009, "loss": 3.0997, "step": 5528 }, { "epoch": 1.6, "learning_rate": 0.00010161593366788596, "loss": 3.2458, "step": 5529 }, { "epoch": 1.6, "learning_rate": 0.00010147421991947914, "loss": 3.2387, "step": 5530 }, { "epoch": 1.6, "learning_rate": 0.00010133259389645427, "loss": 3.2158, "step": 5531 }, { "epoch": 1.6, "learning_rate": 0.00010119105562998699, "loss": 3.0364, "step": 5532 }, { "epoch": 1.6, "learning_rate": 0.00010104960515123307, "loss": 3.1223, "step": 5533 }, { "epoch": 1.6, "learning_rate": 0.00010090824249132929, "loss": 3.3009, "step": 5534 }, { "epoch": 1.6, "learning_rate": 0.00010076696768139326, "loss": 3.0936, "step": 5535 }, { "epoch": 1.6, "learning_rate": 0.00010062578075252282, "loss": 3.1831, "step": 5536 }, { "epoch": 1.6, "learning_rate": 0.00010048468173579695, "loss": 3.1967, "step": 5537 }, { "epoch": 1.6, "learning_rate": 0.00010034367066227474, "loss": 3.0485, "step": 5538 }, { "epoch": 1.6, "learning_rate": 0.00010020274756299641, "loss": 3.2266, "step": 5539 }, { "epoch": 1.6, "learning_rate": 0.00010006191246898255, "loss": 3.1436, "step": 5540 }, { "epoch": 1.6, "learning_rate": 9.992116541123464e-05, "loss": 3.2538, "step": 5541 }, { "epoch": 1.61, "learning_rate": 9.97805064207345e-05, "loss": 3.0057, "step": 5542 }, { "epoch": 1.61, "learning_rate": 9.963993552844436e-05, "loss": 3.0449, "step": 5543 }, { "epoch": 1.61, "learning_rate": 9.949945276530781e-05, "loss": 3.1463, "step": 5544 }, { "epoch": 1.61, "learning_rate": 9.935905816224817e-05, "loss": 3.1752, "step": 5545 }, { "epoch": 1.61, "learning_rate": 9.921875175017003e-05, "loss": 3.0902, "step": 5546 }, { "epoch": 1.61, "learning_rate": 9.907853355995817e-05, "loss": 3.1715, "step": 5547 }, { "epoch": 1.61, "learning_rate": 9.893840362247809e-05, "loss": 3.0875, "step": 5548 }, { "epoch": 1.61, "learning_rate": 9.879836196857595e-05, "loss": 3.0317, "step": 5549 }, { "epoch": 1.61, "learning_rate": 9.865840862907837e-05, "loss": 3.1437, "step": 5550 }, { "epoch": 1.61, "learning_rate": 9.851854363479257e-05, "loss": 3.1242, "step": 5551 }, { "epoch": 1.61, "learning_rate": 9.837876701650605e-05, "loss": 3.0736, "step": 5552 }, { "epoch": 1.61, "learning_rate": 9.823907880498744e-05, "loss": 3.2624, "step": 5553 }, { "epoch": 1.61, "learning_rate": 9.809947903098526e-05, "loss": 3.1708, "step": 5554 }, { "epoch": 1.61, "learning_rate": 9.795996772522919e-05, "loss": 3.0965, "step": 5555 }, { "epoch": 1.61, "learning_rate": 9.782054491842879e-05, "loss": 2.9626, "step": 5556 }, { "epoch": 1.61, "learning_rate": 9.768121064127461e-05, "loss": 3.0937, "step": 5557 }, { "epoch": 1.61, "learning_rate": 9.754196492443763e-05, "loss": 3.1123, "step": 5558 }, { "epoch": 1.61, "learning_rate": 9.740280779856936e-05, "loss": 3.1006, "step": 5559 }, { "epoch": 1.61, "learning_rate": 9.726373929430155e-05, "loss": 3.1795, "step": 5560 }, { "epoch": 1.61, "learning_rate": 9.712475944224658e-05, "loss": 3.1755, "step": 5561 }, { "epoch": 1.61, "learning_rate": 9.69858682729976e-05, "loss": 3.047, "step": 5562 }, { "epoch": 1.61, "learning_rate": 9.684706581712766e-05, "loss": 3.2352, "step": 5563 }, { "epoch": 1.61, "learning_rate": 9.670835210519091e-05, "loss": 3.1047, "step": 5564 }, { "epoch": 1.61, "learning_rate": 9.656972716772144e-05, "loss": 3.0115, "step": 5565 }, { "epoch": 1.61, "learning_rate": 9.643119103523413e-05, "loss": 3.0072, "step": 5566 }, { "epoch": 1.61, "learning_rate": 9.629274373822422e-05, "loss": 3.131, "step": 5567 }, { "epoch": 1.61, "learning_rate": 9.615438530716752e-05, "loss": 3.1, "step": 5568 }, { "epoch": 1.61, "learning_rate": 9.601611577251984e-05, "loss": 3.0952, "step": 5569 }, { "epoch": 1.61, "learning_rate": 9.587793516471805e-05, "loss": 3.197, "step": 5570 }, { "epoch": 1.61, "learning_rate": 9.573984351417897e-05, "loss": 3.2412, "step": 5571 }, { "epoch": 1.61, "learning_rate": 9.56018408512998e-05, "loss": 3.1781, "step": 5572 }, { "epoch": 1.61, "learning_rate": 9.546392720645868e-05, "loss": 3.1386, "step": 5573 }, { "epoch": 1.61, "learning_rate": 9.53261026100135e-05, "loss": 3.1958, "step": 5574 }, { "epoch": 1.61, "learning_rate": 9.518836709230299e-05, "loss": 3.049, "step": 5575 }, { "epoch": 1.61, "learning_rate": 9.505072068364623e-05, "loss": 3.1813, "step": 5576 }, { "epoch": 1.62, "learning_rate": 9.491316341434264e-05, "loss": 3.0863, "step": 5577 }, { "epoch": 1.62, "learning_rate": 9.477569531467172e-05, "loss": 3.0816, "step": 5578 }, { "epoch": 1.62, "learning_rate": 9.46383164148939e-05, "loss": 3.2418, "step": 5579 }, { "epoch": 1.62, "learning_rate": 9.450102674524952e-05, "loss": 3.2126, "step": 5580 }, { "epoch": 1.62, "learning_rate": 9.43638263359593e-05, "loss": 2.9755, "step": 5581 }, { "epoch": 1.62, "learning_rate": 9.422671521722459e-05, "loss": 3.0607, "step": 5582 }, { "epoch": 1.62, "learning_rate": 9.408969341922696e-05, "loss": 3.0805, "step": 5583 }, { "epoch": 1.62, "learning_rate": 9.395276097212841e-05, "loss": 3.1012, "step": 5584 }, { "epoch": 1.62, "learning_rate": 9.381591790607081e-05, "loss": 3.2185, "step": 5585 }, { "epoch": 1.62, "learning_rate": 9.367916425117712e-05, "loss": 3.2632, "step": 5586 }, { "epoch": 1.62, "learning_rate": 9.354250003754983e-05, "loss": 3.08, "step": 5587 }, { "epoch": 1.62, "learning_rate": 9.340592529527236e-05, "loss": 3.011, "step": 5588 }, { "epoch": 1.62, "learning_rate": 9.326944005440819e-05, "loss": 3.0774, "step": 5589 }, { "epoch": 1.62, "learning_rate": 9.31330443450008e-05, "loss": 3.0464, "step": 5590 }, { "epoch": 1.62, "learning_rate": 9.299673819707454e-05, "loss": 3.1466, "step": 5591 }, { "epoch": 1.62, "learning_rate": 9.286052164063369e-05, "loss": 3.0501, "step": 5592 }, { "epoch": 1.62, "learning_rate": 9.272439470566301e-05, "loss": 3.0369, "step": 5593 }, { "epoch": 1.62, "learning_rate": 9.258835742212718e-05, "loss": 3.0716, "step": 5594 }, { "epoch": 1.62, "learning_rate": 9.245240981997155e-05, "loss": 3.1176, "step": 5595 }, { "epoch": 1.62, "learning_rate": 9.231655192912136e-05, "loss": 3.1047, "step": 5596 }, { "epoch": 1.62, "learning_rate": 9.218078377948257e-05, "loss": 3.1662, "step": 5597 }, { "epoch": 1.62, "learning_rate": 9.204510540094096e-05, "loss": 3.0296, "step": 5598 }, { "epoch": 1.62, "learning_rate": 9.190951682336251e-05, "loss": 3.1674, "step": 5599 }, { "epoch": 1.62, "learning_rate": 9.177401807659386e-05, "loss": 3.1993, "step": 5600 }, { "epoch": 1.62, "learning_rate": 9.163860919046152e-05, "loss": 3.0883, "step": 5601 }, { "epoch": 1.62, "learning_rate": 9.150329019477254e-05, "loss": 3.1375, "step": 5602 }, { "epoch": 1.62, "learning_rate": 9.136806111931367e-05, "loss": 3.0646, "step": 5603 }, { "epoch": 1.62, "learning_rate": 9.123292199385246e-05, "loss": 3.1663, "step": 5604 }, { "epoch": 1.62, "learning_rate": 9.109787284813615e-05, "loss": 3.2081, "step": 5605 }, { "epoch": 1.62, "learning_rate": 9.096291371189258e-05, "loss": 3.3023, "step": 5606 }, { "epoch": 1.62, "learning_rate": 9.082804461482952e-05, "loss": 3.2478, "step": 5607 }, { "epoch": 1.62, "learning_rate": 9.069326558663487e-05, "loss": 3.1789, "step": 5608 }, { "epoch": 1.62, "learning_rate": 9.055857665697693e-05, "loss": 3.1448, "step": 5609 }, { "epoch": 1.62, "learning_rate": 9.042397785550405e-05, "loss": 3.1976, "step": 5610 }, { "epoch": 1.62, "learning_rate": 9.028946921184494e-05, "loss": 3.0925, "step": 5611 }, { "epoch": 1.63, "learning_rate": 9.015505075560798e-05, "loss": 3.1302, "step": 5612 }, { "epoch": 1.63, "learning_rate": 9.002072251638221e-05, "loss": 3.2994, "step": 5613 }, { "epoch": 1.63, "learning_rate": 8.98864845237365e-05, "loss": 3.2657, "step": 5614 }, { "epoch": 1.63, "learning_rate": 8.975233680722e-05, "loss": 3.0787, "step": 5615 }, { "epoch": 1.63, "learning_rate": 8.961827939636197e-05, "loss": 3.2262, "step": 5616 }, { "epoch": 1.63, "learning_rate": 8.948431232067156e-05, "loss": 3.1494, "step": 5617 }, { "epoch": 1.63, "learning_rate": 8.93504356096384e-05, "loss": 3.2401, "step": 5618 }, { "epoch": 1.63, "learning_rate": 8.921664929273204e-05, "loss": 3.0623, "step": 5619 }, { "epoch": 1.63, "learning_rate": 8.908295339940231e-05, "loss": 3.1681, "step": 5620 }, { "epoch": 1.63, "learning_rate": 8.894934795907872e-05, "loss": 3.1344, "step": 5621 }, { "epoch": 1.63, "learning_rate": 8.881583300117136e-05, "loss": 3.1869, "step": 5622 }, { "epoch": 1.63, "learning_rate": 8.868240855506993e-05, "loss": 3.0666, "step": 5623 }, { "epoch": 1.63, "learning_rate": 8.854907465014478e-05, "loss": 3.1513, "step": 5624 }, { "epoch": 1.63, "learning_rate": 8.841583131574576e-05, "loss": 3.2698, "step": 5625 }, { "epoch": 1.63, "learning_rate": 8.828267858120293e-05, "loss": 3.1768, "step": 5626 }, { "epoch": 1.63, "learning_rate": 8.81496164758267e-05, "loss": 2.9771, "step": 5627 }, { "epoch": 1.63, "learning_rate": 8.801664502890722e-05, "loss": 3.2075, "step": 5628 }, { "epoch": 1.63, "learning_rate": 8.7883764269715e-05, "loss": 3.0786, "step": 5629 }, { "epoch": 1.63, "learning_rate": 8.775097422750011e-05, "loss": 3.1317, "step": 5630 }, { "epoch": 1.63, "learning_rate": 8.761827493149321e-05, "loss": 3.1709, "step": 5631 }, { "epoch": 1.63, "learning_rate": 8.748566641090433e-05, "loss": 3.0478, "step": 5632 }, { "epoch": 1.63, "learning_rate": 8.735314869492428e-05, "loss": 3.2399, "step": 5633 }, { "epoch": 1.63, "learning_rate": 8.722072181272311e-05, "loss": 3.0607, "step": 5634 }, { "epoch": 1.63, "learning_rate": 8.708838579345147e-05, "loss": 3.0165, "step": 5635 }, { "epoch": 1.63, "learning_rate": 8.695614066623991e-05, "loss": 3.1985, "step": 5636 }, { "epoch": 1.63, "learning_rate": 8.682398646019857e-05, "loss": 3.1545, "step": 5637 }, { "epoch": 1.63, "learning_rate": 8.66919232044181e-05, "loss": 3.0805, "step": 5638 }, { "epoch": 1.63, "learning_rate": 8.655995092796865e-05, "loss": 3.1113, "step": 5639 }, { "epoch": 1.63, "learning_rate": 8.642806965990079e-05, "loss": 3.0384, "step": 5640 }, { "epoch": 1.63, "learning_rate": 8.629627942924473e-05, "loss": 3.0908, "step": 5641 }, { "epoch": 1.63, "learning_rate": 8.61645802650109e-05, "loss": 3.1961, "step": 5642 }, { "epoch": 1.63, "learning_rate": 8.603297219618934e-05, "loss": 3.1144, "step": 5643 }, { "epoch": 1.63, "learning_rate": 8.590145525175031e-05, "loss": 3.1511, "step": 5644 }, { "epoch": 1.63, "learning_rate": 8.577002946064416e-05, "loss": 3.1327, "step": 5645 }, { "epoch": 1.64, "learning_rate": 8.563869485180059e-05, "loss": 3.0826, "step": 5646 }, { "epoch": 1.64, "learning_rate": 8.550745145412997e-05, "loss": 3.2003, "step": 5647 }, { "epoch": 1.64, "learning_rate": 8.537629929652186e-05, "loss": 3.1246, "step": 5648 }, { "epoch": 1.64, "learning_rate": 8.524523840784643e-05, "loss": 3.1919, "step": 5649 }, { "epoch": 1.64, "learning_rate": 8.511426881695311e-05, "loss": 3.2014, "step": 5650 }, { "epoch": 1.64, "learning_rate": 8.49833905526718e-05, "loss": 3.2336, "step": 5651 }, { "epoch": 1.64, "learning_rate": 8.485260364381186e-05, "loss": 3.1385, "step": 5652 }, { "epoch": 1.64, "learning_rate": 8.472190811916274e-05, "loss": 3.1219, "step": 5653 }, { "epoch": 1.64, "learning_rate": 8.459130400749404e-05, "loss": 3.0361, "step": 5654 }, { "epoch": 1.64, "learning_rate": 8.446079133755458e-05, "loss": 3.1297, "step": 5655 }, { "epoch": 1.64, "learning_rate": 8.433037013807365e-05, "loss": 3.0088, "step": 5656 }, { "epoch": 1.64, "learning_rate": 8.420004043776003e-05, "loss": 3.069, "step": 5657 }, { "epoch": 1.64, "learning_rate": 8.406980226530276e-05, "loss": 3.0421, "step": 5658 }, { "epoch": 1.64, "learning_rate": 8.393965564937017e-05, "loss": 3.129, "step": 5659 }, { "epoch": 1.64, "learning_rate": 8.380960061861098e-05, "loss": 3.079, "step": 5660 }, { "epoch": 1.64, "learning_rate": 8.367963720165334e-05, "loss": 3.3014, "step": 5661 }, { "epoch": 1.64, "learning_rate": 8.354976542710558e-05, "loss": 3.1775, "step": 5662 }, { "epoch": 1.64, "learning_rate": 8.341998532355565e-05, "loss": 3.191, "step": 5663 }, { "epoch": 1.64, "learning_rate": 8.329029691957124e-05, "loss": 3.0846, "step": 5664 }, { "epoch": 1.64, "learning_rate": 8.31607002437002e-05, "loss": 3.0621, "step": 5665 }, { "epoch": 1.64, "learning_rate": 8.303119532446973e-05, "loss": 3.1856, "step": 5666 }, { "epoch": 1.64, "learning_rate": 8.290178219038725e-05, "loss": 3.0607, "step": 5667 }, { "epoch": 1.64, "learning_rate": 8.277246086993961e-05, "loss": 3.2026, "step": 5668 }, { "epoch": 1.64, "learning_rate": 8.264323139159385e-05, "loss": 3.2192, "step": 5669 }, { "epoch": 1.64, "learning_rate": 8.251409378379638e-05, "loss": 3.1192, "step": 5670 }, { "epoch": 1.64, "learning_rate": 8.23850480749736e-05, "loss": 3.0606, "step": 5671 }, { "epoch": 1.64, "learning_rate": 8.225609429353187e-05, "loss": 3.207, "step": 5672 }, { "epoch": 1.64, "learning_rate": 8.212723246785681e-05, "loss": 3.1878, "step": 5673 }, { "epoch": 1.64, "learning_rate": 8.199846262631438e-05, "loss": 3.1628, "step": 5674 }, { "epoch": 1.64, "learning_rate": 8.186978479724971e-05, "loss": 3.042, "step": 5675 }, { "epoch": 1.64, "learning_rate": 8.17411990089883e-05, "loss": 3.0535, "step": 5676 }, { "epoch": 1.64, "learning_rate": 8.161270528983477e-05, "loss": 3.1311, "step": 5677 }, { "epoch": 1.64, "learning_rate": 8.148430366807403e-05, "loss": 3.1216, "step": 5678 }, { "epoch": 1.64, "learning_rate": 8.135599417197021e-05, "loss": 3.1101, "step": 5679 }, { "epoch": 1.64, "learning_rate": 8.122777682976745e-05, "loss": 3.1245, "step": 5680 }, { "epoch": 1.65, "learning_rate": 8.109965166968975e-05, "loss": 3.1276, "step": 5681 }, { "epoch": 1.65, "learning_rate": 8.097161871994042e-05, "loss": 3.1533, "step": 5682 }, { "epoch": 1.65, "learning_rate": 8.084367800870285e-05, "loss": 3.1673, "step": 5683 }, { "epoch": 1.65, "learning_rate": 8.07158295641397e-05, "loss": 3.1783, "step": 5684 }, { "epoch": 1.65, "learning_rate": 8.058807341439389e-05, "loss": 3.1357, "step": 5685 }, { "epoch": 1.65, "learning_rate": 8.046040958758744e-05, "loss": 3.2349, "step": 5686 }, { "epoch": 1.65, "learning_rate": 8.033283811182246e-05, "loss": 3.1471, "step": 5687 }, { "epoch": 1.65, "learning_rate": 8.02053590151805e-05, "loss": 3.1883, "step": 5688 }, { "epoch": 1.65, "learning_rate": 8.007797232572306e-05, "loss": 3.0984, "step": 5689 }, { "epoch": 1.65, "learning_rate": 7.995067807149093e-05, "loss": 3.0762, "step": 5690 }, { "epoch": 1.65, "learning_rate": 7.982347628050468e-05, "loss": 3.0534, "step": 5691 }, { "epoch": 1.65, "learning_rate": 7.969636698076476e-05, "loss": 3.1992, "step": 5692 }, { "epoch": 1.65, "learning_rate": 7.956935020025081e-05, "loss": 3.1692, "step": 5693 }, { "epoch": 1.65, "learning_rate": 7.944242596692264e-05, "loss": 3.2134, "step": 5694 }, { "epoch": 1.65, "learning_rate": 7.93155943087192e-05, "loss": 3.2619, "step": 5695 }, { "epoch": 1.65, "learning_rate": 7.918885525355934e-05, "loss": 3.098, "step": 5696 }, { "epoch": 1.65, "learning_rate": 7.906220882934146e-05, "loss": 3.0488, "step": 5697 }, { "epoch": 1.65, "learning_rate": 7.893565506394374e-05, "loss": 3.2197, "step": 5698 }, { "epoch": 1.65, "learning_rate": 7.880919398522362e-05, "loss": 3.0536, "step": 5699 }, { "epoch": 1.65, "learning_rate": 7.86828256210182e-05, "loss": 3.1299, "step": 5700 }, { "epoch": 1.65, "learning_rate": 7.855654999914457e-05, "loss": 2.9909, "step": 5701 }, { "epoch": 1.65, "learning_rate": 7.843036714739887e-05, "loss": 3.1417, "step": 5702 }, { "epoch": 1.65, "learning_rate": 7.830427709355725e-05, "loss": 3.1359, "step": 5703 }, { "epoch": 1.65, "learning_rate": 7.817827986537507e-05, "loss": 3.1367, "step": 5704 }, { "epoch": 1.65, "learning_rate": 7.805237549058752e-05, "loss": 3.122, "step": 5705 }, { "epoch": 1.65, "learning_rate": 7.792656399690923e-05, "loss": 3.246, "step": 5706 }, { "epoch": 1.65, "learning_rate": 7.780084541203464e-05, "loss": 3.2403, "step": 5707 }, { "epoch": 1.65, "learning_rate": 7.767521976363735e-05, "loss": 3.1602, "step": 5708 }, { "epoch": 1.65, "learning_rate": 7.754968707937054e-05, "loss": 3.2944, "step": 5709 }, { "epoch": 1.65, "learning_rate": 7.742424738686732e-05, "loss": 3.204, "step": 5710 }, { "epoch": 1.65, "learning_rate": 7.729890071373984e-05, "loss": 3.1059, "step": 5711 }, { "epoch": 1.65, "learning_rate": 7.717364708758023e-05, "loss": 3.0879, "step": 5712 }, { "epoch": 1.65, "learning_rate": 7.704848653595964e-05, "loss": 3.0976, "step": 5713 }, { "epoch": 1.65, "learning_rate": 7.692341908642914e-05, "loss": 3.0776, "step": 5714 }, { "epoch": 1.66, "learning_rate": 7.67984447665192e-05, "loss": 3.0764, "step": 5715 }, { "epoch": 1.66, "learning_rate": 7.667356360373984e-05, "loss": 3.1327, "step": 5716 }, { "epoch": 1.66, "learning_rate": 7.654877562558038e-05, "loss": 3.1778, "step": 5717 }, { "epoch": 1.66, "learning_rate": 7.642408085950964e-05, "loss": 3.342, "step": 5718 }, { "epoch": 1.66, "learning_rate": 7.629947933297626e-05, "loss": 3.0878, "step": 5719 }, { "epoch": 1.66, "learning_rate": 7.617497107340788e-05, "loss": 3.0365, "step": 5720 }, { "epoch": 1.66, "learning_rate": 7.60505561082121e-05, "loss": 3.1462, "step": 5721 }, { "epoch": 1.66, "learning_rate": 7.592623446477543e-05, "loss": 3.2017, "step": 5722 }, { "epoch": 1.66, "learning_rate": 7.580200617046434e-05, "loss": 3.0974, "step": 5723 }, { "epoch": 1.66, "learning_rate": 7.567787125262449e-05, "loss": 3.0495, "step": 5724 }, { "epoch": 1.66, "learning_rate": 7.555382973858116e-05, "loss": 3.0326, "step": 5725 }, { "epoch": 1.66, "learning_rate": 7.542988165563892e-05, "loss": 3.174, "step": 5726 }, { "epoch": 1.66, "learning_rate": 7.530602703108158e-05, "loss": 3.2053, "step": 5727 }, { "epoch": 1.66, "learning_rate": 7.518226589217286e-05, "loss": 3.0889, "step": 5728 }, { "epoch": 1.66, "learning_rate": 7.505859826615551e-05, "loss": 3.1619, "step": 5729 }, { "epoch": 1.66, "learning_rate": 7.4935024180252e-05, "loss": 3.1477, "step": 5730 }, { "epoch": 1.66, "learning_rate": 7.481154366166382e-05, "loss": 2.9796, "step": 5731 }, { "epoch": 1.66, "learning_rate": 7.468815673757218e-05, "loss": 3.2387, "step": 5732 }, { "epoch": 1.66, "learning_rate": 7.456486343513764e-05, "loss": 3.0035, "step": 5733 }, { "epoch": 1.66, "learning_rate": 7.444166378150013e-05, "loss": 3.064, "step": 5734 }, { "epoch": 1.66, "learning_rate": 7.431855780377894e-05, "loss": 3.0552, "step": 5735 }, { "epoch": 1.66, "learning_rate": 7.419554552907259e-05, "loss": 3.1756, "step": 5736 }, { "epoch": 1.66, "learning_rate": 7.407262698445932e-05, "loss": 3.1005, "step": 5737 }, { "epoch": 1.66, "learning_rate": 7.394980219699632e-05, "loss": 3.1331, "step": 5738 }, { "epoch": 1.66, "learning_rate": 7.382707119372051e-05, "loss": 3.0275, "step": 5739 }, { "epoch": 1.66, "learning_rate": 7.370443400164794e-05, "loss": 3.1569, "step": 5740 }, { "epoch": 1.66, "learning_rate": 7.358189064777432e-05, "loss": 3.1197, "step": 5741 }, { "epoch": 1.66, "learning_rate": 7.345944115907421e-05, "loss": 3.0455, "step": 5742 }, { "epoch": 1.66, "learning_rate": 7.333708556250195e-05, "loss": 3.1007, "step": 5743 }, { "epoch": 1.66, "learning_rate": 7.321482388499096e-05, "loss": 3.1533, "step": 5744 }, { "epoch": 1.66, "learning_rate": 7.30926561534539e-05, "loss": 3.1551, "step": 5745 }, { "epoch": 1.66, "learning_rate": 7.297058239478316e-05, "loss": 3.1592, "step": 5746 }, { "epoch": 1.66, "learning_rate": 7.284860263585003e-05, "loss": 3.2132, "step": 5747 }, { "epoch": 1.66, "learning_rate": 7.272671690350529e-05, "loss": 3.2199, "step": 5748 }, { "epoch": 1.66, "learning_rate": 7.260492522457906e-05, "loss": 3.2039, "step": 5749 }, { "epoch": 1.67, "learning_rate": 7.248322762588084e-05, "loss": 3.1203, "step": 5750 }, { "epoch": 1.67, "learning_rate": 7.236162413419895e-05, "loss": 3.0179, "step": 5751 }, { "epoch": 1.67, "learning_rate": 7.224011477630166e-05, "loss": 3.0848, "step": 5752 }, { "epoch": 1.67, "learning_rate": 7.211869957893591e-05, "loss": 3.0705, "step": 5753 }, { "epoch": 1.67, "learning_rate": 7.199737856882843e-05, "loss": 3.0928, "step": 5754 }, { "epoch": 1.67, "learning_rate": 7.187615177268486e-05, "loss": 3.1805, "step": 5755 }, { "epoch": 1.67, "learning_rate": 7.175501921719007e-05, "loss": 3.1842, "step": 5756 }, { "epoch": 1.67, "learning_rate": 7.163398092900852e-05, "loss": 3.2316, "step": 5757 }, { "epoch": 1.67, "learning_rate": 7.151303693478362e-05, "loss": 3.0629, "step": 5758 }, { "epoch": 1.67, "learning_rate": 7.139218726113838e-05, "loss": 3.2306, "step": 5759 }, { "epoch": 1.67, "learning_rate": 7.127143193467444e-05, "loss": 3.1869, "step": 5760 }, { "epoch": 1.67, "learning_rate": 7.115077098197337e-05, "loss": 3.2043, "step": 5761 }, { "epoch": 1.67, "learning_rate": 7.10302044295954e-05, "loss": 3.1551, "step": 5762 }, { "epoch": 1.67, "learning_rate": 7.090973230408033e-05, "loss": 3.1294, "step": 5763 }, { "epoch": 1.67, "learning_rate": 7.078935463194707e-05, "loss": 3.1149, "step": 5764 }, { "epoch": 1.67, "learning_rate": 7.066907143969353e-05, "loss": 3.2406, "step": 5765 }, { "epoch": 1.67, "learning_rate": 7.054888275379712e-05, "loss": 3.0397, "step": 5766 }, { "epoch": 1.67, "learning_rate": 7.042878860071439e-05, "loss": 3.2545, "step": 5767 }, { "epoch": 1.67, "learning_rate": 7.030878900688114e-05, "loss": 3.0968, "step": 5768 }, { "epoch": 1.67, "learning_rate": 7.018888399871204e-05, "loss": 3.0862, "step": 5769 }, { "epoch": 1.67, "learning_rate": 7.006907360260129e-05, "loss": 3.0908, "step": 5770 }, { "epoch": 1.67, "learning_rate": 6.9949357844922e-05, "loss": 3.0943, "step": 5771 }, { "epoch": 1.67, "learning_rate": 6.982973675202675e-05, "loss": 3.109, "step": 5772 }, { "epoch": 1.67, "learning_rate": 6.971021035024694e-05, "loss": 3.1406, "step": 5773 }, { "epoch": 1.67, "learning_rate": 6.959077866589326e-05, "loss": 3.1201, "step": 5774 }, { "epoch": 1.67, "learning_rate": 6.947144172525566e-05, "loss": 3.1079, "step": 5775 }, { "epoch": 1.67, "learning_rate": 6.935219955460309e-05, "loss": 3.0473, "step": 5776 }, { "epoch": 1.67, "learning_rate": 6.923305218018394e-05, "loss": 3.0841, "step": 5777 }, { "epoch": 1.67, "learning_rate": 6.911399962822518e-05, "loss": 3.1898, "step": 5778 }, { "epoch": 1.67, "learning_rate": 6.899504192493344e-05, "loss": 3.1737, "step": 5779 }, { "epoch": 1.67, "learning_rate": 6.887617909649407e-05, "loss": 3.0332, "step": 5780 }, { "epoch": 1.67, "learning_rate": 6.875741116907191e-05, "loss": 3.0799, "step": 5781 }, { "epoch": 1.67, "learning_rate": 6.863873816881061e-05, "loss": 3.2181, "step": 5782 }, { "epoch": 1.67, "learning_rate": 6.852016012183299e-05, "loss": 3.1488, "step": 5783 }, { "epoch": 1.68, "learning_rate": 6.840167705424105e-05, "loss": 3.129, "step": 5784 }, { "epoch": 1.68, "learning_rate": 6.828328899211584e-05, "loss": 3.2644, "step": 5785 }, { "epoch": 1.68, "learning_rate": 6.81649959615176e-05, "loss": 3.1889, "step": 5786 }, { "epoch": 1.68, "learning_rate": 6.80467979884854e-05, "loss": 3.1024, "step": 5787 }, { "epoch": 1.68, "learning_rate": 6.792869509903777e-05, "loss": 3.2234, "step": 5788 }, { "epoch": 1.68, "learning_rate": 6.781068731917173e-05, "loss": 3.0658, "step": 5789 }, { "epoch": 1.68, "learning_rate": 6.769277467486407e-05, "loss": 3.2215, "step": 5790 }, { "epoch": 1.68, "learning_rate": 6.757495719206996e-05, "loss": 3.1552, "step": 5791 }, { "epoch": 1.68, "learning_rate": 6.745723489672412e-05, "loss": 3.0855, "step": 5792 }, { "epoch": 1.68, "learning_rate": 6.733960781474019e-05, "loss": 3.1169, "step": 5793 }, { "epoch": 1.68, "learning_rate": 6.722207597201064e-05, "loss": 3.0435, "step": 5794 }, { "epoch": 1.68, "learning_rate": 6.71046393944073e-05, "loss": 3.286, "step": 5795 }, { "epoch": 1.68, "learning_rate": 6.698729810778065e-05, "loss": 3.081, "step": 5796 }, { "epoch": 1.68, "learning_rate": 6.687005213796071e-05, "loss": 3.1921, "step": 5797 }, { "epoch": 1.68, "learning_rate": 6.675290151075591e-05, "loss": 3.1434, "step": 5798 }, { "epoch": 1.68, "learning_rate": 6.663584625195424e-05, "loss": 3.104, "step": 5799 }, { "epoch": 1.68, "learning_rate": 6.651888638732228e-05, "loss": 3.2131, "step": 5800 }, { "epoch": 1.68, "learning_rate": 6.640202194260586e-05, "loss": 3.0813, "step": 5801 }, { "epoch": 1.68, "learning_rate": 6.628525294352988e-05, "loss": 3.1544, "step": 5802 }, { "epoch": 1.68, "learning_rate": 6.616857941579779e-05, "loss": 3.1577, "step": 5803 }, { "epoch": 1.68, "learning_rate": 6.60520013850926e-05, "loss": 3.0804, "step": 5804 }, { "epoch": 1.68, "learning_rate": 6.593551887707577e-05, "loss": 3.123, "step": 5805 }, { "epoch": 1.68, "learning_rate": 6.581913191738826e-05, "loss": 3.1361, "step": 5806 }, { "epoch": 1.68, "learning_rate": 6.570284053164944e-05, "loss": 3.2301, "step": 5807 }, { "epoch": 1.68, "learning_rate": 6.558664474545817e-05, "loss": 3.0878, "step": 5808 }, { "epoch": 1.68, "learning_rate": 6.547054458439178e-05, "loss": 3.0973, "step": 5809 }, { "epoch": 1.68, "learning_rate": 6.535454007400688e-05, "loss": 3.0193, "step": 5810 }, { "epoch": 1.68, "learning_rate": 6.523863123983909e-05, "loss": 3.0989, "step": 5811 }, { "epoch": 1.68, "learning_rate": 6.51228181074025e-05, "loss": 3.1453, "step": 5812 }, { "epoch": 1.68, "learning_rate": 6.500710070219079e-05, "loss": 3.1774, "step": 5813 }, { "epoch": 1.68, "learning_rate": 6.48914790496759e-05, "loss": 3.0116, "step": 5814 }, { "epoch": 1.68, "learning_rate": 6.477595317530933e-05, "loss": 3.2688, "step": 5815 }, { "epoch": 1.68, "learning_rate": 6.466052310452092e-05, "loss": 3.0009, "step": 5816 }, { "epoch": 1.68, "learning_rate": 6.454518886271981e-05, "loss": 3.2751, "step": 5817 }, { "epoch": 1.68, "learning_rate": 6.442995047529388e-05, "loss": 3.1206, "step": 5818 }, { "epoch": 1.69, "learning_rate": 6.431480796760991e-05, "loss": 3.1342, "step": 5819 }, { "epoch": 1.69, "learning_rate": 6.419976136501376e-05, "loss": 3.0335, "step": 5820 }, { "epoch": 1.69, "learning_rate": 6.40848106928299e-05, "loss": 3.0244, "step": 5821 }, { "epoch": 1.69, "learning_rate": 6.39699559763619e-05, "loss": 2.9725, "step": 5822 }, { "epoch": 1.69, "learning_rate": 6.3855197240892e-05, "loss": 3.1518, "step": 5823 }, { "epoch": 1.69, "learning_rate": 6.374053451168166e-05, "loss": 3.0842, "step": 5824 }, { "epoch": 1.69, "learning_rate": 6.362596781397068e-05, "loss": 3.0639, "step": 5825 }, { "epoch": 1.69, "learning_rate": 6.351149717297833e-05, "loss": 3.0486, "step": 5826 }, { "epoch": 1.69, "learning_rate": 6.339712261390212e-05, "loss": 3.0668, "step": 5827 }, { "epoch": 1.69, "learning_rate": 6.328284416191892e-05, "loss": 3.1715, "step": 5828 }, { "epoch": 1.69, "learning_rate": 6.316866184218434e-05, "loss": 3.2332, "step": 5829 }, { "epoch": 1.69, "learning_rate": 6.305457567983247e-05, "loss": 3.2108, "step": 5830 }, { "epoch": 1.69, "learning_rate": 6.294058569997674e-05, "loss": 3.1008, "step": 5831 }, { "epoch": 1.69, "learning_rate": 6.282669192770896e-05, "loss": 3.0863, "step": 5832 }, { "epoch": 1.69, "learning_rate": 6.271289438810013e-05, "loss": 3.0901, "step": 5833 }, { "epoch": 1.69, "learning_rate": 6.259919310619977e-05, "loss": 3.1385, "step": 5834 }, { "epoch": 1.69, "learning_rate": 6.248558810703647e-05, "loss": 3.1787, "step": 5835 }, { "epoch": 1.69, "learning_rate": 6.237207941561734e-05, "loss": 3.0745, "step": 5836 }, { "epoch": 1.69, "learning_rate": 6.225866705692856e-05, "loss": 3.3117, "step": 5837 }, { "epoch": 1.69, "learning_rate": 6.214535105593505e-05, "loss": 3.0915, "step": 5838 }, { "epoch": 1.69, "learning_rate": 6.203213143758035e-05, "loss": 3.0393, "step": 5839 }, { "epoch": 1.69, "learning_rate": 6.191900822678698e-05, "loss": 3.1505, "step": 5840 }, { "epoch": 1.69, "learning_rate": 6.180598144845606e-05, "loss": 3.1008, "step": 5841 }, { "epoch": 1.69, "learning_rate": 6.169305112746776e-05, "loss": 3.1517, "step": 5842 }, { "epoch": 1.69, "learning_rate": 6.158021728868062e-05, "loss": 3.0368, "step": 5843 }, { "epoch": 1.69, "learning_rate": 6.146747995693224e-05, "loss": 3.1443, "step": 5844 }, { "epoch": 1.69, "learning_rate": 6.135483915703888e-05, "loss": 3.1922, "step": 5845 }, { "epoch": 1.69, "learning_rate": 6.124229491379574e-05, "loss": 3.1315, "step": 5846 }, { "epoch": 1.69, "learning_rate": 6.112984725197645e-05, "loss": 3.0991, "step": 5847 }, { "epoch": 1.69, "learning_rate": 6.1017496196333454e-05, "loss": 3.1582, "step": 5848 }, { "epoch": 1.69, "learning_rate": 6.090524177159812e-05, "loss": 3.0246, "step": 5849 }, { "epoch": 1.69, "learning_rate": 6.079308400248029e-05, "loss": 3.1043, "step": 5850 }, { "epoch": 1.69, "learning_rate": 6.068102291366884e-05, "loss": 3.1463, "step": 5851 }, { "epoch": 1.69, "learning_rate": 6.056905852983102e-05, "loss": 3.1716, "step": 5852 }, { "epoch": 1.7, "learning_rate": 6.0457190875612964e-05, "loss": 3.0289, "step": 5853 }, { "epoch": 1.7, "learning_rate": 6.034541997563964e-05, "loss": 3.1747, "step": 5854 }, { "epoch": 1.7, "learning_rate": 6.0233745854514636e-05, "loss": 3.1044, "step": 5855 }, { "epoch": 1.7, "learning_rate": 6.012216853682001e-05, "loss": 3.0298, "step": 5856 }, { "epoch": 1.7, "learning_rate": 6.0010688047116735e-05, "loss": 3.2529, "step": 5857 }, { "epoch": 1.7, "learning_rate": 5.989930440994451e-05, "loss": 3.1578, "step": 5858 }, { "epoch": 1.7, "learning_rate": 5.978801764982145e-05, "loss": 3.126, "step": 5859 }, { "epoch": 1.7, "learning_rate": 5.967682779124478e-05, "loss": 3.2123, "step": 5860 }, { "epoch": 1.7, "learning_rate": 5.956573485868988e-05, "loss": 3.1169, "step": 5861 }, { "epoch": 1.7, "learning_rate": 5.945473887661118e-05, "loss": 3.1828, "step": 5862 }, { "epoch": 1.7, "learning_rate": 5.934383986944158e-05, "loss": 3.1016, "step": 5863 }, { "epoch": 1.7, "learning_rate": 5.9233037861592896e-05, "loss": 3.104, "step": 5864 }, { "epoch": 1.7, "learning_rate": 5.9122332877455276e-05, "loss": 3.1052, "step": 5865 }, { "epoch": 1.7, "learning_rate": 5.901172494139739e-05, "loss": 3.179, "step": 5866 }, { "epoch": 1.7, "learning_rate": 5.890121407776716e-05, "loss": 3.2006, "step": 5867 }, { "epoch": 1.7, "learning_rate": 5.879080031089046e-05, "loss": 3.1153, "step": 5868 }, { "epoch": 1.7, "learning_rate": 5.8680483665072335e-05, "loss": 3.2712, "step": 5869 }, { "epoch": 1.7, "learning_rate": 5.857026416459593e-05, "loss": 3.1874, "step": 5870 }, { "epoch": 1.7, "learning_rate": 5.84601418337235e-05, "loss": 3.1764, "step": 5871 }, { "epoch": 1.7, "learning_rate": 5.8350116696695596e-05, "loss": 3.0982, "step": 5872 }, { "epoch": 1.7, "learning_rate": 5.824018877773163e-05, "loss": 3.0777, "step": 5873 }, { "epoch": 1.7, "learning_rate": 5.813035810102935e-05, "loss": 3.2982, "step": 5874 }, { "epoch": 1.7, "learning_rate": 5.8020624690765076e-05, "loss": 3.0828, "step": 5875 }, { "epoch": 1.7, "learning_rate": 5.7910988571094e-05, "loss": 3.1596, "step": 5876 }, { "epoch": 1.7, "learning_rate": 5.78014497661497e-05, "loss": 3.2105, "step": 5877 }, { "epoch": 1.7, "learning_rate": 5.76920083000444e-05, "loss": 3.2571, "step": 5878 }, { "epoch": 1.7, "learning_rate": 5.758266419686881e-05, "loss": 3.2297, "step": 5879 }, { "epoch": 1.7, "learning_rate": 5.747341748069229e-05, "loss": 3.1762, "step": 5880 }, { "epoch": 1.7, "learning_rate": 5.7364268175562786e-05, "loss": 2.9494, "step": 5881 }, { "epoch": 1.7, "learning_rate": 5.725521630550684e-05, "loss": 3.1783, "step": 5882 }, { "epoch": 1.7, "learning_rate": 5.714626189452937e-05, "loss": 3.1142, "step": 5883 }, { "epoch": 1.7, "learning_rate": 5.70374049666138e-05, "loss": 3.1604, "step": 5884 }, { "epoch": 1.7, "learning_rate": 5.6928645545722524e-05, "loss": 3.3185, "step": 5885 }, { "epoch": 1.7, "learning_rate": 5.6819983655795935e-05, "loss": 3.0474, "step": 5886 }, { "epoch": 1.7, "learning_rate": 5.671141932075335e-05, "loss": 3.0614, "step": 5887 }, { "epoch": 1.71, "learning_rate": 5.660295256449233e-05, "loss": 3.2309, "step": 5888 }, { "epoch": 1.71, "learning_rate": 5.649458341088914e-05, "loss": 3.1124, "step": 5889 }, { "epoch": 1.71, "learning_rate": 5.638631188379856e-05, "loss": 3.12, "step": 5890 }, { "epoch": 1.71, "learning_rate": 5.6278138007053944e-05, "loss": 3.0623, "step": 5891 }, { "epoch": 1.71, "learning_rate": 5.617006180446688e-05, "loss": 3.1212, "step": 5892 }, { "epoch": 1.71, "learning_rate": 5.606208329982754e-05, "loss": 3.1292, "step": 5893 }, { "epoch": 1.71, "learning_rate": 5.5954202516904864e-05, "loss": 3.026, "step": 5894 }, { "epoch": 1.71, "learning_rate": 5.584641947944591e-05, "loss": 3.1871, "step": 5895 }, { "epoch": 1.71, "learning_rate": 5.573873421117642e-05, "loss": 3.0561, "step": 5896 }, { "epoch": 1.71, "learning_rate": 5.563114673580061e-05, "loss": 3.0537, "step": 5897 }, { "epoch": 1.71, "learning_rate": 5.55236570770013e-05, "loss": 3.1289, "step": 5898 }, { "epoch": 1.71, "learning_rate": 5.54162652584394e-05, "loss": 3.1259, "step": 5899 }, { "epoch": 1.71, "learning_rate": 5.530897130375467e-05, "loss": 3.1487, "step": 5900 }, { "epoch": 1.71, "learning_rate": 5.5201775236565056e-05, "loss": 3.0727, "step": 5901 }, { "epoch": 1.71, "learning_rate": 5.509467708046706e-05, "loss": 2.9689, "step": 5902 }, { "epoch": 1.71, "learning_rate": 5.498767685903572e-05, "loss": 3.2156, "step": 5903 }, { "epoch": 1.71, "learning_rate": 5.4880774595824246e-05, "loss": 3.1497, "step": 5904 }, { "epoch": 1.71, "learning_rate": 5.477397031436465e-05, "loss": 3.1182, "step": 5905 }, { "epoch": 1.71, "learning_rate": 5.466726403816713e-05, "loss": 3.2049, "step": 5906 }, { "epoch": 1.71, "learning_rate": 5.45606557907205e-05, "loss": 3.1578, "step": 5907 }, { "epoch": 1.71, "learning_rate": 5.445414559549167e-05, "loss": 3.0453, "step": 5908 }, { "epoch": 1.71, "learning_rate": 5.4347733475926365e-05, "loss": 3.1014, "step": 5909 }, { "epoch": 1.71, "learning_rate": 5.4241419455448414e-05, "loss": 3.1318, "step": 5910 }, { "epoch": 1.71, "learning_rate": 5.41352035574601e-05, "loss": 3.1573, "step": 5911 }, { "epoch": 1.71, "learning_rate": 5.4029085805342324e-05, "loss": 3.1595, "step": 5912 }, { "epoch": 1.71, "learning_rate": 5.392306622245407e-05, "loss": 3.114, "step": 5913 }, { "epoch": 1.71, "learning_rate": 5.38171448321329e-05, "loss": 3.0953, "step": 5914 }, { "epoch": 1.71, "learning_rate": 5.3711321657694825e-05, "loss": 3.1468, "step": 5915 }, { "epoch": 1.71, "learning_rate": 5.36055967224342e-05, "loss": 3.0251, "step": 5916 }, { "epoch": 1.71, "learning_rate": 5.3499970049623483e-05, "loss": 3.0255, "step": 5917 }, { "epoch": 1.71, "learning_rate": 5.3394441662513945e-05, "loss": 3.0289, "step": 5918 }, { "epoch": 1.71, "learning_rate": 5.3289011584334855e-05, "loss": 3.1095, "step": 5919 }, { "epoch": 1.71, "learning_rate": 5.318367983829392e-05, "loss": 3.0044, "step": 5920 }, { "epoch": 1.71, "learning_rate": 5.307844644757742e-05, "loss": 3.2112, "step": 5921 }, { "epoch": 1.72, "learning_rate": 5.297331143534973e-05, "loss": 3.0923, "step": 5922 }, { "epoch": 1.72, "learning_rate": 5.286827482475365e-05, "loss": 3.1513, "step": 5923 }, { "epoch": 1.72, "learning_rate": 5.276333663891036e-05, "loss": 3.1145, "step": 5924 }, { "epoch": 1.72, "learning_rate": 5.26584969009195e-05, "loss": 3.043, "step": 5925 }, { "epoch": 1.72, "learning_rate": 5.255375563385867e-05, "loss": 3.1938, "step": 5926 }, { "epoch": 1.72, "learning_rate": 5.244911286078419e-05, "loss": 3.0888, "step": 5927 }, { "epoch": 1.72, "learning_rate": 5.234456860473041e-05, "loss": 3.1117, "step": 5928 }, { "epoch": 1.72, "learning_rate": 5.224012288871011e-05, "loss": 3.0747, "step": 5929 }, { "epoch": 1.72, "learning_rate": 5.213577573571443e-05, "loss": 3.1275, "step": 5930 }, { "epoch": 1.72, "learning_rate": 5.2031527168712624e-05, "loss": 3.2608, "step": 5931 }, { "epoch": 1.72, "learning_rate": 5.192737721065255e-05, "loss": 3.0666, "step": 5932 }, { "epoch": 1.72, "learning_rate": 5.182332588446009e-05, "loss": 3.1642, "step": 5933 }, { "epoch": 1.72, "learning_rate": 5.17193732130396e-05, "loss": 3.0845, "step": 5934 }, { "epoch": 1.72, "learning_rate": 5.1615519219273546e-05, "loss": 3.1004, "step": 5935 }, { "epoch": 1.72, "learning_rate": 5.151176392602291e-05, "loss": 3.1509, "step": 5936 }, { "epoch": 1.72, "learning_rate": 5.140810735612655e-05, "loss": 3.0493, "step": 5937 }, { "epoch": 1.72, "learning_rate": 5.13045495324021e-05, "loss": 3.0786, "step": 5938 }, { "epoch": 1.72, "learning_rate": 5.120109047764504e-05, "loss": 3.1068, "step": 5939 }, { "epoch": 1.72, "learning_rate": 5.109773021462921e-05, "loss": 3.093, "step": 5940 }, { "epoch": 1.72, "learning_rate": 5.099446876610692e-05, "loss": 3.164, "step": 5941 }, { "epoch": 1.72, "learning_rate": 5.089130615480841e-05, "loss": 2.9833, "step": 5942 }, { "epoch": 1.72, "learning_rate": 5.07882424034426e-05, "loss": 3.155, "step": 5943 }, { "epoch": 1.72, "learning_rate": 5.068527753469604e-05, "loss": 3.0512, "step": 5944 }, { "epoch": 1.72, "learning_rate": 5.058241157123411e-05, "loss": 3.146, "step": 5945 }, { "epoch": 1.72, "learning_rate": 5.047964453569992e-05, "loss": 3.2512, "step": 5946 }, { "epoch": 1.72, "learning_rate": 5.037697645071526e-05, "loss": 3.1434, "step": 5947 }, { "epoch": 1.72, "learning_rate": 5.027440733887972e-05, "loss": 3.1983, "step": 5948 }, { "epoch": 1.72, "learning_rate": 5.017193722277136e-05, "loss": 3.0799, "step": 5949 }, { "epoch": 1.72, "learning_rate": 5.006956612494651e-05, "loss": 3.0886, "step": 5950 }, { "epoch": 1.72, "learning_rate": 4.996729406793943e-05, "loss": 3.1598, "step": 5951 }, { "epoch": 1.72, "learning_rate": 4.986512107426283e-05, "loss": 3.2882, "step": 5952 }, { "epoch": 1.72, "learning_rate": 4.9763047166407395e-05, "loss": 3.1816, "step": 5953 }, { "epoch": 1.72, "learning_rate": 4.966107236684225e-05, "loss": 3.137, "step": 5954 }, { "epoch": 1.72, "learning_rate": 4.9559196698014455e-05, "loss": 3.0786, "step": 5955 }, { "epoch": 1.72, "learning_rate": 4.94574201823495e-05, "loss": 3.2056, "step": 5956 }, { "epoch": 1.73, "learning_rate": 4.9355742842250695e-05, "loss": 3.1278, "step": 5957 }, { "epoch": 1.73, "learning_rate": 4.925416470009991e-05, "loss": 3.0525, "step": 5958 }, { "epoch": 1.73, "learning_rate": 4.91526857782571e-05, "loss": 3.178, "step": 5959 }, { "epoch": 1.73, "learning_rate": 4.9051306099060055e-05, "loss": 3.1103, "step": 5960 }, { "epoch": 1.73, "learning_rate": 4.895002568482509e-05, "loss": 3.1031, "step": 5961 }, { "epoch": 1.73, "learning_rate": 4.884884455784644e-05, "loss": 3.085, "step": 5962 }, { "epoch": 1.73, "learning_rate": 4.8747762740396714e-05, "loss": 3.1366, "step": 5963 }, { "epoch": 1.73, "learning_rate": 4.864678025472635e-05, "loss": 3.0422, "step": 5964 }, { "epoch": 1.73, "learning_rate": 4.854589712306423e-05, "loss": 3.0585, "step": 5965 }, { "epoch": 1.73, "learning_rate": 4.844511336761709e-05, "loss": 3.0954, "step": 5966 }, { "epoch": 1.73, "learning_rate": 4.834442901057001e-05, "loss": 3.1752, "step": 5967 }, { "epoch": 1.73, "learning_rate": 4.824384407408622e-05, "loss": 3.1914, "step": 5968 }, { "epoch": 1.73, "learning_rate": 4.814335858030672e-05, "loss": 3.0803, "step": 5969 }, { "epoch": 1.73, "learning_rate": 4.80429725513511e-05, "loss": 3.0941, "step": 5970 }, { "epoch": 1.73, "learning_rate": 4.794268600931651e-05, "loss": 3.1277, "step": 5971 }, { "epoch": 1.73, "learning_rate": 4.7842498976278794e-05, "loss": 3.1871, "step": 5972 }, { "epoch": 1.73, "learning_rate": 4.7742411474291425e-05, "loss": 3.1268, "step": 5973 }, { "epoch": 1.73, "learning_rate": 4.7642423525386225e-05, "loss": 3.1671, "step": 5974 }, { "epoch": 1.73, "learning_rate": 4.754253515157286e-05, "loss": 3.0443, "step": 5975 }, { "epoch": 1.73, "learning_rate": 4.7442746374839365e-05, "loss": 3.0991, "step": 5976 }, { "epoch": 1.73, "learning_rate": 4.734305721715182e-05, "loss": 3.1885, "step": 5977 }, { "epoch": 1.73, "learning_rate": 4.724346770045401e-05, "loss": 3.1831, "step": 5978 }, { "epoch": 1.73, "learning_rate": 4.7143977846668294e-05, "loss": 3.0665, "step": 5979 }, { "epoch": 1.73, "learning_rate": 4.704458767769471e-05, "loss": 3.2454, "step": 5980 }, { "epoch": 1.73, "learning_rate": 4.6945297215411576e-05, "loss": 3.177, "step": 5981 }, { "epoch": 1.73, "learning_rate": 4.684610648167503e-05, "loss": 3.0679, "step": 5982 }, { "epoch": 1.73, "learning_rate": 4.674701549831961e-05, "loss": 3.0118, "step": 5983 }, { "epoch": 1.73, "learning_rate": 4.664802428715753e-05, "loss": 3.0512, "step": 5984 }, { "epoch": 1.73, "learning_rate": 4.654913286997925e-05, "loss": 3.2122, "step": 5985 }, { "epoch": 1.73, "learning_rate": 4.64503412685533e-05, "loss": 3.0368, "step": 5986 }, { "epoch": 1.73, "learning_rate": 4.6351649504626e-05, "loss": 3.086, "step": 5987 }, { "epoch": 1.73, "learning_rate": 4.6253057599922044e-05, "loss": 3.1825, "step": 5988 }, { "epoch": 1.73, "learning_rate": 4.6154565576143724e-05, "loss": 3.136, "step": 5989 }, { "epoch": 1.73, "learning_rate": 4.6056173454971815e-05, "loss": 3.2387, "step": 5990 }, { "epoch": 1.74, "learning_rate": 4.595788125806466e-05, "loss": 3.0936, "step": 5991 }, { "epoch": 1.74, "learning_rate": 4.585968900705889e-05, "loss": 3.1251, "step": 5992 }, { "epoch": 1.74, "learning_rate": 4.5761596723569e-05, "loss": 3.0895, "step": 5993 }, { "epoch": 1.74, "learning_rate": 4.566360442918754e-05, "loss": 2.9366, "step": 5994 }, { "epoch": 1.74, "learning_rate": 4.556571214548516e-05, "loss": 3.1494, "step": 5995 }, { "epoch": 1.74, "learning_rate": 4.546791989401011e-05, "loss": 3.1151, "step": 5996 }, { "epoch": 1.74, "learning_rate": 4.537022769628923e-05, "loss": 3.2048, "step": 5997 }, { "epoch": 1.74, "learning_rate": 4.527263557382666e-05, "loss": 3.1502, "step": 5998 }, { "epoch": 1.74, "learning_rate": 4.517514354810509e-05, "loss": 3.0745, "step": 5999 }, { "epoch": 1.74, "learning_rate": 4.5077751640584696e-05, "loss": 3.1254, "step": 6000 }, { "epoch": 1.74, "learning_rate": 4.4980459872704e-05, "loss": 3.1738, "step": 6001 }, { "epoch": 1.74, "learning_rate": 4.4883268265879404e-05, "loss": 3.0157, "step": 6002 }, { "epoch": 1.74, "learning_rate": 4.4786176841504935e-05, "loss": 3.2618, "step": 6003 }, { "epoch": 1.74, "learning_rate": 4.468918562095309e-05, "loss": 3.0559, "step": 6004 }, { "epoch": 1.74, "learning_rate": 4.459229462557379e-05, "loss": 3.0632, "step": 6005 }, { "epoch": 1.74, "learning_rate": 4.449550387669537e-05, "loss": 3.1193, "step": 6006 }, { "epoch": 1.74, "learning_rate": 4.4398813395623626e-05, "loss": 3.1638, "step": 6007 }, { "epoch": 1.74, "learning_rate": 4.43022232036428e-05, "loss": 3.1171, "step": 6008 }, { "epoch": 1.74, "learning_rate": 4.4205733322014496e-05, "loss": 3.0651, "step": 6009 }, { "epoch": 1.74, "learning_rate": 4.410934377197867e-05, "loss": 3.1376, "step": 6010 }, { "epoch": 1.74, "learning_rate": 4.401305457475313e-05, "loss": 3.1768, "step": 6011 }, { "epoch": 1.74, "learning_rate": 4.391686575153331e-05, "loss": 3.0957, "step": 6012 }, { "epoch": 1.74, "learning_rate": 4.382077732349299e-05, "loss": 3.0247, "step": 6013 }, { "epoch": 1.74, "learning_rate": 4.372478931178336e-05, "loss": 3.0169, "step": 6014 }, { "epoch": 1.74, "learning_rate": 4.362890173753392e-05, "loss": 3.1424, "step": 6015 }, { "epoch": 1.74, "learning_rate": 4.353311462185183e-05, "loss": 3.1256, "step": 6016 }, { "epoch": 1.74, "learning_rate": 4.343742798582229e-05, "loss": 3.0539, "step": 6017 }, { "epoch": 1.74, "learning_rate": 4.3341841850508104e-05, "loss": 3.0544, "step": 6018 }, { "epoch": 1.74, "learning_rate": 4.324635623695028e-05, "loss": 3.1493, "step": 6019 }, { "epoch": 1.74, "learning_rate": 4.315097116616767e-05, "loss": 3.2077, "step": 6020 }, { "epoch": 1.74, "learning_rate": 4.305568665915666e-05, "loss": 3.0578, "step": 6021 }, { "epoch": 1.74, "learning_rate": 4.296050273689195e-05, "loss": 3.156, "step": 6022 }, { "epoch": 1.74, "learning_rate": 4.286541942032562e-05, "loss": 3.1038, "step": 6023 }, { "epoch": 1.74, "learning_rate": 4.277043673038816e-05, "loss": 3.0752, "step": 6024 }, { "epoch": 1.74, "learning_rate": 4.267555468798734e-05, "loss": 3.1069, "step": 6025 }, { "epoch": 1.75, "learning_rate": 4.2580773314009225e-05, "loss": 3.2482, "step": 6026 }, { "epoch": 1.75, "learning_rate": 4.248609262931735e-05, "loss": 3.0604, "step": 6027 }, { "epoch": 1.75, "learning_rate": 4.239151265475344e-05, "loss": 3.0564, "step": 6028 }, { "epoch": 1.75, "learning_rate": 4.2297033411136886e-05, "loss": 2.9924, "step": 6029 }, { "epoch": 1.75, "learning_rate": 4.220265491926489e-05, "loss": 3.1359, "step": 6030 }, { "epoch": 1.75, "learning_rate": 4.21083771999125e-05, "loss": 2.9635, "step": 6031 }, { "epoch": 1.75, "learning_rate": 4.2014200273832404e-05, "loss": 3.1017, "step": 6032 }, { "epoch": 1.75, "learning_rate": 4.1920124161755555e-05, "loss": 3.0713, "step": 6033 }, { "epoch": 1.75, "learning_rate": 4.182614888439018e-05, "loss": 3.1888, "step": 6034 }, { "epoch": 1.75, "learning_rate": 4.1732274462422725e-05, "loss": 3.2342, "step": 6035 }, { "epoch": 1.75, "learning_rate": 4.1638500916517165e-05, "loss": 3.1207, "step": 6036 }, { "epoch": 1.75, "learning_rate": 4.1544828267315425e-05, "loss": 3.1116, "step": 6037 }, { "epoch": 1.75, "learning_rate": 4.145125653543719e-05, "loss": 3.1723, "step": 6038 }, { "epoch": 1.75, "learning_rate": 4.135778574147997e-05, "loss": 3.1291, "step": 6039 }, { "epoch": 1.75, "learning_rate": 4.1264415906018924e-05, "loss": 3.0732, "step": 6040 }, { "epoch": 1.75, "learning_rate": 4.117114704960695e-05, "loss": 3.204, "step": 6041 }, { "epoch": 1.75, "learning_rate": 4.107797919277506e-05, "loss": 3.134, "step": 6042 }, { "epoch": 1.75, "learning_rate": 4.098491235603163e-05, "loss": 3.1801, "step": 6043 }, { "epoch": 1.75, "learning_rate": 4.0891946559863055e-05, "loss": 3.055, "step": 6044 }, { "epoch": 1.75, "learning_rate": 4.079908182473335e-05, "loss": 3.1006, "step": 6045 }, { "epoch": 1.75, "learning_rate": 4.0706318171084334e-05, "loss": 3.1204, "step": 6046 }, { "epoch": 1.75, "learning_rate": 4.061365561933567e-05, "loss": 3.1425, "step": 6047 }, { "epoch": 1.75, "learning_rate": 4.05210941898847e-05, "loss": 3.1938, "step": 6048 }, { "epoch": 1.75, "learning_rate": 4.042863390310642e-05, "loss": 3.1896, "step": 6049 }, { "epoch": 1.75, "learning_rate": 4.0336274779353535e-05, "loss": 3.0589, "step": 6050 }, { "epoch": 1.75, "learning_rate": 4.024401683895668e-05, "loss": 3.1176, "step": 6051 }, { "epoch": 1.75, "learning_rate": 4.0151860102224044e-05, "loss": 3.1769, "step": 6052 }, { "epoch": 1.75, "learning_rate": 4.005980458944169e-05, "loss": 3.0197, "step": 6053 }, { "epoch": 1.75, "learning_rate": 3.996785032087324e-05, "loss": 3.0603, "step": 6054 }, { "epoch": 1.75, "learning_rate": 3.987599731676022e-05, "loss": 3.2522, "step": 6055 }, { "epoch": 1.75, "learning_rate": 3.978424559732158e-05, "loss": 3.0098, "step": 6056 }, { "epoch": 1.75, "learning_rate": 3.969259518275431e-05, "loss": 3.1194, "step": 6057 }, { "epoch": 1.75, "learning_rate": 3.960104609323284e-05, "loss": 3.0131, "step": 6058 }, { "epoch": 1.75, "learning_rate": 3.950959834890927e-05, "loss": 3.0291, "step": 6059 }, { "epoch": 1.76, "learning_rate": 3.941825196991378e-05, "loss": 3.1139, "step": 6060 }, { "epoch": 1.76, "learning_rate": 3.932700697635372e-05, "loss": 3.0906, "step": 6061 }, { "epoch": 1.76, "learning_rate": 3.923586338831453e-05, "loss": 3.1001, "step": 6062 }, { "epoch": 1.76, "learning_rate": 3.914482122585905e-05, "loss": 3.2197, "step": 6063 }, { "epoch": 1.76, "learning_rate": 3.905388050902808e-05, "loss": 3.0445, "step": 6064 }, { "epoch": 1.76, "learning_rate": 3.896304125783973e-05, "loss": 3.2518, "step": 6065 }, { "epoch": 1.76, "learning_rate": 3.8872303492290154e-05, "loss": 3.1282, "step": 6066 }, { "epoch": 1.76, "learning_rate": 3.878166723235288e-05, "loss": 3.0362, "step": 6067 }, { "epoch": 1.76, "learning_rate": 3.8691132497979066e-05, "loss": 3.1454, "step": 6068 }, { "epoch": 1.76, "learning_rate": 3.8600699309097876e-05, "loss": 3.0578, "step": 6069 }, { "epoch": 1.76, "learning_rate": 3.851036768561572e-05, "loss": 3.095, "step": 6070 }, { "epoch": 1.76, "learning_rate": 3.842013764741686e-05, "loss": 3.0728, "step": 6071 }, { "epoch": 1.76, "learning_rate": 3.8330009214363194e-05, "loss": 3.1818, "step": 6072 }, { "epoch": 1.76, "learning_rate": 3.823998240629434e-05, "loss": 3.026, "step": 6073 }, { "epoch": 1.76, "learning_rate": 3.815005724302717e-05, "loss": 3.0081, "step": 6074 }, { "epoch": 1.76, "learning_rate": 3.806023374435663e-05, "loss": 3.0505, "step": 6075 }, { "epoch": 1.76, "learning_rate": 3.797051193005507e-05, "loss": 3.0276, "step": 6076 }, { "epoch": 1.76, "learning_rate": 3.788089181987236e-05, "loss": 3.1396, "step": 6077 }, { "epoch": 1.76, "learning_rate": 3.779137343353623e-05, "loss": 3.2437, "step": 6078 }, { "epoch": 1.76, "learning_rate": 3.770195679075178e-05, "loss": 3.1643, "step": 6079 }, { "epoch": 1.76, "learning_rate": 3.761264191120189e-05, "loss": 3.1272, "step": 6080 }, { "epoch": 1.76, "learning_rate": 3.7523428814546954e-05, "loss": 3.0861, "step": 6081 }, { "epoch": 1.76, "learning_rate": 3.743431752042509e-05, "loss": 3.0726, "step": 6082 }, { "epoch": 1.76, "learning_rate": 3.734530804845177e-05, "loss": 3.1536, "step": 6083 }, { "epoch": 1.76, "learning_rate": 3.725640041822026e-05, "loss": 3.1487, "step": 6084 }, { "epoch": 1.76, "learning_rate": 3.716759464930125e-05, "loss": 3.0443, "step": 6085 }, { "epoch": 1.76, "learning_rate": 3.70788907612431e-05, "loss": 3.0403, "step": 6086 }, { "epoch": 1.76, "learning_rate": 3.699028877357186e-05, "loss": 3.1099, "step": 6087 }, { "epoch": 1.76, "learning_rate": 3.690178870579075e-05, "loss": 3.1775, "step": 6088 }, { "epoch": 1.76, "learning_rate": 3.681339057738103e-05, "loss": 3.1721, "step": 6089 }, { "epoch": 1.76, "learning_rate": 3.672509440780125e-05, "loss": 2.9877, "step": 6090 }, { "epoch": 1.76, "learning_rate": 3.6636900216487745e-05, "loss": 3.2046, "step": 6091 }, { "epoch": 1.76, "learning_rate": 3.654880802285393e-05, "loss": 3.1411, "step": 6092 }, { "epoch": 1.76, "learning_rate": 3.646081784629141e-05, "loss": 2.9192, "step": 6093 }, { "epoch": 1.76, "learning_rate": 3.637292970616879e-05, "loss": 3.0551, "step": 6094 }, { "epoch": 1.77, "learning_rate": 3.6285143621832386e-05, "loss": 3.1556, "step": 6095 }, { "epoch": 1.77, "learning_rate": 3.619745961260623e-05, "loss": 3.145, "step": 6096 }, { "epoch": 1.77, "learning_rate": 3.610987769779156e-05, "loss": 3.1005, "step": 6097 }, { "epoch": 1.77, "learning_rate": 3.60223978966675e-05, "loss": 2.9397, "step": 6098 }, { "epoch": 1.77, "learning_rate": 3.593502022849043e-05, "loss": 2.9234, "step": 6099 }, { "epoch": 1.77, "learning_rate": 3.584774471249447e-05, "loss": 3.0883, "step": 6100 }, { "epoch": 1.77, "learning_rate": 3.5760571367890904e-05, "loss": 3.0439, "step": 6101 }, { "epoch": 1.77, "learning_rate": 3.567350021386895e-05, "loss": 3.0982, "step": 6102 }, { "epoch": 1.77, "learning_rate": 3.558653126959499e-05, "loss": 3.194, "step": 6103 }, { "epoch": 1.77, "learning_rate": 3.549966455421305e-05, "loss": 3.1463, "step": 6104 }, { "epoch": 1.77, "learning_rate": 3.5412900086844615e-05, "loss": 3.0536, "step": 6105 }, { "epoch": 1.77, "learning_rate": 3.532623788658873e-05, "loss": 3.0922, "step": 6106 }, { "epoch": 1.77, "learning_rate": 3.5239677972522035e-05, "loss": 3.0262, "step": 6107 }, { "epoch": 1.77, "learning_rate": 3.515322036369822e-05, "loss": 3.1892, "step": 6108 }, { "epoch": 1.77, "learning_rate": 3.506686507914902e-05, "loss": 3.082, "step": 6109 }, { "epoch": 1.77, "learning_rate": 3.4980612137883175e-05, "loss": 3.1276, "step": 6110 }, { "epoch": 1.77, "learning_rate": 3.4894461558887256e-05, "loss": 2.9493, "step": 6111 }, { "epoch": 1.77, "learning_rate": 3.4808413361125e-05, "loss": 3.0368, "step": 6112 }, { "epoch": 1.77, "learning_rate": 3.4722467563537696e-05, "loss": 3.164, "step": 6113 }, { "epoch": 1.77, "learning_rate": 3.4636624185044266e-05, "loss": 3.0932, "step": 6114 }, { "epoch": 1.77, "learning_rate": 3.455088324454092e-05, "loss": 3.1401, "step": 6115 }, { "epoch": 1.77, "learning_rate": 3.44652447609014e-05, "loss": 2.9825, "step": 6116 }, { "epoch": 1.77, "learning_rate": 3.4379708752976804e-05, "loss": 3.115, "step": 6117 }, { "epoch": 1.77, "learning_rate": 3.429427523959577e-05, "loss": 3.1157, "step": 6118 }, { "epoch": 1.77, "learning_rate": 3.420894423956422e-05, "loss": 3.1178, "step": 6119 }, { "epoch": 1.77, "learning_rate": 3.4123715771665786e-05, "loss": 3.1586, "step": 6120 }, { "epoch": 1.77, "learning_rate": 3.403858985466129e-05, "loss": 3.0353, "step": 6121 }, { "epoch": 1.77, "learning_rate": 3.3953566507288927e-05, "loss": 3.0048, "step": 6122 }, { "epoch": 1.77, "learning_rate": 3.3868645748264534e-05, "loss": 3.2191, "step": 6123 }, { "epoch": 1.77, "learning_rate": 3.378382759628129e-05, "loss": 3.108, "step": 6124 }, { "epoch": 1.77, "learning_rate": 3.3699112070009865e-05, "loss": 3.2803, "step": 6125 }, { "epoch": 1.77, "learning_rate": 3.3614499188098e-05, "loss": 3.0845, "step": 6126 }, { "epoch": 1.77, "learning_rate": 3.352998896917131e-05, "loss": 3.0518, "step": 6127 }, { "epoch": 1.77, "learning_rate": 3.344558143183246e-05, "loss": 3.0412, "step": 6128 }, { "epoch": 1.78, "learning_rate": 3.3361276594661706e-05, "loss": 3.0978, "step": 6129 }, { "epoch": 1.78, "learning_rate": 3.327707447621653e-05, "loss": 3.2128, "step": 6130 }, { "epoch": 1.78, "learning_rate": 3.319297509503205e-05, "loss": 3.0835, "step": 6131 }, { "epoch": 1.78, "learning_rate": 3.3108978469620407e-05, "loss": 3.087, "step": 6132 }, { "epoch": 1.78, "learning_rate": 3.3025084618471535e-05, "loss": 3.0215, "step": 6133 }, { "epoch": 1.78, "learning_rate": 3.294129356005249e-05, "loss": 3.1951, "step": 6134 }, { "epoch": 1.78, "learning_rate": 3.2857605312807684e-05, "loss": 3.1377, "step": 6135 }, { "epoch": 1.78, "learning_rate": 3.27740198951591e-05, "loss": 3.2303, "step": 6136 }, { "epoch": 1.78, "learning_rate": 3.2690537325505806e-05, "loss": 3.073, "step": 6137 }, { "epoch": 1.78, "learning_rate": 3.260715762222449e-05, "loss": 3.1163, "step": 6138 }, { "epoch": 1.78, "learning_rate": 3.252388080366903e-05, "loss": 3.0205, "step": 6139 }, { "epoch": 1.78, "learning_rate": 3.24407068881708e-05, "loss": 3.0895, "step": 6140 }, { "epoch": 1.78, "learning_rate": 3.235763589403829e-05, "loss": 3.0907, "step": 6141 }, { "epoch": 1.78, "learning_rate": 3.227466783955757e-05, "loss": 3.1009, "step": 6142 }, { "epoch": 1.78, "learning_rate": 3.219180274299205e-05, "loss": 3.1725, "step": 6143 }, { "epoch": 1.78, "learning_rate": 3.210904062258219e-05, "loss": 3.0727, "step": 6144 }, { "epoch": 1.78, "learning_rate": 3.202638149654613e-05, "loss": 3.1022, "step": 6145 }, { "epoch": 1.78, "learning_rate": 3.1943825383079106e-05, "loss": 3.0993, "step": 6146 }, { "epoch": 1.78, "learning_rate": 3.1861372300353854e-05, "loss": 3.2258, "step": 6147 }, { "epoch": 1.78, "learning_rate": 3.177902226652024e-05, "loss": 3.043, "step": 6148 }, { "epoch": 1.78, "learning_rate": 3.16967752997056e-05, "loss": 3.1962, "step": 6149 }, { "epoch": 1.78, "learning_rate": 3.161463141801446e-05, "loss": 3.167, "step": 6150 }, { "epoch": 1.78, "learning_rate": 3.153259063952879e-05, "loss": 3.1604, "step": 6151 }, { "epoch": 1.78, "learning_rate": 3.145065298230782e-05, "loss": 3.2486, "step": 6152 }, { "epoch": 1.78, "learning_rate": 3.136881846438794e-05, "loss": 3.0451, "step": 6153 }, { "epoch": 1.78, "learning_rate": 3.128708710378308e-05, "loss": 3.0778, "step": 6154 }, { "epoch": 1.78, "learning_rate": 3.12054589184842e-05, "loss": 3.2212, "step": 6155 }, { "epoch": 1.78, "learning_rate": 3.112393392645985e-05, "loss": 3.1389, "step": 6156 }, { "epoch": 1.78, "learning_rate": 3.104251214565545e-05, "loss": 3.0437, "step": 6157 }, { "epoch": 1.78, "learning_rate": 3.0961193593994186e-05, "loss": 3.1085, "step": 6158 }, { "epoch": 1.78, "learning_rate": 3.087997828937627e-05, "loss": 3.2129, "step": 6159 }, { "epoch": 1.78, "learning_rate": 3.0798866249679036e-05, "loss": 3.0632, "step": 6160 }, { "epoch": 1.78, "learning_rate": 3.071785749275741e-05, "loss": 3.1146, "step": 6161 }, { "epoch": 1.78, "learning_rate": 3.063695203644329e-05, "loss": 3.1002, "step": 6162 }, { "epoch": 1.78, "learning_rate": 3.0556149898546046e-05, "loss": 3.1694, "step": 6163 }, { "epoch": 1.79, "learning_rate": 3.0475451096852237e-05, "loss": 3.1584, "step": 6164 }, { "epoch": 1.79, "learning_rate": 3.039485564912564e-05, "loss": 3.1646, "step": 6165 }, { "epoch": 1.79, "learning_rate": 3.0314363573107294e-05, "loss": 3.0997, "step": 6166 }, { "epoch": 1.79, "learning_rate": 3.0233974886515468e-05, "loss": 3.0548, "step": 6167 }, { "epoch": 1.79, "learning_rate": 3.0153689607045842e-05, "loss": 3.0232, "step": 6168 }, { "epoch": 1.79, "learning_rate": 3.0073507752371e-05, "loss": 3.1437, "step": 6169 }, { "epoch": 1.79, "learning_rate": 2.9993429340141153e-05, "loss": 3.0972, "step": 6170 }, { "epoch": 1.79, "learning_rate": 2.9913454387983318e-05, "loss": 3.0373, "step": 6171 }, { "epoch": 1.79, "learning_rate": 2.9833582913502132e-05, "loss": 3.2651, "step": 6172 }, { "epoch": 1.79, "learning_rate": 2.97538149342792e-05, "loss": 3.1755, "step": 6173 }, { "epoch": 1.79, "learning_rate": 2.967415046787353e-05, "loss": 3.1516, "step": 6174 }, { "epoch": 1.79, "learning_rate": 2.9594589531821036e-05, "loss": 3.145, "step": 6175 }, { "epoch": 1.79, "learning_rate": 2.9515132143635215e-05, "loss": 3.1525, "step": 6176 }, { "epoch": 1.79, "learning_rate": 2.9435778320806627e-05, "loss": 3.1297, "step": 6177 }, { "epoch": 1.79, "learning_rate": 2.9356528080802914e-05, "loss": 3.1787, "step": 6178 }, { "epoch": 1.79, "learning_rate": 2.9277381441069063e-05, "loss": 3.1076, "step": 6179 }, { "epoch": 1.79, "learning_rate": 2.9198338419027137e-05, "loss": 3.047, "step": 6180 }, { "epoch": 1.79, "learning_rate": 2.911939903207661e-05, "loss": 3.1579, "step": 6181 }, { "epoch": 1.79, "learning_rate": 2.9040563297593804e-05, "loss": 3.1297, "step": 6182 }, { "epoch": 1.79, "learning_rate": 2.8961831232932555e-05, "loss": 3.2644, "step": 6183 }, { "epoch": 1.79, "learning_rate": 2.8883202855423673e-05, "loss": 3.1341, "step": 6184 }, { "epoch": 1.79, "learning_rate": 2.8804678182375198e-05, "loss": 3.1868, "step": 6185 }, { "epoch": 1.79, "learning_rate": 2.8726257231072474e-05, "loss": 3.2603, "step": 6186 }, { "epoch": 1.79, "learning_rate": 2.864794001877774e-05, "loss": 3.1676, "step": 6187 }, { "epoch": 1.79, "learning_rate": 2.8569726562730658e-05, "loss": 3.1233, "step": 6188 }, { "epoch": 1.79, "learning_rate": 2.8491616880147843e-05, "loss": 3.069, "step": 6189 }, { "epoch": 1.79, "learning_rate": 2.841361098822326e-05, "loss": 3.0773, "step": 6190 }, { "epoch": 1.79, "learning_rate": 2.83357089041279e-05, "loss": 3.0975, "step": 6191 }, { "epoch": 1.79, "learning_rate": 2.825791064500993e-05, "loss": 3.0335, "step": 6192 }, { "epoch": 1.79, "learning_rate": 2.818021622799466e-05, "loss": 3.0387, "step": 6193 }, { "epoch": 1.79, "learning_rate": 2.8102625670184566e-05, "loss": 3.0713, "step": 6194 }, { "epoch": 1.79, "learning_rate": 2.8025138988659327e-05, "loss": 3.1511, "step": 6195 }, { "epoch": 1.79, "learning_rate": 2.7947756200475517e-05, "loss": 3.2123, "step": 6196 }, { "epoch": 1.79, "learning_rate": 2.7870477322667176e-05, "loss": 3.1706, "step": 6197 }, { "epoch": 1.79, "learning_rate": 2.7793302372245145e-05, "loss": 3.1443, "step": 6198 }, { "epoch": 1.8, "learning_rate": 2.7716231366197665e-05, "loss": 3.1652, "step": 6199 }, { "epoch": 1.8, "learning_rate": 2.7639264321489888e-05, "loss": 3.1763, "step": 6200 }, { "epoch": 1.8, "learning_rate": 2.7562401255064207e-05, "loss": 3.1487, "step": 6201 }, { "epoch": 1.8, "learning_rate": 2.7485642183840032e-05, "loss": 3.0922, "step": 6202 }, { "epoch": 1.8, "learning_rate": 2.740898712471396e-05, "loss": 3.0595, "step": 6203 }, { "epoch": 1.8, "learning_rate": 2.733243609455971e-05, "loss": 3.0, "step": 6204 }, { "epoch": 1.8, "learning_rate": 2.7255989110227973e-05, "loss": 3.0418, "step": 6205 }, { "epoch": 1.8, "learning_rate": 2.7179646188546738e-05, "loss": 3.1038, "step": 6206 }, { "epoch": 1.8, "learning_rate": 2.710340734632083e-05, "loss": 3.206, "step": 6207 }, { "epoch": 1.8, "learning_rate": 2.702727260033244e-05, "loss": 3.0555, "step": 6208 }, { "epoch": 1.8, "learning_rate": 2.695124196734061e-05, "loss": 3.1806, "step": 6209 }, { "epoch": 1.8, "learning_rate": 2.6875315464081564e-05, "loss": 3.0993, "step": 6210 }, { "epoch": 1.8, "learning_rate": 2.6799493107268646e-05, "loss": 3.152, "step": 6211 }, { "epoch": 1.8, "learning_rate": 2.672377491359229e-05, "loss": 3.157, "step": 6212 }, { "epoch": 1.8, "learning_rate": 2.6648160899719888e-05, "loss": 3.1223, "step": 6213 }, { "epoch": 1.8, "learning_rate": 2.657265108229584e-05, "loss": 3.2137, "step": 6214 }, { "epoch": 1.8, "learning_rate": 2.6497245477941967e-05, "loss": 3.0486, "step": 6215 }, { "epoch": 1.8, "learning_rate": 2.6421944103256656e-05, "loss": 3.1507, "step": 6216 }, { "epoch": 1.8, "learning_rate": 2.634674697481576e-05, "loss": 3.2027, "step": 6217 }, { "epoch": 1.8, "learning_rate": 2.6271654109171927e-05, "loss": 3.0697, "step": 6218 }, { "epoch": 1.8, "learning_rate": 2.6196665522855045e-05, "loss": 3.1612, "step": 6219 }, { "epoch": 1.8, "learning_rate": 2.6121781232371853e-05, "loss": 3.2194, "step": 6220 }, { "epoch": 1.8, "learning_rate": 2.604700125420645e-05, "loss": 3.096, "step": 6221 }, { "epoch": 1.8, "learning_rate": 2.5972325604819492e-05, "loss": 3.122, "step": 6222 }, { "epoch": 1.8, "learning_rate": 2.5897754300649112e-05, "loss": 3.2408, "step": 6223 }, { "epoch": 1.8, "learning_rate": 2.5823287358110292e-05, "loss": 2.9659, "step": 6224 }, { "epoch": 1.8, "learning_rate": 2.574892479359492e-05, "loss": 3.0851, "step": 6225 }, { "epoch": 1.8, "learning_rate": 2.5674666623472177e-05, "loss": 3.1031, "step": 6226 }, { "epoch": 1.8, "learning_rate": 2.560051286408793e-05, "loss": 3.1885, "step": 6227 }, { "epoch": 1.8, "learning_rate": 2.5526463531765464e-05, "loss": 3.2135, "step": 6228 }, { "epoch": 1.8, "learning_rate": 2.545251864280479e-05, "loss": 2.9322, "step": 6229 }, { "epoch": 1.8, "learning_rate": 2.537867821348305e-05, "loss": 3.1367, "step": 6230 }, { "epoch": 1.8, "learning_rate": 2.5304942260054253e-05, "loss": 3.1428, "step": 6231 }, { "epoch": 1.8, "learning_rate": 2.523131079874963e-05, "loss": 3.0451, "step": 6232 }, { "epoch": 1.81, "learning_rate": 2.5157783845777216e-05, "loss": 3.1494, "step": 6233 }, { "epoch": 1.81, "learning_rate": 2.5084361417322067e-05, "loss": 3.1537, "step": 6234 }, { "epoch": 1.81, "learning_rate": 2.5011043529546363e-05, "loss": 3.1055, "step": 6235 }, { "epoch": 1.81, "learning_rate": 2.4937830198589086e-05, "loss": 3.0113, "step": 6236 }, { "epoch": 1.81, "learning_rate": 2.486472144056634e-05, "loss": 3.1561, "step": 6237 }, { "epoch": 1.81, "learning_rate": 2.4791717271571202e-05, "loss": 3.2308, "step": 6238 }, { "epoch": 1.81, "learning_rate": 2.471881770767376e-05, "loss": 3.1286, "step": 6239 }, { "epoch": 1.81, "learning_rate": 2.464602276492084e-05, "loss": 3.0813, "step": 6240 }, { "epoch": 1.81, "learning_rate": 2.4573332459336628e-05, "loss": 3.101, "step": 6241 }, { "epoch": 1.81, "learning_rate": 2.450074680692188e-05, "loss": 3.1187, "step": 6242 }, { "epoch": 1.81, "learning_rate": 2.442826582365454e-05, "loss": 3.1196, "step": 6243 }, { "epoch": 1.81, "learning_rate": 2.4355889525489505e-05, "loss": 3.0235, "step": 6244 }, { "epoch": 1.81, "learning_rate": 2.428361792835848e-05, "loss": 3.2031, "step": 6245 }, { "epoch": 1.81, "learning_rate": 2.4211451048170296e-05, "loss": 3.1032, "step": 6246 }, { "epoch": 1.81, "learning_rate": 2.4139388900810688e-05, "loss": 3.1277, "step": 6247 }, { "epoch": 1.81, "learning_rate": 2.4067431502142413e-05, "loss": 3.0769, "step": 6248 }, { "epoch": 1.81, "learning_rate": 2.3995578868004908e-05, "loss": 3.0699, "step": 6249 }, { "epoch": 1.81, "learning_rate": 2.3923831014214803e-05, "loss": 3.084, "step": 6250 }, { "epoch": 1.81, "learning_rate": 2.3852187956565576e-05, "loss": 3.1903, "step": 6251 }, { "epoch": 1.81, "learning_rate": 2.378064971082755e-05, "loss": 3.1221, "step": 6252 }, { "epoch": 1.81, "learning_rate": 2.3709216292748194e-05, "loss": 3.0171, "step": 6253 }, { "epoch": 1.81, "learning_rate": 2.363788771805159e-05, "loss": 3.0426, "step": 6254 }, { "epoch": 1.81, "learning_rate": 2.3566664002439066e-05, "loss": 3.1029, "step": 6255 }, { "epoch": 1.81, "learning_rate": 2.3495545161588694e-05, "loss": 3.1745, "step": 6256 }, { "epoch": 1.81, "learning_rate": 2.3424531211155563e-05, "loss": 3.1584, "step": 6257 }, { "epoch": 1.81, "learning_rate": 2.335362216677139e-05, "loss": 3.1612, "step": 6258 }, { "epoch": 1.81, "learning_rate": 2.3282818044045306e-05, "loss": 3.223, "step": 6259 }, { "epoch": 1.81, "learning_rate": 2.321211885856278e-05, "loss": 3.0827, "step": 6260 }, { "epoch": 1.81, "learning_rate": 2.314152462588659e-05, "loss": 3.1879, "step": 6261 }, { "epoch": 1.81, "learning_rate": 2.3071035361556192e-05, "loss": 3.0752, "step": 6262 }, { "epoch": 1.81, "learning_rate": 2.3000651081088064e-05, "loss": 3.1168, "step": 6263 }, { "epoch": 1.81, "learning_rate": 2.2930371799975592e-05, "loss": 3.2242, "step": 6264 }, { "epoch": 1.81, "learning_rate": 2.2860197533688843e-05, "loss": 3.1362, "step": 6265 }, { "epoch": 1.81, "learning_rate": 2.2790128297675073e-05, "loss": 3.2278, "step": 6266 }, { "epoch": 1.81, "learning_rate": 2.272016410735811e-05, "loss": 3.1278, "step": 6267 }, { "epoch": 1.82, "learning_rate": 2.2650304978138912e-05, "loss": 3.1338, "step": 6268 }, { "epoch": 1.82, "learning_rate": 2.2580550925395128e-05, "loss": 3.1196, "step": 6269 }, { "epoch": 1.82, "learning_rate": 2.2510901964481358e-05, "loss": 3.14, "step": 6270 }, { "epoch": 1.82, "learning_rate": 2.2441358110729006e-05, "loss": 3.1376, "step": 6271 }, { "epoch": 1.82, "learning_rate": 2.237191937944649e-05, "loss": 3.0487, "step": 6272 }, { "epoch": 1.82, "learning_rate": 2.2302585785919094e-05, "loss": 3.1273, "step": 6273 }, { "epoch": 1.82, "learning_rate": 2.22333573454086e-05, "loss": 3.2614, "step": 6274 }, { "epoch": 1.82, "learning_rate": 2.21642340731541e-05, "loss": 3.1334, "step": 6275 }, { "epoch": 1.82, "learning_rate": 2.2095215984371197e-05, "loss": 3.1012, "step": 6276 }, { "epoch": 1.82, "learning_rate": 2.202630309425263e-05, "loss": 3.1209, "step": 6277 }, { "epoch": 1.82, "learning_rate": 2.195749541796771e-05, "loss": 3.0694, "step": 6278 }, { "epoch": 1.82, "learning_rate": 2.1888792970662706e-05, "loss": 3.1122, "step": 6279 }, { "epoch": 1.82, "learning_rate": 2.1820195767460803e-05, "loss": 3.0099, "step": 6280 }, { "epoch": 1.82, "learning_rate": 2.1751703823461866e-05, "loss": 3.0901, "step": 6281 }, { "epoch": 1.82, "learning_rate": 2.1683317153742778e-05, "loss": 3.1909, "step": 6282 }, { "epoch": 1.82, "learning_rate": 2.1615035773356996e-05, "loss": 3.1488, "step": 6283 }, { "epoch": 1.82, "learning_rate": 2.1546859697335052e-05, "loss": 3.1188, "step": 6284 }, { "epoch": 1.82, "learning_rate": 2.1478788940684114e-05, "loss": 3.1619, "step": 6285 }, { "epoch": 1.82, "learning_rate": 2.1410823518388302e-05, "loss": 3.1038, "step": 6286 }, { "epoch": 1.82, "learning_rate": 2.134296344540848e-05, "loss": 3.1216, "step": 6287 }, { "epoch": 1.82, "learning_rate": 2.1275208736682262e-05, "loss": 3.0857, "step": 6288 }, { "epoch": 1.82, "learning_rate": 2.1207559407124155e-05, "loss": 3.1864, "step": 6289 }, { "epoch": 1.82, "learning_rate": 2.114001547162542e-05, "loss": 3.0873, "step": 6290 }, { "epoch": 1.82, "learning_rate": 2.1072576945054324e-05, "loss": 3.1745, "step": 6291 }, { "epoch": 1.82, "learning_rate": 2.100524384225555e-05, "loss": 3.0182, "step": 6292 }, { "epoch": 1.82, "learning_rate": 2.0938016178050912e-05, "loss": 3.0848, "step": 6293 }, { "epoch": 1.82, "learning_rate": 2.0870893967238792e-05, "loss": 3.1946, "step": 6294 }, { "epoch": 1.82, "learning_rate": 2.0803877224594536e-05, "loss": 3.2287, "step": 6295 }, { "epoch": 1.82, "learning_rate": 2.0736965964870124e-05, "loss": 3.1475, "step": 6296 }, { "epoch": 1.82, "learning_rate": 2.067016020279433e-05, "loss": 3.1075, "step": 6297 }, { "epoch": 1.82, "learning_rate": 2.0603459953072834e-05, "loss": 3.1745, "step": 6298 }, { "epoch": 1.82, "learning_rate": 2.0536865230387948e-05, "loss": 3.1149, "step": 6299 }, { "epoch": 1.82, "learning_rate": 2.0470376049398942e-05, "loss": 3.3075, "step": 6300 }, { "epoch": 1.82, "learning_rate": 2.0403992424741613e-05, "loss": 3.0925, "step": 6301 }, { "epoch": 1.83, "learning_rate": 2.033771437102877e-05, "loss": 3.0459, "step": 6302 }, { "epoch": 1.83, "learning_rate": 2.0271541902849633e-05, "loss": 3.1562, "step": 6303 }, { "epoch": 1.83, "learning_rate": 2.0205475034770604e-05, "loss": 3.1506, "step": 6304 }, { "epoch": 1.83, "learning_rate": 2.013951378133455e-05, "loss": 3.2096, "step": 6305 }, { "epoch": 1.83, "learning_rate": 2.0073658157061137e-05, "loss": 3.1631, "step": 6306 }, { "epoch": 1.83, "learning_rate": 2.0007908176446877e-05, "loss": 3.1004, "step": 6307 }, { "epoch": 1.83, "learning_rate": 1.9942263853964915e-05, "loss": 3.1105, "step": 6308 }, { "epoch": 1.83, "learning_rate": 1.987672520406525e-05, "loss": 3.1035, "step": 6309 }, { "epoch": 1.83, "learning_rate": 1.9811292241174562e-05, "loss": 3.1308, "step": 6310 }, { "epoch": 1.83, "learning_rate": 1.974596497969622e-05, "loss": 3.1432, "step": 6311 }, { "epoch": 1.83, "learning_rate": 1.9680743434010386e-05, "loss": 3.1862, "step": 6312 }, { "epoch": 1.83, "learning_rate": 1.9615627618473964e-05, "loss": 3.1164, "step": 6313 }, { "epoch": 1.83, "learning_rate": 1.955061754742049e-05, "loss": 3.0971, "step": 6314 }, { "epoch": 1.83, "learning_rate": 1.9485713235160295e-05, "loss": 3.1791, "step": 6315 }, { "epoch": 1.83, "learning_rate": 1.9420914695980563e-05, "loss": 3.0816, "step": 6316 }, { "epoch": 1.83, "learning_rate": 1.9356221944144824e-05, "loss": 3.0991, "step": 6317 }, { "epoch": 1.83, "learning_rate": 1.9291634993893803e-05, "loss": 3.1261, "step": 6318 }, { "epoch": 1.83, "learning_rate": 1.9227153859444456e-05, "loss": 3.0616, "step": 6319 }, { "epoch": 1.83, "learning_rate": 1.9162778554990822e-05, "loss": 3.1082, "step": 6320 }, { "epoch": 1.83, "learning_rate": 1.9098509094703452e-05, "loss": 3.1024, "step": 6321 }, { "epoch": 1.83, "learning_rate": 1.90343454927297e-05, "loss": 3.0202, "step": 6322 }, { "epoch": 1.83, "learning_rate": 1.8970287763193428e-05, "loss": 3.0735, "step": 6323 }, { "epoch": 1.83, "learning_rate": 1.8906335920195416e-05, "loss": 3.1481, "step": 6324 }, { "epoch": 1.83, "learning_rate": 1.884248997781307e-05, "loss": 3.1614, "step": 6325 }, { "epoch": 1.83, "learning_rate": 1.877874995010037e-05, "loss": 2.9909, "step": 6326 }, { "epoch": 1.83, "learning_rate": 1.871511585108826e-05, "loss": 3.1483, "step": 6327 }, { "epoch": 1.83, "learning_rate": 1.8651587694783922e-05, "loss": 3.1272, "step": 6328 }, { "epoch": 1.83, "learning_rate": 1.8588165495171673e-05, "loss": 3.0809, "step": 6329 }, { "epoch": 1.83, "learning_rate": 1.852484926621223e-05, "loss": 3.0576, "step": 6330 }, { "epoch": 1.83, "learning_rate": 1.8461639021843058e-05, "loss": 3.0726, "step": 6331 }, { "epoch": 1.83, "learning_rate": 1.8398534775978304e-05, "loss": 3.1004, "step": 6332 }, { "epoch": 1.83, "learning_rate": 1.8335536542508746e-05, "loss": 3.1923, "step": 6333 }, { "epoch": 1.83, "learning_rate": 1.827264433530196e-05, "loss": 3.1047, "step": 6334 }, { "epoch": 1.83, "learning_rate": 1.820985816820192e-05, "loss": 3.2106, "step": 6335 }, { "epoch": 1.83, "learning_rate": 1.8147178055029577e-05, "loss": 3.0289, "step": 6336 }, { "epoch": 1.84, "learning_rate": 1.8084604009582227e-05, "loss": 3.0494, "step": 6337 }, { "epoch": 1.84, "learning_rate": 1.802213604563413e-05, "loss": 3.1464, "step": 6338 }, { "epoch": 1.84, "learning_rate": 1.7959774176935837e-05, "loss": 3.1448, "step": 6339 }, { "epoch": 1.84, "learning_rate": 1.7897518417214876e-05, "loss": 3.0576, "step": 6340 }, { "epoch": 1.84, "learning_rate": 1.7835368780175164e-05, "loss": 3.146, "step": 6341 }, { "epoch": 1.84, "learning_rate": 1.7773325279497487e-05, "loss": 3.1263, "step": 6342 }, { "epoch": 1.84, "learning_rate": 1.771138792883914e-05, "loss": 3.1386, "step": 6343 }, { "epoch": 1.84, "learning_rate": 1.7649556741833994e-05, "loss": 3.1723, "step": 6344 }, { "epoch": 1.84, "learning_rate": 1.7587831732092717e-05, "loss": 3.3039, "step": 6345 }, { "epoch": 1.84, "learning_rate": 1.7526212913202444e-05, "loss": 3.1389, "step": 6346 }, { "epoch": 1.84, "learning_rate": 1.7464700298727042e-05, "loss": 3.1866, "step": 6347 }, { "epoch": 1.84, "learning_rate": 1.740329390220685e-05, "loss": 3.173, "step": 6348 }, { "epoch": 1.84, "learning_rate": 1.734199373715911e-05, "loss": 3.0617, "step": 6349 }, { "epoch": 1.84, "learning_rate": 1.7280799817077362e-05, "loss": 3.067, "step": 6350 }, { "epoch": 1.84, "learning_rate": 1.7219712155431945e-05, "loss": 2.9978, "step": 6351 }, { "epoch": 1.84, "learning_rate": 1.7158730765669816e-05, "loss": 3.0746, "step": 6352 }, { "epoch": 1.84, "learning_rate": 1.7097855661214357e-05, "loss": 3.005, "step": 6353 }, { "epoch": 1.84, "learning_rate": 1.70370868554659e-05, "loss": 3.0737, "step": 6354 }, { "epoch": 1.84, "learning_rate": 1.6976424361800912e-05, "loss": 3.0536, "step": 6355 }, { "epoch": 1.84, "learning_rate": 1.6915868193572935e-05, "loss": 3.0977, "step": 6356 }, { "epoch": 1.84, "learning_rate": 1.6855418364111695e-05, "loss": 3.182, "step": 6357 }, { "epoch": 1.84, "learning_rate": 1.6795074886723827e-05, "loss": 3.1073, "step": 6358 }, { "epoch": 1.84, "learning_rate": 1.673483777469237e-05, "loss": 3.1384, "step": 6359 }, { "epoch": 1.84, "learning_rate": 1.6674707041276936e-05, "loss": 3.1393, "step": 6360 }, { "epoch": 1.84, "learning_rate": 1.6614682699713944e-05, "loss": 3.2779, "step": 6361 }, { "epoch": 1.84, "learning_rate": 1.6554764763216046e-05, "loss": 3.1476, "step": 6362 }, { "epoch": 1.84, "learning_rate": 1.6494953244972854e-05, "loss": 3.0892, "step": 6363 }, { "epoch": 1.84, "learning_rate": 1.643524815815023e-05, "loss": 3.1203, "step": 6364 }, { "epoch": 1.84, "learning_rate": 1.637564951589082e-05, "loss": 3.0482, "step": 6365 }, { "epoch": 1.84, "learning_rate": 1.631615733131364e-05, "loss": 3.1882, "step": 6366 }, { "epoch": 1.84, "learning_rate": 1.6256771617514487e-05, "loss": 3.0241, "step": 6367 }, { "epoch": 1.84, "learning_rate": 1.6197492387565627e-05, "loss": 3.0305, "step": 6368 }, { "epoch": 1.84, "learning_rate": 1.6138319654515897e-05, "loss": 2.9972, "step": 6369 }, { "epoch": 1.84, "learning_rate": 1.607925343139066e-05, "loss": 3.161, "step": 6370 }, { "epoch": 1.85, "learning_rate": 1.6020293731191794e-05, "loss": 3.1261, "step": 6371 }, { "epoch": 1.85, "learning_rate": 1.596144056689791e-05, "loss": 3.0534, "step": 6372 }, { "epoch": 1.85, "learning_rate": 1.5902693951463877e-05, "loss": 3.0869, "step": 6373 }, { "epoch": 1.85, "learning_rate": 1.5844053897821454e-05, "loss": 2.9584, "step": 6374 }, { "epoch": 1.85, "learning_rate": 1.5785520418878597e-05, "loss": 3.1653, "step": 6375 }, { "epoch": 1.85, "learning_rate": 1.572709352752011e-05, "loss": 3.0914, "step": 6376 }, { "epoch": 1.85, "learning_rate": 1.566877323660709e-05, "loss": 3.119, "step": 6377 }, { "epoch": 1.85, "learning_rate": 1.5610559558977446e-05, "loss": 3.213, "step": 6378 }, { "epoch": 1.85, "learning_rate": 1.55524525074453e-05, "loss": 3.121, "step": 6379 }, { "epoch": 1.85, "learning_rate": 1.5494452094801494e-05, "loss": 3.0992, "step": 6380 }, { "epoch": 1.85, "learning_rate": 1.5436558333813356e-05, "loss": 3.1636, "step": 6381 }, { "epoch": 1.85, "learning_rate": 1.537877123722464e-05, "loss": 3.1216, "step": 6382 }, { "epoch": 1.85, "learning_rate": 1.53210908177559e-05, "loss": 3.1121, "step": 6383 }, { "epoch": 1.85, "learning_rate": 1.526351708810386e-05, "loss": 3.0118, "step": 6384 }, { "epoch": 1.85, "learning_rate": 1.5206050060942001e-05, "loss": 3.245, "step": 6385 }, { "epoch": 1.85, "learning_rate": 1.5148689748920197e-05, "loss": 3.0742, "step": 6386 }, { "epoch": 1.85, "learning_rate": 1.5091436164664963e-05, "loss": 3.1396, "step": 6387 }, { "epoch": 1.85, "learning_rate": 1.503428932077916e-05, "loss": 3.1392, "step": 6388 }, { "epoch": 1.85, "learning_rate": 1.4977249229842172e-05, "loss": 3.0874, "step": 6389 }, { "epoch": 1.85, "learning_rate": 1.4920315904410064e-05, "loss": 3.083, "step": 6390 }, { "epoch": 1.85, "learning_rate": 1.4863489357015093e-05, "loss": 3.099, "step": 6391 }, { "epoch": 1.85, "learning_rate": 1.4806769600166359e-05, "loss": 3.119, "step": 6392 }, { "epoch": 1.85, "learning_rate": 1.4750156646349156e-05, "loss": 3.1154, "step": 6393 }, { "epoch": 1.85, "learning_rate": 1.4693650508025513e-05, "loss": 3.1144, "step": 6394 }, { "epoch": 1.85, "learning_rate": 1.4637251197633705e-05, "loss": 3.168, "step": 6395 }, { "epoch": 1.85, "learning_rate": 1.4580958727588745e-05, "loss": 3.0573, "step": 6396 }, { "epoch": 1.85, "learning_rate": 1.4524773110281885e-05, "loss": 3.1149, "step": 6397 }, { "epoch": 1.85, "learning_rate": 1.4468694358081013e-05, "loss": 3.0852, "step": 6398 }, { "epoch": 1.85, "learning_rate": 1.4412722483330532e-05, "loss": 3.2055, "step": 6399 }, { "epoch": 1.85, "learning_rate": 1.4356857498351029e-05, "loss": 3.0752, "step": 6400 }, { "epoch": 1.85, "learning_rate": 1.430109941544e-05, "loss": 3.1552, "step": 6401 }, { "epoch": 1.85, "learning_rate": 1.4245448246871017e-05, "loss": 3.242, "step": 6402 }, { "epoch": 1.85, "learning_rate": 1.4189904004894338e-05, "loss": 3.1432, "step": 6403 }, { "epoch": 1.85, "learning_rate": 1.413446670173657e-05, "loss": 3.1599, "step": 6404 }, { "epoch": 1.85, "learning_rate": 1.4079136349601008e-05, "loss": 3.2014, "step": 6405 }, { "epoch": 1.86, "learning_rate": 1.4023912960667017e-05, "loss": 3.1798, "step": 6406 }, { "epoch": 1.86, "learning_rate": 1.3968796547090767e-05, "loss": 3.1114, "step": 6407 }, { "epoch": 1.86, "learning_rate": 1.3913787121004718e-05, "loss": 3.1716, "step": 6408 }, { "epoch": 1.86, "learning_rate": 1.3858884694517736e-05, "loss": 3.2266, "step": 6409 }, { "epoch": 1.86, "learning_rate": 1.3804089279715326e-05, "loss": 3.1591, "step": 6410 }, { "epoch": 1.86, "learning_rate": 1.374940088865917e-05, "loss": 3.1218, "step": 6411 }, { "epoch": 1.86, "learning_rate": 1.3694819533387636e-05, "loss": 3.1426, "step": 6412 }, { "epoch": 1.86, "learning_rate": 1.364034522591534e-05, "loss": 3.0111, "step": 6413 }, { "epoch": 1.86, "learning_rate": 1.3585977978233577e-05, "loss": 3.0267, "step": 6414 }, { "epoch": 1.86, "learning_rate": 1.3531717802309829e-05, "loss": 3.1814, "step": 6415 }, { "epoch": 1.86, "learning_rate": 1.3477564710088097e-05, "loss": 3.2315, "step": 6416 }, { "epoch": 1.86, "learning_rate": 1.3423518713488847e-05, "loss": 3.1598, "step": 6417 }, { "epoch": 1.86, "learning_rate": 1.336957982440884e-05, "loss": 3.0794, "step": 6418 }, { "epoch": 1.86, "learning_rate": 1.3315748054721466e-05, "loss": 3.2218, "step": 6419 }, { "epoch": 1.86, "learning_rate": 1.3262023416276414e-05, "loss": 3.0482, "step": 6420 }, { "epoch": 1.86, "learning_rate": 1.3208405920899835e-05, "loss": 3.0691, "step": 6421 }, { "epoch": 1.86, "learning_rate": 1.3154895580394177e-05, "loss": 3.224, "step": 6422 }, { "epoch": 1.86, "learning_rate": 1.3101492406538518e-05, "loss": 3.112, "step": 6423 }, { "epoch": 1.86, "learning_rate": 1.3048196411088009e-05, "loss": 3.0941, "step": 6424 }, { "epoch": 1.86, "learning_rate": 1.2995007605774656e-05, "loss": 3.1237, "step": 6425 }, { "epoch": 1.86, "learning_rate": 1.2941926002306537e-05, "loss": 3.0528, "step": 6426 }, { "epoch": 1.86, "learning_rate": 1.2888951612368082e-05, "loss": 3.0388, "step": 6427 }, { "epoch": 1.86, "learning_rate": 1.2836084447620467e-05, "loss": 3.0796, "step": 6428 }, { "epoch": 1.86, "learning_rate": 1.2783324519700935e-05, "loss": 3.0084, "step": 6429 }, { "epoch": 1.86, "learning_rate": 1.2730671840223362e-05, "loss": 3.0655, "step": 6430 }, { "epoch": 1.86, "learning_rate": 1.2678126420777814e-05, "loss": 3.044, "step": 6431 }, { "epoch": 1.86, "learning_rate": 1.2625688272930925e-05, "loss": 3.0145, "step": 6432 }, { "epoch": 1.86, "learning_rate": 1.2573357408225516e-05, "loss": 3.1551, "step": 6433 }, { "epoch": 1.86, "learning_rate": 1.2521133838181043e-05, "loss": 3.2641, "step": 6434 }, { "epoch": 1.86, "learning_rate": 1.2469017574293085e-05, "loss": 3.1316, "step": 6435 }, { "epoch": 1.86, "learning_rate": 1.2417008628033688e-05, "loss": 3.2041, "step": 6436 }, { "epoch": 1.86, "learning_rate": 1.2365107010851417e-05, "loss": 3.115, "step": 6437 }, { "epoch": 1.86, "learning_rate": 1.2313312734171078e-05, "loss": 2.9428, "step": 6438 }, { "epoch": 1.86, "learning_rate": 1.2261625809393884e-05, "loss": 3.0693, "step": 6439 }, { "epoch": 1.87, "learning_rate": 1.2210046247897344e-05, "loss": 3.0712, "step": 6440 }, { "epoch": 1.87, "learning_rate": 1.2158574061035432e-05, "loss": 3.0785, "step": 6441 }, { "epoch": 1.87, "learning_rate": 1.2107209260138475e-05, "loss": 3.0498, "step": 6442 }, { "epoch": 1.87, "learning_rate": 1.2055951856513147e-05, "loss": 3.1652, "step": 6443 }, { "epoch": 1.87, "learning_rate": 1.2004801861442372e-05, "loss": 3.0856, "step": 6444 }, { "epoch": 1.87, "learning_rate": 1.1953759286185584e-05, "loss": 3.1266, "step": 6445 }, { "epoch": 1.87, "learning_rate": 1.1902824141978575e-05, "loss": 3.0183, "step": 6446 }, { "epoch": 1.87, "learning_rate": 1.185199644003332e-05, "loss": 3.1794, "step": 6447 }, { "epoch": 1.87, "learning_rate": 1.1801276191538369e-05, "loss": 3.1448, "step": 6448 }, { "epoch": 1.87, "learning_rate": 1.1750663407658402e-05, "loss": 3.0938, "step": 6449 }, { "epoch": 1.87, "learning_rate": 1.1700158099534618e-05, "loss": 3.172, "step": 6450 }, { "epoch": 1.87, "learning_rate": 1.164976027828446e-05, "loss": 3.1913, "step": 6451 }, { "epoch": 1.87, "learning_rate": 1.1599469955001718e-05, "loss": 3.0513, "step": 6452 }, { "epoch": 1.87, "learning_rate": 1.1549287140756592e-05, "loss": 3.1635, "step": 6453 }, { "epoch": 1.87, "learning_rate": 1.149921184659547e-05, "loss": 3.0089, "step": 6454 }, { "epoch": 1.87, "learning_rate": 1.1449244083541199e-05, "loss": 3.1717, "step": 6455 }, { "epoch": 1.87, "learning_rate": 1.1399383862592927e-05, "loss": 3.2397, "step": 6456 }, { "epoch": 1.87, "learning_rate": 1.1349631194726151e-05, "loss": 3.073, "step": 6457 }, { "epoch": 1.87, "learning_rate": 1.129998609089261e-05, "loss": 3.0997, "step": 6458 }, { "epoch": 1.87, "learning_rate": 1.125044856202051e-05, "loss": 3.1539, "step": 6459 }, { "epoch": 1.87, "learning_rate": 1.1201018619014181e-05, "loss": 3.1912, "step": 6460 }, { "epoch": 1.87, "learning_rate": 1.1151696272754475e-05, "loss": 3.1343, "step": 6461 }, { "epoch": 1.87, "learning_rate": 1.1102481534098374e-05, "loss": 3.1613, "step": 6462 }, { "epoch": 1.87, "learning_rate": 1.1053374413879269e-05, "loss": 3.1971, "step": 6463 }, { "epoch": 1.87, "learning_rate": 1.1004374922906847e-05, "loss": 3.2249, "step": 6464 }, { "epoch": 1.87, "learning_rate": 1.0955483071967144e-05, "loss": 3.2099, "step": 6465 }, { "epoch": 1.87, "learning_rate": 1.0906698871822552e-05, "loss": 3.1964, "step": 6466 }, { "epoch": 1.87, "learning_rate": 1.0858022333211481e-05, "loss": 3.04, "step": 6467 }, { "epoch": 1.87, "learning_rate": 1.0809453466849028e-05, "loss": 3.1474, "step": 6468 }, { "epoch": 1.87, "learning_rate": 1.0760992283426252e-05, "loss": 3.1436, "step": 6469 }, { "epoch": 1.87, "learning_rate": 1.0712638793610785e-05, "loss": 3.1062, "step": 6470 }, { "epoch": 1.87, "learning_rate": 1.0664393008046281e-05, "loss": 3.1872, "step": 6471 }, { "epoch": 1.87, "learning_rate": 1.0616254937352964e-05, "loss": 2.9585, "step": 6472 }, { "epoch": 1.87, "learning_rate": 1.0568224592127197e-05, "loss": 3.2496, "step": 6473 }, { "epoch": 1.87, "learning_rate": 1.0520301982941572e-05, "loss": 3.164, "step": 6474 }, { "epoch": 1.88, "learning_rate": 1.0472487120345097e-05, "loss": 3.2019, "step": 6475 }, { "epoch": 1.88, "learning_rate": 1.0424780014863022e-05, "loss": 3.1041, "step": 6476 }, { "epoch": 1.88, "learning_rate": 1.0377180676996834e-05, "loss": 3.1425, "step": 6477 }, { "epoch": 1.88, "learning_rate": 1.0329689117224261e-05, "loss": 3.0253, "step": 6478 }, { "epoch": 1.88, "learning_rate": 1.0282305345999498e-05, "loss": 3.1091, "step": 6479 }, { "epoch": 1.88, "learning_rate": 1.0235029373752758e-05, "loss": 3.1686, "step": 6480 }, { "epoch": 1.88, "learning_rate": 1.0187861210890714e-05, "loss": 3.1576, "step": 6481 }, { "epoch": 1.88, "learning_rate": 1.014080086779634e-05, "loss": 3.1219, "step": 6482 }, { "epoch": 1.88, "learning_rate": 1.0093848354828572e-05, "loss": 3.0322, "step": 6483 }, { "epoch": 1.88, "learning_rate": 1.004700368232303e-05, "loss": 2.9939, "step": 6484 }, { "epoch": 1.88, "learning_rate": 1.0000266860591245e-05, "loss": 3.0838, "step": 6485 }, { "epoch": 1.88, "learning_rate": 9.953637899921209e-06, "loss": 3.058, "step": 6486 }, { "epoch": 1.88, "learning_rate": 9.907116810577043e-06, "loss": 3.1716, "step": 6487 }, { "epoch": 1.88, "learning_rate": 9.860703602799281e-06, "loss": 3.1987, "step": 6488 }, { "epoch": 1.88, "learning_rate": 9.814398286804582e-06, "loss": 3.1385, "step": 6489 }, { "epoch": 1.88, "learning_rate": 9.768200872785848e-06, "loss": 3.2481, "step": 6490 }, { "epoch": 1.88, "learning_rate": 9.722111370912334e-06, "loss": 3.0922, "step": 6491 }, { "epoch": 1.88, "learning_rate": 9.67612979132948e-06, "loss": 3.0689, "step": 6492 }, { "epoch": 1.88, "learning_rate": 9.63025614415891e-06, "loss": 3.1911, "step": 6493 }, { "epoch": 1.88, "learning_rate": 9.584490439498605e-06, "loss": 3.1391, "step": 6494 }, { "epoch": 1.88, "learning_rate": 9.538832687422728e-06, "loss": 3.165, "step": 6495 }, { "epoch": 1.88, "learning_rate": 9.49328289798157e-06, "loss": 3.1902, "step": 6496 }, { "epoch": 1.88, "learning_rate": 9.447841081201947e-06, "loss": 3.105, "step": 6497 }, { "epoch": 1.88, "learning_rate": 9.402507247086578e-06, "loss": 3.1774, "step": 6498 }, { "epoch": 1.88, "learning_rate": 9.357281405614648e-06, "loss": 3.2452, "step": 6499 }, { "epoch": 1.88, "learning_rate": 9.312163566741416e-06, "loss": 3.134, "step": 6500 }, { "epoch": 1.88, "learning_rate": 9.267153740398494e-06, "loss": 3.061, "step": 6501 }, { "epoch": 1.88, "learning_rate": 9.222251936493674e-06, "loss": 3.1878, "step": 6502 }, { "epoch": 1.88, "learning_rate": 9.17745816491089e-06, "loss": 3.1199, "step": 6503 }, { "epoch": 1.88, "learning_rate": 9.132772435510362e-06, "loss": 3.0505, "step": 6504 }, { "epoch": 1.88, "learning_rate": 9.088194758128555e-06, "loss": 3.136, "step": 6505 }, { "epoch": 1.88, "learning_rate": 9.043725142578118e-06, "loss": 3.0947, "step": 6506 }, { "epoch": 1.88, "learning_rate": 8.999363598647892e-06, "loss": 3.1023, "step": 6507 }, { "epoch": 1.88, "learning_rate": 8.95511013610295e-06, "loss": 3.1275, "step": 6508 }, { "epoch": 1.89, "learning_rate": 8.91096476468467e-06, "loss": 3.2256, "step": 6509 }, { "epoch": 1.89, "learning_rate": 8.866927494110388e-06, "loss": 3.1785, "step": 6510 }, { "epoch": 1.89, "learning_rate": 8.822998334073905e-06, "loss": 3.0855, "step": 6511 }, { "epoch": 1.89, "learning_rate": 8.779177294245044e-06, "loss": 3.127, "step": 6512 }, { "epoch": 1.89, "learning_rate": 8.73546438426992e-06, "loss": 3.1186, "step": 6513 }, { "epoch": 1.89, "learning_rate": 8.691859613770836e-06, "loss": 3.0505, "step": 6514 }, { "epoch": 1.89, "learning_rate": 8.648362992346336e-06, "loss": 3.1687, "step": 6515 }, { "epoch": 1.89, "learning_rate": 8.604974529571042e-06, "loss": 3.1518, "step": 6516 }, { "epoch": 1.89, "learning_rate": 8.561694234995754e-06, "loss": 3.1206, "step": 6517 }, { "epoch": 1.89, "learning_rate": 8.518522118147742e-06, "loss": 3.227, "step": 6518 }, { "epoch": 1.89, "learning_rate": 8.475458188530016e-06, "loss": 3.1349, "step": 6519 }, { "epoch": 1.89, "learning_rate": 8.432502455622215e-06, "loss": 3.1655, "step": 6520 }, { "epoch": 1.89, "learning_rate": 8.389654928879831e-06, "loss": 3.1374, "step": 6521 }, { "epoch": 1.89, "learning_rate": 8.346915617734707e-06, "loss": 3.1228, "step": 6522 }, { "epoch": 1.89, "learning_rate": 8.304284531594819e-06, "loss": 3.1044, "step": 6523 }, { "epoch": 1.89, "learning_rate": 8.261761679844327e-06, "loss": 3.1756, "step": 6524 }, { "epoch": 1.89, "learning_rate": 8.21934707184352e-06, "loss": 3.2662, "step": 6525 }, { "epoch": 1.89, "learning_rate": 8.177040716928986e-06, "loss": 3.1271, "step": 6526 }, { "epoch": 1.89, "learning_rate": 8.134842624413385e-06, "loss": 3.1252, "step": 6527 }, { "epoch": 1.89, "learning_rate": 8.092752803585512e-06, "loss": 3.1733, "step": 6528 }, { "epoch": 1.89, "learning_rate": 8.0507712637104e-06, "loss": 3.1694, "step": 6529 }, { "epoch": 1.89, "learning_rate": 8.008898014029209e-06, "loss": 3.1329, "step": 6530 }, { "epoch": 1.89, "learning_rate": 7.967133063759291e-06, "loss": 3.2065, "step": 6531 }, { "epoch": 1.89, "learning_rate": 7.925476422094124e-06, "loss": 3.2147, "step": 6532 }, { "epoch": 1.89, "learning_rate": 7.883928098203374e-06, "loss": 3.2289, "step": 6533 }, { "epoch": 1.89, "learning_rate": 7.842488101232892e-06, "loss": 3.1734, "step": 6534 }, { "epoch": 1.89, "learning_rate": 7.801156440304657e-06, "loss": 3.1445, "step": 6535 }, { "epoch": 1.89, "learning_rate": 7.759933124516727e-06, "loss": 3.0373, "step": 6536 }, { "epoch": 1.89, "learning_rate": 7.718818162943397e-06, "loss": 3.1655, "step": 6537 }, { "epoch": 1.89, "learning_rate": 7.67781156463515e-06, "loss": 3.1298, "step": 6538 }, { "epoch": 1.89, "learning_rate": 7.636913338618379e-06, "loss": 3.151, "step": 6539 }, { "epoch": 1.89, "learning_rate": 7.59612349389599e-06, "loss": 3.1662, "step": 6540 }, { "epoch": 1.89, "learning_rate": 7.5554420394467475e-06, "loss": 3.1336, "step": 6541 }, { "epoch": 1.89, "learning_rate": 7.514868984225598e-06, "loss": 3.1456, "step": 6542 }, { "epoch": 1.89, "learning_rate": 7.474404337163731e-06, "loss": 3.1809, "step": 6543 }, { "epoch": 1.9, "learning_rate": 7.434048107168523e-06, "loss": 3.07, "step": 6544 }, { "epoch": 1.9, "learning_rate": 7.3938003031231994e-06, "loss": 3.0673, "step": 6545 }, { "epoch": 1.9, "learning_rate": 7.353660933887396e-06, "loss": 3.3123, "step": 6546 }, { "epoch": 1.9, "learning_rate": 7.3136300082967124e-06, "loss": 3.0501, "step": 6547 }, { "epoch": 1.9, "learning_rate": 7.273707535162988e-06, "loss": 3.1938, "step": 6548 }, { "epoch": 1.9, "learning_rate": 7.233893523274193e-06, "loss": 3.1424, "step": 6549 }, { "epoch": 1.9, "learning_rate": 7.1941879813943176e-06, "loss": 3.1305, "step": 6550 }, { "epoch": 1.9, "learning_rate": 7.154590918263482e-06, "loss": 3.2114, "step": 6551 }, { "epoch": 1.9, "learning_rate": 7.115102342598101e-06, "loss": 2.985, "step": 6552 }, { "epoch": 1.9, "learning_rate": 7.075722263090556e-06, "loss": 2.9967, "step": 6553 }, { "epoch": 1.9, "learning_rate": 7.036450688409302e-06, "loss": 3.2987, "step": 6554 }, { "epoch": 1.9, "learning_rate": 6.997287627199034e-06, "loss": 3.1176, "step": 6555 }, { "epoch": 1.9, "learning_rate": 6.9582330880805235e-06, "loss": 2.9769, "step": 6556 }, { "epoch": 1.9, "learning_rate": 6.9192870796506155e-06, "loss": 3.0675, "step": 6557 }, { "epoch": 1.9, "learning_rate": 6.8804496104823425e-06, "loss": 3.1121, "step": 6558 }, { "epoch": 1.9, "learning_rate": 6.841720689124698e-06, "loss": 2.953, "step": 6559 }, { "epoch": 1.9, "learning_rate": 6.803100324102918e-06, "loss": 3.047, "step": 6560 }, { "epoch": 1.9, "learning_rate": 6.764588523918314e-06, "loss": 3.0858, "step": 6561 }, { "epoch": 1.9, "learning_rate": 6.726185297048326e-06, "loss": 3.0979, "step": 6562 }, { "epoch": 1.9, "learning_rate": 6.68789065194636e-06, "loss": 3.1084, "step": 6563 }, { "epoch": 1.9, "learning_rate": 6.649704597042061e-06, "loss": 3.1673, "step": 6564 }, { "epoch": 1.9, "learning_rate": 6.611627140741206e-06, "loss": 3.0525, "step": 6565 }, { "epoch": 1.9, "learning_rate": 6.573658291425421e-06, "loss": 3.1141, "step": 6566 }, { "epoch": 1.9, "learning_rate": 6.535798057452691e-06, "loss": 3.0109, "step": 6567 }, { "epoch": 1.9, "learning_rate": 6.498046447156958e-06, "loss": 3.0086, "step": 6568 }, { "epoch": 1.9, "learning_rate": 6.460403468848353e-06, "loss": 3.152, "step": 6569 }, { "epoch": 1.9, "learning_rate": 6.422869130812914e-06, "loss": 3.0007, "step": 6570 }, { "epoch": 1.9, "learning_rate": 6.385443441312977e-06, "loss": 3.0666, "step": 6571 }, { "epoch": 1.9, "learning_rate": 6.348126408586841e-06, "loss": 3.1098, "step": 6572 }, { "epoch": 1.9, "learning_rate": 6.310918040848823e-06, "loss": 3.2256, "step": 6573 }, { "epoch": 1.9, "learning_rate": 6.273818346289539e-06, "loss": 3.0961, "step": 6574 }, { "epoch": 1.9, "learning_rate": 6.236827333075401e-06, "loss": 3.0497, "step": 6575 }, { "epoch": 1.9, "learning_rate": 6.199945009349173e-06, "loss": 3.0716, "step": 6576 }, { "epoch": 1.9, "learning_rate": 6.163171383229527e-06, "loss": 3.1237, "step": 6577 }, { "epoch": 1.91, "learning_rate": 6.12650646281121e-06, "loss": 3.0781, "step": 6578 }, { "epoch": 1.91, "learning_rate": 6.0899502561651554e-06, "loss": 3.1141, "step": 6579 }, { "epoch": 1.91, "learning_rate": 6.053502771338204e-06, "loss": 3.161, "step": 6580 }, { "epoch": 1.91, "learning_rate": 6.017164016353438e-06, "loss": 3.1361, "step": 6581 }, { "epoch": 1.91, "learning_rate": 5.980933999209792e-06, "loss": 3.1931, "step": 6582 }, { "epoch": 1.91, "learning_rate": 5.9448127278824986e-06, "loss": 3.1015, "step": 6583 }, { "epoch": 1.91, "learning_rate": 5.908800210322696e-06, "loss": 3.1451, "step": 6584 }, { "epoch": 1.91, "learning_rate": 5.872896454457655e-06, "loss": 3.1517, "step": 6585 }, { "epoch": 1.91, "learning_rate": 5.837101468190609e-06, "loss": 3.0217, "step": 6586 }, { "epoch": 1.91, "learning_rate": 5.8014152594010324e-06, "loss": 3.0352, "step": 6587 }, { "epoch": 1.91, "learning_rate": 5.76583783594431e-06, "loss": 3.0468, "step": 6588 }, { "epoch": 1.91, "learning_rate": 5.730369205651842e-06, "loss": 3.2401, "step": 6589 }, { "epoch": 1.91, "learning_rate": 5.695009376331217e-06, "loss": 3.0762, "step": 6590 }, { "epoch": 1.91, "learning_rate": 5.659758355765987e-06, "loss": 3.1973, "step": 6591 }, { "epoch": 1.91, "learning_rate": 5.624616151715834e-06, "loss": 3.0739, "step": 6592 }, { "epoch": 1.91, "learning_rate": 5.589582771916291e-06, "loss": 3.1586, "step": 6593 }, { "epoch": 1.91, "learning_rate": 5.5546582240791345e-06, "loss": 3.0276, "step": 6594 }, { "epoch": 1.91, "learning_rate": 5.51984251589216e-06, "loss": 3.2684, "step": 6595 }, { "epoch": 1.91, "learning_rate": 5.48513565501918e-06, "loss": 3.1695, "step": 6596 }, { "epoch": 1.91, "learning_rate": 5.450537649099918e-06, "loss": 3.2277, "step": 6597 }, { "epoch": 1.91, "learning_rate": 5.416048505750393e-06, "loss": 3.1205, "step": 6598 }, { "epoch": 1.91, "learning_rate": 5.38166823256242e-06, "loss": 3.0206, "step": 6599 }, { "epoch": 1.91, "learning_rate": 5.347396837104057e-06, "loss": 3.1656, "step": 6600 }, { "epoch": 1.91, "learning_rate": 5.313234326919158e-06, "loss": 3.0998, "step": 6601 }, { "epoch": 1.91, "learning_rate": 5.279180709527765e-06, "loss": 3.1064, "step": 6602 }, { "epoch": 1.91, "learning_rate": 5.245235992425934e-06, "loss": 3.1081, "step": 6603 }, { "epoch": 1.91, "learning_rate": 5.211400183085746e-06, "loss": 3.1318, "step": 6604 }, { "epoch": 1.91, "learning_rate": 5.177673288955353e-06, "loss": 3.0762, "step": 6605 }, { "epoch": 1.91, "learning_rate": 5.1440553174588174e-06, "loss": 3.1076, "step": 6606 }, { "epoch": 1.91, "learning_rate": 5.110546275996275e-06, "loss": 3.1163, "step": 6607 }, { "epoch": 1.91, "learning_rate": 5.077146171943936e-06, "loss": 3.1445, "step": 6608 }, { "epoch": 1.91, "learning_rate": 5.0438550126539755e-06, "loss": 3.1288, "step": 6609 }, { "epoch": 1.91, "learning_rate": 5.010672805454586e-06, "loss": 3.1066, "step": 6610 }, { "epoch": 1.91, "learning_rate": 4.977599557649981e-06, "loss": 3.004, "step": 6611 }, { "epoch": 1.91, "learning_rate": 4.944635276520393e-06, "loss": 3.1546, "step": 6612 }, { "epoch": 1.92, "learning_rate": 4.911779969322127e-06, "loss": 3.2847, "step": 6613 }, { "epoch": 1.92, "learning_rate": 4.879033643287456e-06, "loss": 3.2426, "step": 6614 }, { "epoch": 1.92, "learning_rate": 4.846396305624612e-06, "loss": 3.0965, "step": 6615 }, { "epoch": 1.92, "learning_rate": 4.813867963517904e-06, "loss": 3.1232, "step": 6616 }, { "epoch": 1.92, "learning_rate": 4.7814486241276045e-06, "loss": 3.0834, "step": 6617 }, { "epoch": 1.92, "learning_rate": 4.749138294590005e-06, "loss": 3.1261, "step": 6618 }, { "epoch": 1.92, "learning_rate": 4.716936982017472e-06, "loss": 3.0876, "step": 6619 }, { "epoch": 1.92, "learning_rate": 4.684844693498225e-06, "loss": 3.309, "step": 6620 }, { "epoch": 1.92, "learning_rate": 4.652861436096556e-06, "loss": 3.1672, "step": 6621 }, { "epoch": 1.92, "learning_rate": 4.62098721685289e-06, "loss": 3.1924, "step": 6622 }, { "epoch": 1.92, "learning_rate": 4.589222042783447e-06, "loss": 3.1482, "step": 6623 }, { "epoch": 1.92, "learning_rate": 4.55756592088058e-06, "loss": 3.2945, "step": 6624 }, { "epoch": 1.92, "learning_rate": 4.526018858112546e-06, "loss": 3.1823, "step": 6625 }, { "epoch": 1.92, "learning_rate": 4.49458086142357e-06, "loss": 3.1074, "step": 6626 }, { "epoch": 1.92, "learning_rate": 4.463251937734059e-06, "loss": 3.2325, "step": 6627 }, { "epoch": 1.92, "learning_rate": 4.43203209394022e-06, "loss": 3.0032, "step": 6628 }, { "epoch": 1.92, "learning_rate": 4.400921336914276e-06, "loss": 3.1205, "step": 6629 }, { "epoch": 1.92, "learning_rate": 4.369919673504585e-06, "loss": 3.0627, "step": 6630 }, { "epoch": 1.92, "learning_rate": 4.339027110535298e-06, "loss": 3.1729, "step": 6631 }, { "epoch": 1.92, "learning_rate": 4.308243654806643e-06, "loss": 3.0825, "step": 6632 }, { "epoch": 1.92, "learning_rate": 4.277569313094809e-06, "loss": 2.9559, "step": 6633 }, { "epoch": 1.92, "learning_rate": 4.247004092152007e-06, "loss": 3.1059, "step": 6634 }, { "epoch": 1.92, "learning_rate": 4.216547998706355e-06, "loss": 3.069, "step": 6635 }, { "epoch": 1.92, "learning_rate": 4.186201039462045e-06, "loss": 3.1254, "step": 6636 }, { "epoch": 1.92, "learning_rate": 4.155963221099124e-06, "loss": 3.0965, "step": 6637 }, { "epoch": 1.92, "learning_rate": 4.125834550273766e-06, "loss": 3.1988, "step": 6638 }, { "epoch": 1.92, "learning_rate": 4.095815033618e-06, "loss": 3.0437, "step": 6639 }, { "epoch": 1.92, "learning_rate": 4.065904677739873e-06, "loss": 3.048, "step": 6640 }, { "epoch": 1.92, "learning_rate": 4.036103489223397e-06, "loss": 3.1192, "step": 6641 }, { "epoch": 1.92, "learning_rate": 4.006411474628491e-06, "loss": 3.0545, "step": 6642 }, { "epoch": 1.92, "learning_rate": 3.976828640491203e-06, "loss": 3.0196, "step": 6643 }, { "epoch": 1.92, "learning_rate": 3.947354993323326e-06, "loss": 3.1585, "step": 6644 }, { "epoch": 1.92, "learning_rate": 3.917990539612892e-06, "loss": 3.1762, "step": 6645 }, { "epoch": 1.92, "learning_rate": 3.888735285823564e-06, "loss": 3.1073, "step": 6646 }, { "epoch": 1.93, "learning_rate": 3.8595892383953005e-06, "loss": 3.1317, "step": 6647 }, { "epoch": 1.93, "learning_rate": 3.830552403743803e-06, "loss": 3.059, "step": 6648 }, { "epoch": 1.93, "learning_rate": 3.8016247882607937e-06, "loss": 3.126, "step": 6649 }, { "epoch": 1.93, "learning_rate": 3.7728063983139547e-06, "loss": 3.0564, "step": 6650 }, { "epoch": 1.93, "learning_rate": 3.7440972402469355e-06, "loss": 3.046, "step": 6651 }, { "epoch": 1.93, "learning_rate": 3.715497320379346e-06, "loss": 3.2579, "step": 6652 }, { "epoch": 1.93, "learning_rate": 3.6870066450067075e-06, "loss": 3.0391, "step": 6653 }, { "epoch": 1.93, "learning_rate": 3.6586252204005577e-06, "loss": 3.1155, "step": 6654 }, { "epoch": 1.93, "learning_rate": 3.6303530528082883e-06, "loss": 3.1711, "step": 6655 }, { "epoch": 1.93, "learning_rate": 3.6021901484533658e-06, "loss": 3.0661, "step": 6656 }, { "epoch": 1.93, "learning_rate": 3.5741365135351643e-06, "loss": 3.1122, "step": 6657 }, { "epoch": 1.93, "learning_rate": 3.5461921542288558e-06, "loss": 3.1051, "step": 6658 }, { "epoch": 1.93, "learning_rate": 3.518357076685852e-06, "loss": 2.994, "step": 6659 }, { "epoch": 1.93, "learning_rate": 3.4906312870331967e-06, "loss": 3.1956, "step": 6660 }, { "epoch": 1.93, "learning_rate": 3.4630147913741193e-06, "loss": 3.0257, "step": 6661 }, { "epoch": 1.93, "learning_rate": 3.435507595787646e-06, "loss": 3.2278, "step": 6662 }, { "epoch": 1.93, "learning_rate": 3.408109706328766e-06, "loss": 3.0691, "step": 6663 }, { "epoch": 1.93, "learning_rate": 3.3808211290284885e-06, "loss": 3.0541, "step": 6664 }, { "epoch": 1.93, "learning_rate": 3.353641869893731e-06, "loss": 3.1371, "step": 6665 }, { "epoch": 1.93, "learning_rate": 3.326571934907263e-06, "loss": 3.0868, "step": 6666 }, { "epoch": 1.93, "learning_rate": 3.2996113300278186e-06, "loss": 3.2443, "step": 6667 }, { "epoch": 1.93, "learning_rate": 3.2727600611901497e-06, "loss": 3.0647, "step": 6668 }, { "epoch": 1.93, "learning_rate": 3.2460181343048623e-06, "loss": 3.2229, "step": 6669 }, { "epoch": 1.93, "learning_rate": 3.219385555258525e-06, "loss": 3.2333, "step": 6670 }, { "epoch": 1.93, "learning_rate": 3.1928623299136152e-06, "loss": 3.1131, "step": 6671 }, { "epoch": 1.93, "learning_rate": 3.166448464108629e-06, "loss": 3.1104, "step": 6672 }, { "epoch": 1.93, "learning_rate": 3.1401439636577488e-06, "loss": 3.1835, "step": 6673 }, { "epoch": 1.93, "learning_rate": 3.1139488343513987e-06, "loss": 3.1722, "step": 6674 }, { "epoch": 1.93, "learning_rate": 3.0878630819556886e-06, "loss": 3.1088, "step": 6675 }, { "epoch": 1.93, "learning_rate": 3.061886712212747e-06, "loss": 3.1898, "step": 6676 }, { "epoch": 1.93, "learning_rate": 3.0360197308406667e-06, "loss": 3.2285, "step": 6677 }, { "epoch": 1.93, "learning_rate": 3.0102621435333934e-06, "loss": 3.1814, "step": 6678 }, { "epoch": 1.93, "learning_rate": 2.98461395596078e-06, "loss": 3.0481, "step": 6679 }, { "epoch": 1.93, "learning_rate": 2.959075173768588e-06, "loss": 2.9798, "step": 6680 }, { "epoch": 1.93, "learning_rate": 2.933645802578655e-06, "loss": 3.1877, "step": 6681 }, { "epoch": 1.94, "learning_rate": 2.9083258479885023e-06, "loss": 3.1271, "step": 6682 }, { "epoch": 1.94, "learning_rate": 2.883115315571727e-06, "loss": 3.1703, "step": 6683 }, { "epoch": 1.94, "learning_rate": 2.858014210877835e-06, "loss": 3.0959, "step": 6684 }, { "epoch": 1.94, "learning_rate": 2.833022539432073e-06, "loss": 3.1577, "step": 6685 }, { "epoch": 1.94, "learning_rate": 2.8081403067358736e-06, "loss": 3.0853, "step": 6686 }, { "epoch": 1.94, "learning_rate": 2.7833675182663e-06, "loss": 3.0689, "step": 6687 }, { "epoch": 1.94, "learning_rate": 2.758704179476601e-06, "loss": 3.2256, "step": 6688 }, { "epoch": 1.94, "learning_rate": 2.7341502957956564e-06, "loss": 3.1787, "step": 6689 }, { "epoch": 1.94, "learning_rate": 2.7097058726284207e-06, "loss": 3.1872, "step": 6690 }, { "epoch": 1.94, "learning_rate": 2.6853709153557003e-06, "loss": 3.1374, "step": 6691 }, { "epoch": 1.94, "learning_rate": 2.661145429334322e-06, "loss": 3.1215, "step": 6692 }, { "epoch": 1.94, "learning_rate": 2.637029419896797e-06, "loss": 3.0955, "step": 6693 }, { "epoch": 1.94, "learning_rate": 2.613022892351713e-06, "loss": 3.107, "step": 6694 }, { "epoch": 1.94, "learning_rate": 2.589125851983509e-06, "loss": 3.2329, "step": 6695 }, { "epoch": 1.94, "learning_rate": 2.5653383040524227e-06, "loss": 3.164, "step": 6696 }, { "epoch": 1.94, "learning_rate": 2.5416602537948196e-06, "loss": 3.1893, "step": 6697 }, { "epoch": 1.94, "learning_rate": 2.518091706422698e-06, "loss": 3.0646, "step": 6698 }, { "epoch": 1.94, "learning_rate": 2.494632667124186e-06, "loss": 3.1814, "step": 6699 }, { "epoch": 1.94, "learning_rate": 2.4712831410630964e-06, "loss": 3.0469, "step": 6700 }, { "epoch": 1.94, "learning_rate": 2.448043133379374e-06, "loss": 2.9907, "step": 6701 }, { "epoch": 1.94, "learning_rate": 2.424912649188593e-06, "loss": 3.115, "step": 6702 }, { "epoch": 1.94, "learning_rate": 2.4018916935823475e-06, "loss": 3.1553, "step": 6703 }, { "epoch": 1.94, "learning_rate": 2.378980271628195e-06, "loss": 3.1778, "step": 6704 }, { "epoch": 1.94, "learning_rate": 2.3561783883694897e-06, "loss": 3.1571, "step": 6705 }, { "epoch": 1.94, "learning_rate": 2.3334860488254395e-06, "loss": 3.1781, "step": 6706 }, { "epoch": 1.94, "learning_rate": 2.310903257991215e-06, "loss": 3.1866, "step": 6707 }, { "epoch": 1.94, "learning_rate": 2.2884300208378393e-06, "loss": 3.1239, "step": 6708 }, { "epoch": 1.94, "learning_rate": 2.2660663423123005e-06, "loss": 3.0597, "step": 6709 }, { "epoch": 1.94, "learning_rate": 2.243812227337272e-06, "loss": 3.1686, "step": 6710 }, { "epoch": 1.94, "learning_rate": 2.2216676808115566e-06, "loss": 3.0893, "step": 6711 }, { "epoch": 1.94, "learning_rate": 2.1996327076096446e-06, "loss": 3.0646, "step": 6712 }, { "epoch": 1.94, "learning_rate": 2.177707312581989e-06, "loss": 3.0691, "step": 6713 }, { "epoch": 1.94, "learning_rate": 2.1558915005548964e-06, "loss": 3.0052, "step": 6714 }, { "epoch": 1.94, "learning_rate": 2.1341852763306357e-06, "loss": 3.2306, "step": 6715 }, { "epoch": 1.95, "learning_rate": 2.112588644687219e-06, "loss": 3.0507, "step": 6716 }, { "epoch": 1.95, "learning_rate": 2.0911016103786207e-06, "loss": 3.0642, "step": 6717 }, { "epoch": 1.95, "learning_rate": 2.069724178134613e-06, "loss": 3.0167, "step": 6718 }, { "epoch": 1.95, "learning_rate": 2.0484563526609877e-06, "loss": 3.0788, "step": 6719 }, { "epoch": 1.95, "learning_rate": 2.027298138639333e-06, "loss": 3.1587, "step": 6720 }, { "epoch": 1.95, "learning_rate": 2.006249540726979e-06, "loss": 3.1517, "step": 6721 }, { "epoch": 1.95, "learning_rate": 1.98531056355733e-06, "loss": 3.1374, "step": 6722 }, { "epoch": 1.95, "learning_rate": 1.9644812117395327e-06, "loss": 3.1044, "step": 6723 }, { "epoch": 1.95, "learning_rate": 1.943761489858642e-06, "loss": 3.0862, "step": 6724 }, { "epoch": 1.95, "learning_rate": 1.92315140247562e-06, "loss": 3.0958, "step": 6725 }, { "epoch": 1.95, "learning_rate": 1.9026509541272275e-06, "loss": 3.0533, "step": 6726 }, { "epoch": 1.95, "learning_rate": 1.8822601493261315e-06, "loss": 3.114, "step": 6727 }, { "epoch": 1.95, "learning_rate": 1.8619789925608533e-06, "loss": 3.0877, "step": 6728 }, { "epoch": 1.95, "learning_rate": 1.8418074882958213e-06, "loss": 3.1191, "step": 6729 }, { "epoch": 1.95, "learning_rate": 1.8217456409711508e-06, "loss": 3.1173, "step": 6730 }, { "epoch": 1.95, "learning_rate": 1.8017934550030867e-06, "loss": 3.0344, "step": 6731 }, { "epoch": 1.95, "learning_rate": 1.781950934783505e-06, "loss": 3.1849, "step": 6732 }, { "epoch": 1.95, "learning_rate": 1.7622180846803004e-06, "loss": 3.0601, "step": 6733 }, { "epoch": 1.95, "learning_rate": 1.7425949090371097e-06, "loss": 3.0951, "step": 6734 }, { "epoch": 1.95, "learning_rate": 1.7230814121735327e-06, "loss": 3.0339, "step": 6735 }, { "epoch": 1.95, "learning_rate": 1.7036775983849673e-06, "loss": 3.1062, "step": 6736 }, { "epoch": 1.95, "learning_rate": 1.6843834719426077e-06, "loss": 3.1881, "step": 6737 }, { "epoch": 1.95, "learning_rate": 1.6651990370936676e-06, "loss": 3.0898, "step": 6738 }, { "epoch": 1.95, "learning_rate": 1.646124298061047e-06, "loss": 3.1736, "step": 6739 }, { "epoch": 1.95, "learning_rate": 1.6271592590435536e-06, "loss": 3.0992, "step": 6740 }, { "epoch": 1.95, "learning_rate": 1.6083039242159036e-06, "loss": 3.1851, "step": 6741 }, { "epoch": 1.95, "learning_rate": 1.589558297728555e-06, "loss": 3.0764, "step": 6742 }, { "epoch": 1.95, "learning_rate": 1.5709223837079845e-06, "loss": 3.1067, "step": 6743 }, { "epoch": 1.95, "learning_rate": 1.5523961862564107e-06, "loss": 3.1616, "step": 6744 }, { "epoch": 1.95, "learning_rate": 1.5339797094517939e-06, "loss": 3.0898, "step": 6745 }, { "epoch": 1.95, "learning_rate": 1.5156729573481687e-06, "loss": 3.2208, "step": 6746 }, { "epoch": 1.95, "learning_rate": 1.497475933975312e-06, "loss": 3.0507, "step": 6747 }, { "epoch": 1.95, "learning_rate": 1.4793886433387416e-06, "loss": 3.0989, "step": 6748 }, { "epoch": 1.95, "learning_rate": 1.4614110894199394e-06, "loss": 3.1607, "step": 6749 }, { "epoch": 1.95, "learning_rate": 1.4435432761762956e-06, "loss": 3.2414, "step": 6750 }, { "epoch": 1.96, "learning_rate": 1.4257852075408307e-06, "loss": 3.0513, "step": 6751 }, { "epoch": 1.96, "learning_rate": 1.4081368874226396e-06, "loss": 3.1658, "step": 6752 }, { "epoch": 1.96, "learning_rate": 1.3905983197065597e-06, "loss": 2.9693, "step": 6753 }, { "epoch": 1.96, "learning_rate": 1.3731695082531692e-06, "loss": 3.1665, "step": 6754 }, { "epoch": 1.96, "learning_rate": 1.355850456899066e-06, "loss": 3.0752, "step": 6755 }, { "epoch": 1.96, "learning_rate": 1.3386411694565892e-06, "loss": 3.0448, "step": 6756 }, { "epoch": 1.96, "learning_rate": 1.3215416497138755e-06, "loss": 2.961, "step": 6757 }, { "epoch": 1.96, "learning_rate": 1.304551901435025e-06, "loss": 3.0816, "step": 6758 }, { "epoch": 1.96, "learning_rate": 1.2876719283598236e-06, "loss": 3.2536, "step": 6759 }, { "epoch": 1.96, "learning_rate": 1.2709017342039663e-06, "loss": 3.1568, "step": 6760 }, { "epoch": 1.96, "learning_rate": 1.254241322659111e-06, "loss": 3.1017, "step": 6761 }, { "epoch": 1.96, "learning_rate": 1.2376906973924906e-06, "loss": 3.2607, "step": 6762 }, { "epoch": 1.96, "learning_rate": 1.2212498620474132e-06, "loss": 3.1339, "step": 6763 }, { "epoch": 1.96, "learning_rate": 1.204918820242873e-06, "loss": 3.1992, "step": 6764 }, { "epoch": 1.96, "learning_rate": 1.1886975755736606e-06, "loss": 3.1328, "step": 6765 }, { "epoch": 1.96, "learning_rate": 1.1725861316105869e-06, "loss": 3.1167, "step": 6766 }, { "epoch": 1.96, "learning_rate": 1.1565844919001479e-06, "loss": 3.113, "step": 6767 }, { "epoch": 1.96, "learning_rate": 1.1406926599646372e-06, "loss": 3.0854, "step": 6768 }, { "epoch": 1.96, "learning_rate": 1.1249106393023124e-06, "loss": 3.0557, "step": 6769 }, { "epoch": 1.96, "learning_rate": 1.1092384333871719e-06, "loss": 3.1149, "step": 6770 }, { "epoch": 1.96, "learning_rate": 1.093676045669012e-06, "loss": 3.1887, "step": 6771 }, { "epoch": 1.96, "learning_rate": 1.0782234795735369e-06, "loss": 3.1836, "step": 6772 }, { "epoch": 1.96, "learning_rate": 1.062880738502303e-06, "loss": 3.0745, "step": 6773 }, { "epoch": 1.96, "learning_rate": 1.0476478258324984e-06, "loss": 3.0747, "step": 6774 }, { "epoch": 1.96, "learning_rate": 1.032524744917329e-06, "loss": 3.0798, "step": 6775 }, { "epoch": 1.96, "learning_rate": 1.0175114990857438e-06, "loss": 3.1467, "step": 6776 }, { "epoch": 1.96, "learning_rate": 1.0026080916425428e-06, "loss": 3.1378, "step": 6777 }, { "epoch": 1.96, "learning_rate": 9.878145258683802e-07, "loss": 3.0692, "step": 6778 }, { "epoch": 1.96, "learning_rate": 9.731308050195954e-07, "loss": 3.2619, "step": 6779 }, { "epoch": 1.96, "learning_rate": 9.585569323284915e-07, "loss": 3.1351, "step": 6780 }, { "epoch": 1.96, "learning_rate": 9.440929110031138e-07, "loss": 3.068, "step": 6781 }, { "epoch": 1.96, "learning_rate": 9.297387442273597e-07, "loss": 3.0807, "step": 6782 }, { "epoch": 1.96, "learning_rate": 9.15494435160924e-07, "loss": 3.0431, "step": 6783 }, { "epoch": 1.96, "learning_rate": 9.013599869394096e-07, "loss": 3.0765, "step": 6784 }, { "epoch": 1.96, "learning_rate": 8.873354026740499e-07, "loss": 3.1644, "step": 6785 }, { "epoch": 1.97, "learning_rate": 8.734206854520422e-07, "loss": 3.1755, "step": 6786 }, { "epoch": 1.97, "learning_rate": 8.59615838336436e-07, "loss": 3.0669, "step": 6787 }, { "epoch": 1.97, "learning_rate": 8.459208643659122e-07, "loss": 2.9631, "step": 6788 }, { "epoch": 1.97, "learning_rate": 8.323357665551146e-07, "loss": 3.0475, "step": 6789 }, { "epoch": 1.97, "learning_rate": 8.188605478944289e-07, "loss": 3.0898, "step": 6790 }, { "epoch": 1.97, "learning_rate": 8.054952113501491e-07, "loss": 3.1258, "step": 6791 }, { "epoch": 1.97, "learning_rate": 7.92239759864255e-07, "loss": 3.1756, "step": 6792 }, { "epoch": 1.97, "learning_rate": 7.79094196354635e-07, "loss": 3.2198, "step": 6793 }, { "epoch": 1.97, "learning_rate": 7.660585237149742e-07, "loss": 3.2482, "step": 6794 }, { "epoch": 1.97, "learning_rate": 7.531327448146441e-07, "loss": 3.1899, "step": 6795 }, { "epoch": 1.97, "learning_rate": 7.403168624990353e-07, "loss": 2.9881, "step": 6796 }, { "epoch": 1.97, "learning_rate": 7.276108795892245e-07, "loss": 3.1387, "step": 6797 }, { "epoch": 1.97, "learning_rate": 7.150147988820854e-07, "loss": 3.1051, "step": 6798 }, { "epoch": 1.97, "learning_rate": 7.025286231502892e-07, "loss": 3.3018, "step": 6799 }, { "epoch": 1.97, "learning_rate": 6.901523551424705e-07, "loss": 3.2559, "step": 6800 }, { "epoch": 1.97, "learning_rate": 6.778859975828388e-07, "loss": 3.2025, "step": 6801 }, { "epoch": 1.97, "learning_rate": 6.657295531715679e-07, "loss": 3.1552, "step": 6802 }, { "epoch": 1.97, "learning_rate": 6.536830245845726e-07, "loss": 3.0959, "step": 6803 }, { "epoch": 1.97, "learning_rate": 6.417464144736207e-07, "loss": 3.1548, "step": 6804 }, { "epoch": 1.97, "learning_rate": 6.299197254662214e-07, "loss": 3.0946, "step": 6805 }, { "epoch": 1.97, "learning_rate": 6.182029601657368e-07, "loss": 3.0277, "step": 6806 }, { "epoch": 1.97, "learning_rate": 6.06596121151326e-07, "loss": 3.0718, "step": 6807 }, { "epoch": 1.97, "learning_rate": 5.950992109779452e-07, "loss": 3.1978, "step": 6808 }, { "epoch": 1.97, "learning_rate": 5.837122321763477e-07, "loss": 3.0582, "step": 6809 }, { "epoch": 1.97, "learning_rate": 5.724351872530842e-07, "loss": 3.1567, "step": 6810 }, { "epoch": 1.97, "learning_rate": 5.612680786905023e-07, "loss": 3.3195, "step": 6811 }, { "epoch": 1.97, "learning_rate": 5.502109089467466e-07, "loss": 3.101, "step": 6812 }, { "epoch": 1.97, "learning_rate": 5.392636804557593e-07, "loss": 3.1405, "step": 6813 }, { "epoch": 1.97, "learning_rate": 5.284263956273904e-07, "loss": 3.2588, "step": 6814 }, { "epoch": 1.97, "learning_rate": 5.176990568471207e-07, "loss": 3.0991, "step": 6815 }, { "epoch": 1.97, "learning_rate": 5.070816664762834e-07, "loss": 3.1159, "step": 6816 }, { "epoch": 1.97, "learning_rate": 4.9657422685212e-07, "loss": 3.0886, "step": 6817 }, { "epoch": 1.97, "learning_rate": 4.861767402874473e-07, "loss": 3.0876, "step": 6818 }, { "epoch": 1.97, "learning_rate": 4.758892090711009e-07, "loss": 3.0841, "step": 6819 }, { "epoch": 1.98, "learning_rate": 4.657116354676583e-07, "loss": 2.9826, "step": 6820 }, { "epoch": 1.98, "learning_rate": 4.556440217173274e-07, "loss": 3.1118, "step": 6821 }, { "epoch": 1.98, "learning_rate": 4.456863700363356e-07, "loss": 3.1408, "step": 6822 }, { "epoch": 1.98, "learning_rate": 4.3583868261654057e-07, "loss": 3.197, "step": 6823 }, { "epoch": 1.98, "learning_rate": 4.2610096162576383e-07, "loss": 3.1698, "step": 6824 }, { "epoch": 1.98, "learning_rate": 4.1647320920740196e-07, "loss": 2.9909, "step": 6825 }, { "epoch": 1.98, "learning_rate": 4.0695542748081515e-07, "loss": 3.0803, "step": 6826 }, { "epoch": 1.98, "learning_rate": 3.975476185411608e-07, "loss": 3.0492, "step": 6827 }, { "epoch": 1.98, "learning_rate": 3.8824978445922697e-07, "loss": 2.9804, "step": 6828 }, { "epoch": 1.98, "learning_rate": 3.7906192728176525e-07, "loss": 3.1853, "step": 6829 }, { "epoch": 1.98, "learning_rate": 3.6998404903121343e-07, "loss": 3.1189, "step": 6830 }, { "epoch": 1.98, "learning_rate": 3.610161517058619e-07, "loss": 3.1471, "step": 6831 }, { "epoch": 1.98, "learning_rate": 3.521582372797427e-07, "loss": 3.1151, "step": 6832 }, { "epoch": 1.98, "learning_rate": 3.434103077027406e-07, "loss": 3.1341, "step": 6833 }, { "epoch": 1.98, "learning_rate": 3.347723649004264e-07, "loss": 3.1743, "step": 6834 }, { "epoch": 1.98, "learning_rate": 3.2624441077433454e-07, "loss": 3.1439, "step": 6835 }, { "epoch": 1.98, "learning_rate": 3.178264472015191e-07, "loss": 3.1221, "step": 6836 }, { "epoch": 1.98, "learning_rate": 3.0951847603516437e-07, "loss": 3.0627, "step": 6837 }, { "epoch": 1.98, "learning_rate": 3.013204991038632e-07, "loss": 3.0999, "step": 6838 }, { "epoch": 1.98, "learning_rate": 2.9323251821239406e-07, "loss": 3.1567, "step": 6839 }, { "epoch": 1.98, "learning_rate": 2.852545351409996e-07, "loss": 3.0508, "step": 6840 }, { "epoch": 1.98, "learning_rate": 2.773865516458307e-07, "loss": 3.2437, "step": 6841 }, { "epoch": 1.98, "learning_rate": 2.6962856945883516e-07, "loss": 3.1639, "step": 6842 }, { "epoch": 1.98, "learning_rate": 2.6198059028781363e-07, "loss": 3.188, "step": 6843 }, { "epoch": 1.98, "learning_rate": 2.544426158161972e-07, "loss": 3.2085, "step": 6844 }, { "epoch": 1.98, "learning_rate": 2.4701464770326976e-07, "loss": 3.0681, "step": 6845 }, { "epoch": 1.98, "learning_rate": 2.396966875841677e-07, "loss": 2.9869, "step": 6846 }, { "epoch": 1.98, "learning_rate": 2.3248873706971373e-07, "loss": 3.0449, "step": 6847 }, { "epoch": 1.98, "learning_rate": 2.25390797746583e-07, "loss": 3.1544, "step": 6848 }, { "epoch": 1.98, "learning_rate": 2.1840287117713686e-07, "loss": 3.0971, "step": 6849 }, { "epoch": 1.98, "learning_rate": 2.1152495889970036e-07, "loss": 3.0133, "step": 6850 }, { "epoch": 1.98, "learning_rate": 2.0475706242822913e-07, "loss": 3.162, "step": 6851 }, { "epoch": 1.98, "learning_rate": 1.9809918325247589e-07, "loss": 3.0428, "step": 6852 }, { "epoch": 1.98, "learning_rate": 1.915513228380461e-07, "loss": 3.2029, "step": 6853 }, { "epoch": 1.98, "learning_rate": 1.8511348262623128e-07, "loss": 3.1264, "step": 6854 }, { "epoch": 1.99, "learning_rate": 1.7878566403417563e-07, "loss": 3.0131, "step": 6855 }, { "epoch": 1.99, "learning_rate": 1.7256786845482041e-07, "loss": 3.1825, "step": 6856 }, { "epoch": 1.99, "learning_rate": 1.6646009725684864e-07, "loss": 3.12, "step": 6857 }, { "epoch": 1.99, "learning_rate": 1.6046235178474034e-07, "loss": 3.0983, "step": 6858 }, { "epoch": 1.99, "learning_rate": 1.5457463335871725e-07, "loss": 3.0628, "step": 6859 }, { "epoch": 1.99, "learning_rate": 1.487969432747982e-07, "loss": 3.1142, "step": 6860 }, { "epoch": 1.99, "learning_rate": 1.431292828048547e-07, "loss": 3.0811, "step": 6861 }, { "epoch": 1.99, "learning_rate": 1.3757165319644438e-07, "loss": 3.0236, "step": 6862 }, { "epoch": 1.99, "learning_rate": 1.3212405567292195e-07, "loss": 3.1595, "step": 6863 }, { "epoch": 1.99, "learning_rate": 1.2678649143349485e-07, "loss": 3.1628, "step": 6864 }, { "epoch": 1.99, "learning_rate": 1.2155896165300107e-07, "loss": 3.0549, "step": 6865 }, { "epoch": 1.99, "learning_rate": 1.1644146748224226e-07, "loss": 3.0374, "step": 6866 }, { "epoch": 1.99, "learning_rate": 1.1143401004765075e-07, "loss": 2.98, "step": 6867 }, { "epoch": 1.99, "learning_rate": 1.0653659045156694e-07, "loss": 3.0992, "step": 6868 }, { "epoch": 1.99, "learning_rate": 1.0174920977190638e-07, "loss": 3.0933, "step": 6869 }, { "epoch": 1.99, "learning_rate": 9.707186906254827e-08, "loss": 3.2008, "step": 6870 }, { "epoch": 1.99, "learning_rate": 9.250456935316898e-08, "loss": 3.1931, "step": 6871 }, { "epoch": 1.99, "learning_rate": 8.804731164901991e-08, "loss": 3.0202, "step": 6872 }, { "epoch": 1.99, "learning_rate": 8.37000969313162e-08, "loss": 3.1126, "step": 6873 }, { "epoch": 1.99, "learning_rate": 7.946292615701456e-08, "loss": 3.1199, "step": 6874 }, { "epoch": 1.99, "learning_rate": 7.533580025875785e-08, "loss": 3.0864, "step": 6875 }, { "epoch": 1.99, "learning_rate": 7.131872014509711e-08, "loss": 3.2039, "step": 6876 }, { "epoch": 1.99, "learning_rate": 6.741168670021391e-08, "loss": 3.1242, "step": 6877 }, { "epoch": 1.99, "learning_rate": 6.361470078419806e-08, "loss": 3.1363, "step": 6878 }, { "epoch": 1.99, "learning_rate": 5.992776323282545e-08, "loss": 3.0905, "step": 6879 }, { "epoch": 1.99, "learning_rate": 5.635087485772461e-08, "loss": 3.2045, "step": 6880 }, { "epoch": 1.99, "learning_rate": 5.288403644626572e-08, "loss": 3.2134, "step": 6881 }, { "epoch": 1.99, "learning_rate": 4.952724876150505e-08, "loss": 3.1003, "step": 6882 }, { "epoch": 1.99, "learning_rate": 4.628051254240706e-08, "loss": 3.0644, "step": 6883 }, { "epoch": 1.99, "learning_rate": 4.314382850362231e-08, "loss": 3.2093, "step": 6884 }, { "epoch": 1.99, "learning_rate": 4.011719733570951e-08, "loss": 3.1352, "step": 6885 }, { "epoch": 1.99, "learning_rate": 3.720061970480249e-08, "loss": 3.107, "step": 6886 }, { "epoch": 1.99, "learning_rate": 3.439409625294321e-08, "loss": 3.2134, "step": 6887 }, { "epoch": 1.99, "learning_rate": 3.169762759797079e-08, "loss": 3.1457, "step": 6888 }, { "epoch": 2.0, "learning_rate": 2.9111214333354952e-08, "loss": 3.094, "step": 6889 }, { "epoch": 2.0, "learning_rate": 2.663485702847357e-08, "loss": 3.2528, "step": 6890 }, { "epoch": 2.0, "learning_rate": 2.4268556228446147e-08, "loss": 3.1685, "step": 6891 }, { "epoch": 2.0, "learning_rate": 2.2012312454133822e-08, "loss": 3.0823, "step": 6892 }, { "epoch": 2.0, "learning_rate": 1.9866126202250366e-08, "loss": 3.0776, "step": 6893 }, { "epoch": 2.0, "learning_rate": 1.7829997945084665e-08, "loss": 3.1803, "step": 6894 }, { "epoch": 2.0, "learning_rate": 1.5903928131000278e-08, "loss": 3.1332, "step": 6895 }, { "epoch": 2.0, "learning_rate": 1.408791718382485e-08, "loss": 3.1009, "step": 6896 }, { "epoch": 2.0, "learning_rate": 1.2381965503460712e-08, "loss": 3.1051, "step": 6897 }, { "epoch": 2.0, "learning_rate": 1.0786073465274271e-08, "loss": 3.139, "step": 6898 }, { "epoch": 2.0, "learning_rate": 9.300241420706623e-09, "loss": 3.0286, "step": 6899 }, { "epoch": 2.0, "learning_rate": 7.92446969671845e-09, "loss": 3.1334, "step": 6900 }, { "epoch": 2.0, "learning_rate": 6.658758596178594e-09, "loss": 3.2288, "step": 6901 }, { "epoch": 2.0, "learning_rate": 5.503108397753031e-09, "loss": 3.1797, "step": 6902 }, { "epoch": 2.0, "learning_rate": 4.457519355738349e-09, "loss": 3.0901, "step": 6903 }, { "epoch": 2.0, "learning_rate": 3.5219917003948e-09, "loss": 3.0597, "step": 6904 }, { "epoch": 2.0, "step": 6904, "total_flos": 8.419903045579571e+16, "train_loss": 3.300872454810391, "train_runtime": 32006.6725, "train_samples_per_second": 13.809, "train_steps_per_second": 0.216 } ], "logging_steps": 1.0, "max_steps": 6904, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "total_flos": 8.419903045579571e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }