diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,41454 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9994569840896337, + "eval_steps": 500, + "global_step": 6904, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 6.161, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 6.1682, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 4.807692307692308e-06, + "loss": 5.9098, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 9.615384615384616e-06, + "loss": 6.2003, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.4423076923076924e-05, + "loss": 6.1261, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.923076923076923e-05, + "loss": 5.9273, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.4038461538461542e-05, + "loss": 5.7911, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 2.4038461538461542e-05, + "loss": 5.6916, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 2.884615384615385e-05, + "loss": 5.4199, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 2.884615384615385e-05, + "loss": 5.2784, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 3.365384615384615e-05, + "loss": 5.4758, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 3.846153846153846e-05, + "loss": 5.4711, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 4.3269230769230766e-05, + "loss": 5.4178, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 4.3269230769230766e-05, + "loss": 5.1831, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 4.8076923076923084e-05, + "loss": 5.1222, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 5.288461538461539e-05, + "loss": 4.9677, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 5.76923076923077e-05, + "loss": 5.014, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 6.25e-05, + "loss": 4.8392, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 6.73076923076923e-05, + "loss": 4.8018, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 7.211538461538461e-05, + "loss": 4.8535, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 7.692307692307693e-05, + "loss": 4.5942, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 8.173076923076923e-05, + "loss": 4.5563, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 8.653846153846153e-05, + "loss": 4.6241, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 9.134615384615384e-05, + "loss": 4.6345, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 9.615384615384617e-05, + "loss": 4.5706, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010096153846153847, + "loss": 4.5761, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010576923076923077, + "loss": 4.5912, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 0.00011057692307692308, + "loss": 4.4882, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001153846153846154, + "loss": 4.5608, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001201923076923077, + "loss": 4.6495, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 0.000125, + "loss": 4.3678, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012980769230769233, + "loss": 4.5512, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001346153846153846, + "loss": 4.4474, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 0.00013942307692307694, + "loss": 4.2791, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014423076923076922, + "loss": 4.4434, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014903846153846155, + "loss": 4.3315, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 0.00015384615384615385, + "loss": 4.3086, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 0.00015865384615384616, + "loss": 4.4628, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016346153846153846, + "loss": 4.4949, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001682692307692308, + "loss": 4.5125, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017307692307692307, + "loss": 4.56, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001778846153846154, + "loss": 4.2873, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018269230769230767, + "loss": 4.2763, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001875, + "loss": 4.4362, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019230769230769233, + "loss": 4.2658, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001971153846153846, + "loss": 4.3142, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020192307692307694, + "loss": 4.2805, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020673076923076922, + "loss": 4.2614, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021153846153846155, + "loss": 4.3094, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021634615384615385, + "loss": 4.2918, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022115384615384616, + "loss": 4.2968, + "step": 51 + }, + { + "epoch": 0.02, + "learning_rate": 0.00022596153846153846, + "loss": 4.2043, + "step": 52 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002307692307692308, + "loss": 4.2329, + "step": 53 + }, + { + "epoch": 0.02, + "learning_rate": 0.00023557692307692307, + "loss": 4.3264, + "step": 54 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002403846153846154, + "loss": 4.3367, + "step": 55 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002451923076923077, + "loss": 4.1987, + "step": 56 + }, + { + "epoch": 0.02, + "learning_rate": 0.00025, + "loss": 4.1895, + "step": 57 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002548076923076923, + "loss": 4.1371, + "step": 58 + }, + { + "epoch": 0.02, + "learning_rate": 0.00025961538461538467, + "loss": 4.2447, + "step": 59 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002644230769230769, + "loss": 4.1681, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002692307692307692, + "loss": 4.1076, + "step": 61 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002740384615384616, + "loss": 4.0651, + "step": 62 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002788461538461539, + "loss": 4.0728, + "step": 63 + }, + { + "epoch": 0.02, + "learning_rate": 0.00028365384615384613, + "loss": 3.9783, + "step": 64 + }, + { + "epoch": 0.02, + "learning_rate": 0.00028846153846153843, + "loss": 4.1753, + "step": 65 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002932692307692308, + "loss": 4.1236, + "step": 66 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002980769230769231, + "loss": 4.1174, + "step": 67 + }, + { + "epoch": 0.02, + "learning_rate": 0.00030288461538461535, + "loss": 3.8908, + "step": 68 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003076923076923077, + "loss": 4.1196, + "step": 69 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003125, + "loss": 4.1691, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003173076923076923, + "loss": 4.2177, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 0.00032211538461538467, + "loss": 4.1797, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003269230769230769, + "loss": 4.1321, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003317307692307692, + "loss": 3.9375, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003365384615384616, + "loss": 3.9576, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003413461538461539, + "loss": 3.9569, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 0.00034615384615384613, + "loss": 4.0711, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 0.00035096153846153844, + "loss": 4.0995, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003557692307692308, + "loss": 3.912, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003605769230769231, + "loss": 3.8976, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036538461538461535, + "loss": 4.0411, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003701923076923077, + "loss": 4.0273, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 0.000375, + "loss": 3.9225, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003798076923076923, + "loss": 3.9683, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 0.00038461538461538467, + "loss": 3.7877, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003894230769230769, + "loss": 3.9174, + "step": 86 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003942307692307692, + "loss": 3.9229, + "step": 87 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003990384615384616, + "loss": 3.9332, + "step": 88 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004038461538461539, + "loss": 3.9367, + "step": 89 + }, + { + "epoch": 0.03, + "learning_rate": 0.00040865384615384613, + "loss": 3.9915, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 0.00041346153846153844, + "loss": 3.8203, + "step": 91 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004182692307692308, + "loss": 4.0365, + "step": 92 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004230769230769231, + "loss": 3.8866, + "step": 93 + }, + { + "epoch": 0.03, + "learning_rate": 0.00042788461538461535, + "loss": 3.8819, + "step": 94 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004326923076923077, + "loss": 3.9308, + "step": 95 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004375, + "loss": 3.8048, + "step": 96 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004423076923076923, + "loss": 3.9687, + "step": 97 + }, + { + "epoch": 0.03, + "learning_rate": 0.00044711538461538467, + "loss": 3.8629, + "step": 98 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004519230769230769, + "loss": 3.9706, + "step": 99 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004567307692307692, + "loss": 3.881, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004615384615384616, + "loss": 4.0128, + "step": 101 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004663461538461539, + "loss": 3.8763, + "step": 102 + }, + { + "epoch": 0.03, + "learning_rate": 0.00047115384615384613, + "loss": 3.7673, + "step": 103 + }, + { + "epoch": 0.03, + "learning_rate": 0.00047596153846153844, + "loss": 3.8364, + "step": 104 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004807692307692308, + "loss": 3.8304, + "step": 105 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004855769230769231, + "loss": 3.837, + "step": 106 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004903846153846154, + "loss": 3.9548, + "step": 107 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004951923076923077, + "loss": 3.9055, + "step": 108 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005, + "loss": 3.898, + "step": 109 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005048076923076923, + "loss": 3.9116, + "step": 110 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005096153846153846, + "loss": 3.7062, + "step": 111 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005144230769230769, + "loss": 3.8398, + "step": 112 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005192307692307693, + "loss": 4.0592, + "step": 113 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005240384615384615, + "loss": 3.7921, + "step": 114 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005288461538461538, + "loss": 3.7489, + "step": 115 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005336538461538461, + "loss": 3.9802, + "step": 116 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005384615384615384, + "loss": 3.7342, + "step": 117 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005432692307692307, + "loss": 3.9971, + "step": 118 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005480769230769232, + "loss": 3.907, + "step": 119 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005528846153846155, + "loss": 3.7726, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005576923076923078, + "loss": 3.7401, + "step": 121 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005625000000000001, + "loss": 3.7839, + "step": 122 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005673076923076923, + "loss": 3.9549, + "step": 123 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005721153846153846, + "loss": 3.8995, + "step": 124 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005769230769230769, + "loss": 3.8765, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005817307692307693, + "loss": 3.8703, + "step": 126 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005865384615384616, + "loss": 3.7604, + "step": 127 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005913461538461539, + "loss": 3.8104, + "step": 128 + }, + { + "epoch": 0.04, + "learning_rate": 0.0005961538461538462, + "loss": 3.8254, + "step": 129 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006009615384615385, + "loss": 3.8615, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006057692307692307, + "loss": 3.8292, + "step": 131 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006105769230769231, + "loss": 3.8351, + "step": 132 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006153846153846154, + "loss": 3.7863, + "step": 133 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006201923076923077, + "loss": 3.7492, + "step": 134 + }, + { + "epoch": 0.04, + "learning_rate": 0.000625, + "loss": 3.7104, + "step": 135 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006298076923076923, + "loss": 3.9247, + "step": 136 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006346153846153846, + "loss": 3.8404, + "step": 137 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006394230769230769, + "loss": 3.7626, + "step": 138 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006442307692307693, + "loss": 3.6955, + "step": 139 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006490384615384615, + "loss": 3.722, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006538461538461538, + "loss": 3.7595, + "step": 141 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006586538461538461, + "loss": 3.9546, + "step": 142 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006634615384615384, + "loss": 3.9042, + "step": 143 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006682692307692307, + "loss": 3.7585, + "step": 144 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006730769230769232, + "loss": 3.6973, + "step": 145 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006778846153846155, + "loss": 3.8009, + "step": 146 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006826923076923078, + "loss": 3.7775, + "step": 147 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006875, + "loss": 3.7912, + "step": 148 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006923076923076923, + "loss": 3.6862, + "step": 149 + }, + { + "epoch": 0.04, + "learning_rate": 0.0006971153846153846, + "loss": 3.8582, + "step": 150 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007019230769230769, + "loss": 3.8413, + "step": 151 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007067307692307693, + "loss": 3.7278, + "step": 152 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007115384615384616, + "loss": 3.7517, + "step": 153 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007163461538461539, + "loss": 3.702, + "step": 154 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007211538461538462, + "loss": 3.7415, + "step": 155 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007259615384615385, + "loss": 3.9373, + "step": 156 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007307692307692307, + "loss": 3.7964, + "step": 157 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007355769230769231, + "loss": 3.7393, + "step": 158 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007403846153846154, + "loss": 3.7706, + "step": 159 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007451923076923077, + "loss": 3.6625, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 0.00075, + "loss": 3.8063, + "step": 161 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007548076923076923, + "loss": 3.6056, + "step": 162 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007596153846153846, + "loss": 3.8231, + "step": 163 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007644230769230769, + "loss": 3.7259, + "step": 164 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007692307692307693, + "loss": 3.826, + "step": 165 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007740384615384615, + "loss": 3.7214, + "step": 166 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007788461538461538, + "loss": 3.7655, + "step": 167 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007836538461538461, + "loss": 3.8019, + "step": 168 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007884615384615384, + "loss": 3.925, + "step": 169 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007932692307692307, + "loss": 3.8384, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007980769230769232, + "loss": 3.688, + "step": 171 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008028846153846155, + "loss": 3.7548, + "step": 172 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008076923076923078, + "loss": 3.5522, + "step": 173 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008125000000000001, + "loss": 3.7104, + "step": 174 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008173076923076923, + "loss": 3.7556, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008221153846153846, + "loss": 3.7124, + "step": 176 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008269230769230769, + "loss": 3.6851, + "step": 177 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008317307692307693, + "loss": 3.777, + "step": 178 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008365384615384616, + "loss": 3.6594, + "step": 179 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008413461538461539, + "loss": 3.7439, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008461538461538462, + "loss": 3.8061, + "step": 181 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008509615384615385, + "loss": 3.7258, + "step": 182 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008557692307692307, + "loss": 3.6793, + "step": 183 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008605769230769231, + "loss": 3.6215, + "step": 184 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008653846153846154, + "loss": 3.7049, + "step": 185 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008701923076923077, + "loss": 3.8242, + "step": 186 + }, + { + "epoch": 0.05, + "learning_rate": 0.000875, + "loss": 3.778, + "step": 187 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008798076923076923, + "loss": 3.6382, + "step": 188 + }, + { + "epoch": 0.05, + "learning_rate": 0.0008846153846153846, + "loss": 3.7708, + "step": 189 + }, + { + "epoch": 0.06, + "learning_rate": 0.0008894230769230769, + "loss": 3.6522, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 0.0008942307692307693, + "loss": 3.7645, + "step": 191 + }, + { + "epoch": 0.06, + "learning_rate": 0.0008990384615384615, + "loss": 3.7917, + "step": 192 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009038461538461538, + "loss": 3.6763, + "step": 193 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009086538461538461, + "loss": 3.7301, + "step": 194 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009134615384615384, + "loss": 3.5736, + "step": 195 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009182692307692308, + "loss": 3.6986, + "step": 196 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009230769230769232, + "loss": 3.6325, + "step": 197 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009278846153846155, + "loss": 3.8108, + "step": 198 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009326923076923078, + "loss": 3.8029, + "step": 199 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009375, + "loss": 3.7307, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009423076923076923, + "loss": 3.7974, + "step": 201 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009471153846153846, + "loss": 3.8639, + "step": 202 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009519230769230769, + "loss": 3.6149, + "step": 203 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009567307692307693, + "loss": 3.7643, + "step": 204 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009615384615384616, + "loss": 3.6839, + "step": 205 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009663461538461539, + "loss": 3.7104, + "step": 206 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009711538461538462, + "loss": 3.7614, + "step": 207 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009759615384615385, + "loss": 3.6576, + "step": 208 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009807692307692308, + "loss": 3.7488, + "step": 209 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009855769230769232, + "loss": 3.7574, + "step": 210 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009903846153846154, + "loss": 3.8157, + "step": 211 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009951923076923078, + "loss": 3.6628, + "step": 212 + }, + { + "epoch": 0.06, + "learning_rate": 0.001, + "loss": 3.6348, + "step": 213 + }, + { + "epoch": 0.06, + "learning_rate": 0.000999999944968816, + "loss": 3.6941, + "step": 214 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999997798752765, + "loss": 3.8112, + "step": 215 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999995047194174, + "loss": 3.8529, + "step": 216 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999991195012996, + "loss": 3.5949, + "step": 217 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999986242210078, + "loss": 3.6972, + "step": 218 + }, + { + "epoch": 0.06, + "learning_rate": 0.000999998018878651, + "loss": 3.6609, + "step": 219 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999973034743625, + "loss": 3.6864, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999964780082996, + "loss": 3.8264, + "step": 221 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999955424806442, + "loss": 3.6914, + "step": 222 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999944968916022, + "loss": 3.7121, + "step": 223 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009999933412414039, + "loss": 3.7541, + "step": 224 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999920755303033, + "loss": 3.8026, + "step": 225 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999906997585793, + "loss": 3.5693, + "step": 226 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999892139265346, + "loss": 3.6715, + "step": 227 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999876180344966, + "loss": 3.7445, + "step": 228 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999859120828163, + "loss": 3.796, + "step": 229 + }, + { + "epoch": 0.07, + "learning_rate": 0.000999984096071869, + "loss": 3.657, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 0.000999982170002055, + "loss": 3.6771, + "step": 231 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999801338737978, + "loss": 3.8769, + "step": 232 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999779876875459, + "loss": 3.6235, + "step": 233 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999757314437716, + "loss": 3.6209, + "step": 234 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999733651429716, + "loss": 3.6345, + "step": 235 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999708887856666, + "loss": 3.7145, + "step": 236 + }, + { + "epoch": 0.07, + "learning_rate": 0.000999968302372402, + "loss": 3.8341, + "step": 237 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999656059037471, + "loss": 3.5662, + "step": 238 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999627993802953, + "loss": 3.8158, + "step": 239 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999598828026644, + "loss": 3.6846, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999568561714965, + "loss": 3.6518, + "step": 241 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999537194874577, + "loss": 3.7659, + "step": 242 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999504727512384, + "loss": 3.6396, + "step": 243 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999471159635538, + "loss": 3.6343, + "step": 244 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999436491251424, + "loss": 3.758, + "step": 245 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999400722367673, + "loss": 3.6296, + "step": 246 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999363852992158, + "loss": 3.7105, + "step": 247 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999325883132998, + "loss": 3.7128, + "step": 248 + }, + { + "epoch": 0.07, + "learning_rate": 0.000999928681279855, + "loss": 3.7588, + "step": 249 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999246641997413, + "loss": 3.7156, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 0.000999920537073843, + "loss": 3.7028, + "step": 251 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999162999030687, + "loss": 3.5924, + "step": 252 + }, + { + "epoch": 0.07, + "learning_rate": 0.000999911952688351, + "loss": 3.7117, + "step": 253 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999074954306468, + "loss": 3.7178, + "step": 254 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009999029281309376, + "loss": 3.7393, + "step": 255 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009998982507902281, + "loss": 3.7616, + "step": 256 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009998934634095485, + "loss": 3.7342, + "step": 257 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009998885659899524, + "loss": 3.6621, + "step": 258 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998835585325178, + "loss": 3.5621, + "step": 259 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999878441038347, + "loss": 3.7805, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998732135085666, + "loss": 3.6375, + "step": 261 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998678759443273, + "loss": 3.8603, + "step": 262 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998624283468036, + "loss": 3.7728, + "step": 263 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999856870717195, + "loss": 3.6819, + "step": 264 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998512030567252, + "loss": 3.625, + "step": 265 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998454253666413, + "loss": 3.7778, + "step": 266 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998395376482152, + "loss": 3.6591, + "step": 267 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998335399027433, + "loss": 3.6708, + "step": 268 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998274321315452, + "loss": 3.6223, + "step": 269 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998212143359658, + "loss": 3.6819, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998148865173737, + "loss": 3.7105, + "step": 271 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999808448677162, + "loss": 3.5446, + "step": 272 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009998019008167476, + "loss": 3.611, + "step": 273 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999795242937572, + "loss": 3.7123, + "step": 274 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997884750411004, + "loss": 3.6479, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997815971288229, + "loss": 3.664, + "step": 276 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997746092022535, + "loss": 3.5848, + "step": 277 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997675112629304, + "loss": 3.6461, + "step": 278 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999760303312416, + "loss": 3.5358, + "step": 279 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997529853522967, + "loss": 3.7093, + "step": 280 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997455573841837, + "loss": 3.7852, + "step": 281 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997380194097121, + "loss": 3.7664, + "step": 282 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997303714305412, + "loss": 3.6975, + "step": 283 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997226134483542, + "loss": 3.5344, + "step": 284 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999714745464859, + "loss": 3.7226, + "step": 285 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009997067674817877, + "loss": 3.704, + "step": 286 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009996986795008961, + "loss": 3.8532, + "step": 287 + }, + { + "epoch": 0.08, + "learning_rate": 0.000999690481523965, + "loss": 3.7976, + "step": 288 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009996821735527987, + "loss": 3.7236, + "step": 289 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009996737555892257, + "loss": 3.6443, + "step": 290 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009996652276350996, + "loss": 3.6849, + "step": 291 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009996565896922974, + "loss": 3.6088, + "step": 292 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009996478417627203, + "loss": 3.5341, + "step": 293 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009996389838482942, + "loss": 3.7067, + "step": 294 + }, + { + "epoch": 0.09, + "learning_rate": 0.000999630015950969, + "loss": 3.7872, + "step": 295 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009996209380727184, + "loss": 3.7297, + "step": 296 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009996117502155408, + "loss": 3.655, + "step": 297 + }, + { + "epoch": 0.09, + "learning_rate": 0.000999602452381459, + "loss": 3.6808, + "step": 298 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995930445725193, + "loss": 3.5957, + "step": 299 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995835267907927, + "loss": 3.57, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995738990383743, + "loss": 3.6477, + "step": 301 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995641613173834, + "loss": 3.689, + "step": 302 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995543136299637, + "loss": 3.6932, + "step": 303 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995443559782827, + "loss": 3.6288, + "step": 304 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995342883645324, + "loss": 3.7541, + "step": 305 + }, + { + "epoch": 0.09, + "learning_rate": 0.000999524110790929, + "loss": 3.7432, + "step": 306 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009995138232597126, + "loss": 3.7528, + "step": 307 + }, + { + "epoch": 0.09, + "learning_rate": 0.000999503425773148, + "loss": 3.7018, + "step": 308 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994929183335236, + "loss": 3.6876, + "step": 309 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994823009431529, + "loss": 3.7658, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994715736043726, + "loss": 3.4898, + "step": 311 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994607363195442, + "loss": 3.6119, + "step": 312 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994497890910533, + "loss": 3.6885, + "step": 313 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994387319213094, + "loss": 3.7195, + "step": 314 + }, + { + "epoch": 0.09, + "learning_rate": 0.000999427564812747, + "loss": 3.6904, + "step": 315 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994162877678236, + "loss": 3.7741, + "step": 316 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009994049007890221, + "loss": 3.709, + "step": 317 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993934038788487, + "loss": 3.679, + "step": 318 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993817970398342, + "loss": 3.7931, + "step": 319 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993700802745338, + "loss": 3.7332, + "step": 320 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993582535855264, + "loss": 3.6608, + "step": 321 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993463169754154, + "loss": 3.5741, + "step": 322 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993342704468285, + "loss": 3.7506, + "step": 323 + }, + { + "epoch": 0.09, + "learning_rate": 0.000999322114002417, + "loss": 3.6312, + "step": 324 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009993098476448576, + "loss": 3.5923, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009992974713768498, + "loss": 3.5332, + "step": 326 + }, + { + "epoch": 0.09, + "learning_rate": 0.000999284985201118, + "loss": 3.634, + "step": 327 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009992723891204106, + "loss": 3.5849, + "step": 328 + }, + { + "epoch": 0.1, + "learning_rate": 0.000999259683137501, + "loss": 3.6371, + "step": 329 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009992468672551853, + "loss": 3.6672, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009992339414762851, + "loss": 3.6747, + "step": 331 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009992209058036455, + "loss": 3.6194, + "step": 332 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009992077602401356, + "loss": 3.6531, + "step": 333 + }, + { + "epoch": 0.1, + "learning_rate": 0.00099919450478865, + "loss": 3.6961, + "step": 334 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009991811394521055, + "loss": 3.7749, + "step": 335 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009991676642334447, + "loss": 3.8673, + "step": 336 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009991540791356342, + "loss": 3.7134, + "step": 337 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009991403841616636, + "loss": 3.7489, + "step": 338 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009991265793145479, + "loss": 3.834, + "step": 339 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009991126645973259, + "loss": 3.827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009990986400130607, + "loss": 3.7506, + "step": 341 + }, + { + "epoch": 0.1, + "learning_rate": 0.000999084505564839, + "loss": 3.6117, + "step": 342 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009990702612557728, + "loss": 3.7151, + "step": 343 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009990559070889968, + "loss": 3.8299, + "step": 344 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009990414430676715, + "loss": 3.7023, + "step": 345 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009990268691949804, + "loss": 3.7124, + "step": 346 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009990121854741316, + "loss": 3.6357, + "step": 347 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009989973919083575, + "loss": 3.7341, + "step": 348 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009989824885009142, + "loss": 3.7055, + "step": 349 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009989674752550826, + "loss": 3.6428, + "step": 350 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009989523521741674, + "loss": 3.7944, + "step": 351 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009989371192614977, + "loss": 3.6954, + "step": 352 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009989217765204266, + "loss": 3.7625, + "step": 353 + }, + { + "epoch": 0.1, + "learning_rate": 0.000998906323954331, + "loss": 3.6765, + "step": 354 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009988907615666128, + "loss": 3.6974, + "step": 355 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009988750893606976, + "loss": 3.6291, + "step": 356 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009988593073400353, + "loss": 3.6986, + "step": 357 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009988434155080998, + "loss": 3.7308, + "step": 358 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009988274138683895, + "loss": 3.6717, + "step": 359 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009988113024244263, + "loss": 3.6114, + "step": 360 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009987950811797572, + "loss": 3.7942, + "step": 361 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009987787501379527, + "loss": 3.7083, + "step": 362 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009987623093026074, + "loss": 3.7808, + "step": 363 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009987457586773409, + "loss": 3.5259, + "step": 364 + }, + { + "epoch": 0.11, + "learning_rate": 0.000998729098265796, + "loss": 3.6476, + "step": 365 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009987123280716403, + "loss": 3.7561, + "step": 366 + }, + { + "epoch": 0.11, + "learning_rate": 0.000998695448098565, + "loss": 3.7506, + "step": 367 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009986784583502861, + "loss": 3.616, + "step": 368 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009986613588305435, + "loss": 3.629, + "step": 369 + }, + { + "epoch": 0.11, + "learning_rate": 0.000998644149543101, + "loss": 3.9004, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009986268304917468, + "loss": 4.0177, + "step": 371 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009986094016802935, + "loss": 3.8866, + "step": 372 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009985918631125773, + "loss": 4.063, + "step": 373 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009985742147924593, + "loss": 4.1941, + "step": 374 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009985564567238237, + "loss": 3.9636, + "step": 375 + }, + { + "epoch": 0.11, + "learning_rate": 0.00099853858891058, + "loss": 4.0097, + "step": 376 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009985206113566614, + "loss": 3.8793, + "step": 377 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009985025240660248, + "loss": 3.9372, + "step": 378 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009984843270426519, + "loss": 3.8242, + "step": 379 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009984660202905483, + "loss": 3.9139, + "step": 380 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009984476038137435, + "loss": 3.8506, + "step": 381 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009984290776162922, + "loss": 3.9393, + "step": 382 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009984104417022715, + "loss": 3.9364, + "step": 383 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009983916960757842, + "loss": 3.7773, + "step": 384 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009983728407409564, + "loss": 3.8694, + "step": 385 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009983538757019391, + "loss": 3.8089, + "step": 386 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009983348009629063, + "loss": 4.0018, + "step": 387 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009983156165280574, + "loss": 3.7097, + "step": 388 + }, + { + "epoch": 0.11, + "learning_rate": 0.000998296322401615, + "loss": 3.8345, + "step": 389 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009982769185878264, + "loss": 3.773, + "step": 390 + }, + { + "epoch": 0.11, + "learning_rate": 0.000998257405090963, + "loss": 3.8152, + "step": 391 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009982377819153197, + "loss": 3.8483, + "step": 392 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009982180490652165, + "loss": 3.784, + "step": 393 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009981982065449969, + "loss": 3.7975, + "step": 394 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009981782543590288, + "loss": 3.7096, + "step": 395 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009981581925117044, + "loss": 3.8472, + "step": 396 + }, + { + "epoch": 0.11, + "learning_rate": 0.000998138021007439, + "loss": 3.6477, + "step": 397 + }, + { + "epoch": 0.12, + "learning_rate": 0.000998117739850674, + "loss": 3.8306, + "step": 398 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009980973490458728, + "loss": 3.6405, + "step": 399 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009980768485975244, + "loss": 3.7399, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009980562385101413, + "loss": 3.7544, + "step": 401 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009980355187882606, + "loss": 3.754, + "step": 402 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009980146894364428, + "loss": 3.7543, + "step": 403 + }, + { + "epoch": 0.12, + "learning_rate": 0.000997993750459273, + "loss": 3.836, + "step": 404 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009979727018613607, + "loss": 3.6722, + "step": 405 + }, + { + "epoch": 0.12, + "learning_rate": 0.000997951543647339, + "loss": 3.829, + "step": 406 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009979302758218653, + "loss": 3.8155, + "step": 407 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009979088983896214, + "loss": 3.7308, + "step": 408 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009978874113553127, + "loss": 3.6827, + "step": 409 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009978658147236695, + "loss": 3.7619, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009978441084994452, + "loss": 3.71, + "step": 411 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009978222926874181, + "loss": 3.8948, + "step": 412 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009978003672923904, + "loss": 3.7231, + "step": 413 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009977783323191884, + "loss": 3.8278, + "step": 414 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009977561877726627, + "loss": 3.6713, + "step": 415 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009977339336576876, + "loss": 3.7997, + "step": 416 + }, + { + "epoch": 0.12, + "learning_rate": 0.000997711569979162, + "loss": 3.6536, + "step": 417 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009976890967420089, + "loss": 3.7295, + "step": 418 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009976665139511745, + "loss": 3.6741, + "step": 419 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009976438216116306, + "loss": 3.5868, + "step": 420 + }, + { + "epoch": 0.12, + "learning_rate": 0.000997621019728372, + "loss": 3.8072, + "step": 421 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009975981083064176, + "loss": 3.7895, + "step": 422 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009975750873508116, + "loss": 3.7789, + "step": 423 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009975519568666206, + "loss": 3.7323, + "step": 424 + }, + { + "epoch": 0.12, + "learning_rate": 0.000997528716858937, + "loss": 3.756, + "step": 425 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009975053673328758, + "loss": 3.7363, + "step": 426 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009974819082935772, + "loss": 3.7528, + "step": 427 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009974583397462052, + "loss": 3.7039, + "step": 428 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009974346616959476, + "loss": 3.6738, + "step": 429 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009974108741480166, + "loss": 3.6975, + "step": 430 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009973869771076483, + "loss": 3.6814, + "step": 431 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009973629705801034, + "loss": 3.7683, + "step": 432 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009973388545706657, + "loss": 3.7217, + "step": 433 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009973146290846444, + "loss": 3.6759, + "step": 434 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009972902941273716, + "loss": 3.6048, + "step": 435 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009972658497042044, + "loss": 3.8603, + "step": 436 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009972412958205235, + "loss": 3.8097, + "step": 437 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009972166324817339, + "loss": 3.7029, + "step": 438 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009971918596932642, + "loss": 3.662, + "step": 439 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009971669774605678, + "loss": 3.5865, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009971419857891222, + "loss": 3.7332, + "step": 441 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009971168846844283, + "loss": 3.6936, + "step": 442 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009970916741520114, + "loss": 3.8466, + "step": 443 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009970663541974214, + "loss": 3.7223, + "step": 444 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009970409248262315, + "loss": 3.615, + "step": 445 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009970153860440394, + "loss": 3.774, + "step": 446 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009969897378564667, + "loss": 3.561, + "step": 447 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009969639802691593, + "loss": 3.6403, + "step": 448 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009969381132877873, + "loss": 3.8436, + "step": 449 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009969121369180444, + "loss": 3.6757, + "step": 450 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009968860511656485, + "loss": 3.6232, + "step": 451 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009968598560363422, + "loss": 3.7082, + "step": 452 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009968335515358915, + "loss": 3.6463, + "step": 453 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009968071376700864, + "loss": 3.7536, + "step": 454 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009967806144447415, + "loss": 3.8328, + "step": 455 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009967539818656953, + "loss": 3.7297, + "step": 456 + }, + { + "epoch": 0.13, + "learning_rate": 0.00099672723993881, + "loss": 3.7524, + "step": 457 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009967003886699722, + "loss": 3.5577, + "step": 458 + }, + { + "epoch": 0.13, + "learning_rate": 0.000996673428065093, + "loss": 3.7276, + "step": 459 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009966463581301062, + "loss": 3.5709, + "step": 460 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009966191788709714, + "loss": 3.6987, + "step": 461 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009965918902936713, + "loss": 3.6349, + "step": 462 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009965644924042124, + "loss": 3.5932, + "step": 463 + }, + { + "epoch": 0.13, + "learning_rate": 0.000996536985208626, + "loss": 3.716, + "step": 464 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009965093687129669, + "loss": 3.8616, + "step": 465 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009964816429233142, + "loss": 3.5844, + "step": 466 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009964538078457712, + "loss": 3.7315, + "step": 467 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009964258634864648, + "loss": 3.7489, + "step": 468 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009963978098515467, + "loss": 3.6614, + "step": 469 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009963696469471918, + "loss": 3.6779, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009963413747795995, + "loss": 3.7368, + "step": 471 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009963129933549932, + "loss": 3.5822, + "step": 472 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009962845026796206, + "loss": 3.7772, + "step": 473 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009962559027597532, + "loss": 3.7422, + "step": 474 + }, + { + "epoch": 0.14, + "learning_rate": 0.000996227193601686, + "loss": 3.7444, + "step": 475 + }, + { + "epoch": 0.14, + "learning_rate": 0.000996198375211739, + "loss": 3.6498, + "step": 476 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009961694475962562, + "loss": 3.6996, + "step": 477 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009961404107616048, + "loss": 3.6961, + "step": 478 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009961112647141764, + "loss": 3.6028, + "step": 479 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009960820094603872, + "loss": 3.6861, + "step": 480 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009960526450066766, + "loss": 3.7575, + "step": 481 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009960231713595088, + "loss": 3.7073, + "step": 482 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009959935885253714, + "loss": 3.6649, + "step": 483 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009959638965107767, + "loss": 3.6906, + "step": 484 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009959340953222603, + "loss": 3.6002, + "step": 485 + }, + { + "epoch": 0.14, + "learning_rate": 0.000995904184966382, + "loss": 3.6651, + "step": 486 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009958741654497263, + "loss": 3.6374, + "step": 487 + }, + { + "epoch": 0.14, + "learning_rate": 0.000995844036778901, + "loss": 3.6679, + "step": 488 + }, + { + "epoch": 0.14, + "learning_rate": 0.000995813798960538, + "loss": 3.6765, + "step": 489 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009957834520012937, + "loss": 3.6081, + "step": 490 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009957529959078479, + "loss": 3.572, + "step": 491 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009957224306869053, + "loss": 3.6963, + "step": 492 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009956917563451934, + "loss": 3.7229, + "step": 493 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009956609728894647, + "loss": 3.6748, + "step": 494 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009956300803264954, + "loss": 3.7678, + "step": 495 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009955990786630858, + "loss": 3.726, + "step": 496 + }, + { + "epoch": 0.14, + "learning_rate": 0.00099556796790606, + "loss": 3.6513, + "step": 497 + }, + { + "epoch": 0.14, + "learning_rate": 0.000995536748062266, + "loss": 3.5193, + "step": 498 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009955054191385763, + "loss": 3.5712, + "step": 499 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009954739811418875, + "loss": 3.585, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009954424340791195, + "loss": 3.6559, + "step": 501 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009954107779572166, + "loss": 3.6704, + "step": 502 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009953790127831472, + "loss": 3.6832, + "step": 503 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009953471385639036, + "loss": 3.7979, + "step": 504 + }, + { + "epoch": 0.15, + "learning_rate": 0.000995315155306502, + "loss": 3.6549, + "step": 505 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009952830630179826, + "loss": 3.5736, + "step": 506 + }, + { + "epoch": 0.15, + "learning_rate": 0.00099525086170541, + "loss": 3.5251, + "step": 507 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009952185513758724, + "loss": 3.6032, + "step": 508 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009951861320364821, + "loss": 3.5613, + "step": 509 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009951536036943754, + "loss": 3.6615, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009951209663567125, + "loss": 3.6612, + "step": 511 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009950882200306778, + "loss": 3.5431, + "step": 512 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009950553647234796, + "loss": 3.6175, + "step": 513 + }, + { + "epoch": 0.15, + "learning_rate": 0.00099502240044235, + "loss": 3.6892, + "step": 514 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009949893271945455, + "loss": 3.6055, + "step": 515 + }, + { + "epoch": 0.15, + "learning_rate": 0.000994956144987346, + "loss": 3.7621, + "step": 516 + }, + { + "epoch": 0.15, + "learning_rate": 0.000994922853828056, + "loss": 3.6264, + "step": 517 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009948894537240038, + "loss": 3.6447, + "step": 518 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009948559446825412, + "loss": 3.6252, + "step": 519 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009948223267110447, + "loss": 3.5332, + "step": 520 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009947885998169142, + "loss": 3.742, + "step": 521 + }, + { + "epoch": 0.15, + "learning_rate": 0.000994754764007574, + "loss": 3.6361, + "step": 522 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009947208192904722, + "loss": 3.7173, + "step": 523 + }, + { + "epoch": 0.15, + "learning_rate": 0.000994686765673081, + "loss": 3.6875, + "step": 524 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009946526031628959, + "loss": 3.6247, + "step": 525 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009946183317674376, + "loss": 3.6669, + "step": 526 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009945839514942495, + "loss": 3.7014, + "step": 527 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009945494623509001, + "loss": 3.6868, + "step": 528 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009945148643449807, + "loss": 3.7157, + "step": 529 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009944801574841078, + "loss": 3.8294, + "step": 530 + }, + { + "epoch": 0.15, + "learning_rate": 0.000994445341775921, + "loss": 3.733, + "step": 531 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009944104172280837, + "loss": 3.7909, + "step": 532 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009943753838482844, + "loss": 3.6931, + "step": 533 + }, + { + "epoch": 0.15, + "learning_rate": 0.000994340241644234, + "loss": 3.6485, + "step": 534 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009943049906236687, + "loss": 3.7484, + "step": 535 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009942696307943483, + "loss": 3.6667, + "step": 536 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009942341621640557, + "loss": 3.7458, + "step": 537 + }, + { + "epoch": 0.16, + "learning_rate": 0.000994198584740599, + "loss": 3.5692, + "step": 538 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009941628985318093, + "loss": 3.5952, + "step": 539 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009941271035455424, + "loss": 3.7567, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009940911997896773, + "loss": 3.8045, + "step": 541 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009940551872721175, + "loss": 3.6537, + "step": 542 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009940190660007902, + "loss": 3.6709, + "step": 543 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009939828359836467, + "loss": 3.7905, + "step": 544 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009939464972286618, + "loss": 3.5699, + "step": 545 + }, + { + "epoch": 0.16, + "learning_rate": 0.000993910049743835, + "loss": 3.7881, + "step": 546 + }, + { + "epoch": 0.16, + "learning_rate": 0.000993873493537189, + "loss": 3.6279, + "step": 547 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009938368286167706, + "loss": 3.5849, + "step": 548 + }, + { + "epoch": 0.16, + "learning_rate": 0.000993800054990651, + "loss": 3.6961, + "step": 549 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009937631726669245, + "loss": 3.7376, + "step": 550 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009937261816537106, + "loss": 3.7351, + "step": 551 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009936890819591511, + "loss": 3.6581, + "step": 552 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009936518735914133, + "loss": 3.6157, + "step": 553 + }, + { + "epoch": 0.16, + "learning_rate": 0.000993614556558687, + "loss": 3.6744, + "step": 554 + }, + { + "epoch": 0.16, + "learning_rate": 0.000993577130869187, + "loss": 3.5829, + "step": 555 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009935395965311516, + "loss": 3.6365, + "step": 556 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009935019535528432, + "loss": 3.6775, + "step": 557 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009934642019425472, + "loss": 3.7224, + "step": 558 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009934263417085746, + "loss": 3.7002, + "step": 559 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009933883728592589, + "loss": 3.7048, + "step": 560 + }, + { + "epoch": 0.16, + "learning_rate": 0.000993350295402958, + "loss": 3.4936, + "step": 561 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009933121093480537, + "loss": 3.529, + "step": 562 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009932738147029516, + "loss": 3.665, + "step": 563 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009932354114760818, + "loss": 3.7, + "step": 564 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009931968996758972, + "loss": 3.6423, + "step": 565 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009931582793108753, + "loss": 3.5859, + "step": 566 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009931195503895178, + "loss": 3.7281, + "step": 567 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009930807129203494, + "loss": 3.6349, + "step": 568 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009930417669119194, + "loss": 3.6389, + "step": 569 + }, + { + "epoch": 0.17, + "learning_rate": 0.000993002712372801, + "loss": 3.6347, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009929635493115907, + "loss": 3.6897, + "step": 571 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009929242777369095, + "loss": 3.75, + "step": 572 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009928848976574018, + "loss": 3.6016, + "step": 573 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009928454090817364, + "loss": 3.6417, + "step": 574 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009928058120186058, + "loss": 3.5364, + "step": 575 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009927661064767258, + "loss": 3.7325, + "step": 576 + }, + { + "epoch": 0.17, + "learning_rate": 0.000992726292464837, + "loss": 3.6492, + "step": 577 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009926863699917033, + "loss": 3.6142, + "step": 578 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009926463390661127, + "loss": 3.7066, + "step": 579 + }, + { + "epoch": 0.17, + "learning_rate": 0.000992606199696877, + "loss": 3.4474, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009925659518928314, + "loss": 3.7053, + "step": 581 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009925255956628363, + "loss": 3.6484, + "step": 582 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009924851310157744, + "loss": 3.5367, + "step": 583 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009924445579605533, + "loss": 3.6106, + "step": 584 + }, + { + "epoch": 0.17, + "learning_rate": 0.000992403876506104, + "loss": 3.5737, + "step": 585 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009923630866613816, + "loss": 3.7044, + "step": 586 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009923221884353649, + "loss": 3.5919, + "step": 587 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009922811818370567, + "loss": 3.6284, + "step": 588 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009922400668754834, + "loss": 3.6501, + "step": 589 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009921988435596953, + "loss": 3.526, + "step": 590 + }, + { + "epoch": 0.17, + "learning_rate": 0.000992157511898767, + "loss": 3.737, + "step": 591 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009921160719017967, + "loss": 3.5667, + "step": 592 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009920745235779058, + "loss": 3.7342, + "step": 593 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009920328669362408, + "loss": 3.6082, + "step": 594 + }, + { + "epoch": 0.17, + "learning_rate": 0.000991991101985971, + "loss": 3.5576, + "step": 595 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009919492287362897, + "loss": 3.6667, + "step": 596 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009919072471964146, + "loss": 3.5459, + "step": 597 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009918651573755865, + "loss": 3.7746, + "step": 598 + }, + { + "epoch": 0.17, + "learning_rate": 0.000991822959283071, + "loss": 3.7523, + "step": 599 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009917806529281565, + "loss": 3.6561, + "step": 600 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009917382383201555, + "loss": 3.7697, + "step": 601 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009916957154684051, + "loss": 3.649, + "step": 602 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009916530843822654, + "loss": 3.606, + "step": 603 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009916103450711203, + "loss": 3.6625, + "step": 604 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009915674975443777, + "loss": 3.6538, + "step": 605 + }, + { + "epoch": 0.18, + "learning_rate": 0.00099152454181147, + "loss": 3.6074, + "step": 606 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009914814778818524, + "loss": 3.5671, + "step": 607 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009914383057650043, + "loss": 3.7064, + "step": 608 + }, + { + "epoch": 0.18, + "learning_rate": 0.000991395025470429, + "loss": 3.5435, + "step": 609 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009913516370076536, + "loss": 3.644, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 0.000991308140386229, + "loss": 3.5077, + "step": 611 + }, + { + "epoch": 0.18, + "learning_rate": 0.00099126453561573, + "loss": 3.5789, + "step": 612 + }, + { + "epoch": 0.18, + "learning_rate": 0.000991220822705755, + "loss": 3.6847, + "step": 613 + }, + { + "epoch": 0.18, + "learning_rate": 0.000991177001665926, + "loss": 3.5346, + "step": 614 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009911330725058895, + "loss": 3.6991, + "step": 615 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009910890352353154, + "loss": 3.5078, + "step": 616 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009910448898638971, + "loss": 3.6742, + "step": 617 + }, + { + "epoch": 0.18, + "learning_rate": 0.000991000636401352, + "loss": 3.5215, + "step": 618 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009909562748574218, + "loss": 3.6943, + "step": 619 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009909118052418716, + "loss": 3.5329, + "step": 620 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009908672275644897, + "loss": 3.6148, + "step": 621 + }, + { + "epoch": 0.18, + "learning_rate": 0.000990822541835089, + "loss": 3.6845, + "step": 622 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009907777480635063, + "loss": 3.6781, + "step": 623 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009907328462596015, + "loss": 3.6402, + "step": 624 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009906878364332585, + "loss": 3.7434, + "step": 625 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009906427185943853, + "loss": 3.6494, + "step": 626 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009905974927529135, + "loss": 3.4317, + "step": 627 + }, + { + "epoch": 0.18, + "learning_rate": 0.000990552158918798, + "loss": 3.5232, + "step": 628 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009905067171020185, + "loss": 3.5147, + "step": 629 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009904611673125774, + "loss": 3.5913, + "step": 630 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009904155095605013, + "loss": 3.6906, + "step": 631 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009903697438558411, + "loss": 3.6337, + "step": 632 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009903238702086705, + "loss": 3.5287, + "step": 633 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009902778886290878, + "loss": 3.5244, + "step": 634 + }, + { + "epoch": 0.18, + "learning_rate": 0.000990231799127214, + "loss": 3.744, + "step": 635 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009901856017131953, + "loss": 3.5939, + "step": 636 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009901392963972007, + "loss": 3.6093, + "step": 637 + }, + { + "epoch": 0.18, + "learning_rate": 0.000990092883189423, + "loss": 3.568, + "step": 638 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009900463621000787, + "loss": 3.6674, + "step": 639 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009899997331394089, + "loss": 3.6506, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 0.000989952996317677, + "loss": 3.7021, + "step": 641 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009899061516451715, + "loss": 3.5932, + "step": 642 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009898591991322036, + "loss": 3.5443, + "step": 643 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009898121387891092, + "loss": 3.6242, + "step": 644 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009897649706262473, + "loss": 3.6105, + "step": 645 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009897176946540005, + "loss": 3.6434, + "step": 646 + }, + { + "epoch": 0.19, + "learning_rate": 0.000989670310882776, + "loss": 3.6683, + "step": 647 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009896228193230032, + "loss": 3.6425, + "step": 648 + }, + { + "epoch": 0.19, + "learning_rate": 0.000989575219985137, + "loss": 3.7033, + "step": 649 + }, + { + "epoch": 0.19, + "learning_rate": 0.000989527512879655, + "loss": 3.5329, + "step": 650 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009894796980170584, + "loss": 3.6693, + "step": 651 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009894317754078727, + "loss": 3.6757, + "step": 652 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009893837450626471, + "loss": 3.5769, + "step": 653 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009893356069919537, + "loss": 3.5945, + "step": 654 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009892873612063894, + "loss": 3.5734, + "step": 655 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009892390077165737, + "loss": 3.4647, + "step": 656 + }, + { + "epoch": 0.19, + "learning_rate": 0.000989190546533151, + "loss": 3.5565, + "step": 657 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009891419776667885, + "loss": 3.5983, + "step": 658 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009890933011281774, + "loss": 3.6734, + "step": 659 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009890445169280328, + "loss": 3.5085, + "step": 660 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009889956250770933, + "loss": 3.5911, + "step": 661 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009889466255861208, + "loss": 3.6021, + "step": 662 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009888975184659018, + "loss": 3.4324, + "step": 663 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009888483037272455, + "loss": 3.705, + "step": 664 + }, + { + "epoch": 0.19, + "learning_rate": 0.000988798981380986, + "loss": 3.5961, + "step": 665 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009887495514379796, + "loss": 3.5768, + "step": 666 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009887000139091073, + "loss": 3.6342, + "step": 667 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009886503688052739, + "loss": 3.5585, + "step": 668 + }, + { + "epoch": 0.19, + "learning_rate": 0.000988600616137407, + "loss": 3.5758, + "step": 669 + }, + { + "epoch": 0.19, + "learning_rate": 0.000988550755916459, + "loss": 3.5678, + "step": 670 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009885007881534045, + "loss": 3.5479, + "step": 671 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009884507128592435, + "loss": 3.6072, + "step": 672 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009884005300449983, + "loss": 3.6328, + "step": 673 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009883502397217156, + "loss": 3.5589, + "step": 674 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009882998419004653, + "loss": 3.5256, + "step": 675 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009882493365923415, + "loss": 3.5706, + "step": 676 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009881987238084615, + "loss": 3.4686, + "step": 677 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009881480035599667, + "loss": 3.5833, + "step": 678 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009880971758580216, + "loss": 3.5522, + "step": 679 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009880462407138144, + "loss": 3.6313, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009879951981385578, + "loss": 3.4857, + "step": 681 + }, + { + "epoch": 0.2, + "learning_rate": 0.000987944048143487, + "loss": 3.7416, + "step": 682 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009878927907398616, + "loss": 3.517, + "step": 683 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009878414259389646, + "loss": 3.4619, + "step": 684 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009877899537521027, + "loss": 3.6346, + "step": 685 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009877383741906062, + "loss": 3.6255, + "step": 686 + }, + { + "epoch": 0.2, + "learning_rate": 0.000987686687265829, + "loss": 3.5642, + "step": 687 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009876348929891486, + "loss": 3.474, + "step": 688 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009875829913719664, + "loss": 3.5055, + "step": 689 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009875309824257071, + "loss": 3.5864, + "step": 690 + }, + { + "epoch": 0.2, + "learning_rate": 0.000987478866161819, + "loss": 3.4751, + "step": 691 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009874266425917745, + "loss": 3.6781, + "step": 692 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009873743117270691, + "loss": 3.6705, + "step": 693 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009873218735792222, + "loss": 3.5399, + "step": 694 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009872693281597767, + "loss": 3.4929, + "step": 695 + }, + { + "epoch": 0.2, + "learning_rate": 0.000987216675480299, + "loss": 3.5202, + "step": 696 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009871639155523795, + "loss": 3.7003, + "step": 697 + }, + { + "epoch": 0.2, + "learning_rate": 0.000987111048387632, + "loss": 3.5224, + "step": 698 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009870580739976935, + "loss": 3.4661, + "step": 699 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009870049923942253, + "loss": 3.5702, + "step": 700 + }, + { + "epoch": 0.2, + "learning_rate": 0.000986951803588912, + "loss": 3.6086, + "step": 701 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009868985075934616, + "loss": 3.6399, + "step": 702 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009868451044196059, + "loss": 3.6272, + "step": 703 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009867915940791, + "loss": 3.6246, + "step": 704 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009867379765837235, + "loss": 3.4941, + "step": 705 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009866842519452785, + "loss": 3.6187, + "step": 706 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009866304201755912, + "loss": 3.6571, + "step": 707 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009865764812865112, + "loss": 3.467, + "step": 708 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009865224352899118, + "loss": 3.5198, + "step": 709 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009864682821976902, + "loss": 3.5924, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009864140220217665, + "loss": 3.6005, + "step": 711 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009863596547740846, + "loss": 3.5977, + "step": 712 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009863051804666124, + "loss": 3.4406, + "step": 713 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009862505991113408, + "loss": 3.5955, + "step": 714 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009861959107202846, + "loss": 3.7426, + "step": 715 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009861411153054822, + "loss": 3.5773, + "step": 716 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009860862128789954, + "loss": 3.5234, + "step": 717 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009860312034529093, + "loss": 3.561, + "step": 718 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009859760870393332, + "loss": 3.615, + "step": 719 + }, + { + "epoch": 0.21, + "learning_rate": 0.000985920863650399, + "loss": 3.5786, + "step": 720 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009858655332982632, + "loss": 3.5686, + "step": 721 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009858100959951057, + "loss": 3.6361, + "step": 722 + }, + { + "epoch": 0.21, + "learning_rate": 0.000985754551753129, + "loss": 3.6035, + "step": 723 + }, + { + "epoch": 0.21, + "learning_rate": 0.00098569890058456, + "loss": 3.4891, + "step": 724 + }, + { + "epoch": 0.21, + "learning_rate": 0.000985643142501649, + "loss": 3.5108, + "step": 725 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009855872775166696, + "loss": 3.5662, + "step": 726 + }, + { + "epoch": 0.21, + "learning_rate": 0.000985531305641919, + "loss": 3.5337, + "step": 727 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009854752268897181, + "loss": 3.5486, + "step": 728 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009854190412724112, + "loss": 3.5992, + "step": 729 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009853627488023663, + "loss": 3.5737, + "step": 730 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009853063494919745, + "loss": 3.6321, + "step": 731 + }, + { + "epoch": 0.21, + "learning_rate": 0.000985249843353651, + "loss": 3.6426, + "step": 732 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009851932303998336, + "loss": 3.61, + "step": 733 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009851365106429849, + "loss": 3.6078, + "step": 734 + }, + { + "epoch": 0.21, + "learning_rate": 0.00098507968409559, + "loss": 3.5538, + "step": 735 + }, + { + "epoch": 0.21, + "learning_rate": 0.000985022750770158, + "loss": 3.4704, + "step": 736 + }, + { + "epoch": 0.21, + "learning_rate": 0.000984965710679221, + "loss": 3.5393, + "step": 737 + }, + { + "epoch": 0.21, + "learning_rate": 0.000984908563835335, + "loss": 3.6422, + "step": 738 + }, + { + "epoch": 0.21, + "learning_rate": 0.00098485131025108, + "loss": 3.4752, + "step": 739 + }, + { + "epoch": 0.21, + "learning_rate": 0.000984793949939058, + "loss": 3.53, + "step": 740 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009847364829118962, + "loss": 3.5809, + "step": 741 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009846789091822441, + "loss": 3.6176, + "step": 742 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009846212287627754, + "loss": 3.5731, + "step": 743 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009845634416661866, + "loss": 3.6697, + "step": 744 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009845055479051985, + "loss": 3.6432, + "step": 745 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009844475474925548, + "loss": 3.4783, + "step": 746 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009843894404410224, + "loss": 3.5522, + "step": 747 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009843312267633928, + "loss": 3.6019, + "step": 748 + }, + { + "epoch": 0.22, + "learning_rate": 0.00098427290647248, + "loss": 3.5303, + "step": 749 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009842144795811215, + "loss": 3.6123, + "step": 750 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009841559461021786, + "loss": 3.5729, + "step": 751 + }, + { + "epoch": 0.22, + "learning_rate": 0.000984097306048536, + "loss": 3.4976, + "step": 752 + }, + { + "epoch": 0.22, + "learning_rate": 0.000984038559433102, + "loss": 3.5393, + "step": 753 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009839797062688083, + "loss": 3.6539, + "step": 754 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009839207465686093, + "loss": 3.5819, + "step": 755 + }, + { + "epoch": 0.22, + "learning_rate": 0.000983861680345484, + "loss": 3.5266, + "step": 756 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009838025076124345, + "loss": 3.6981, + "step": 757 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009837432283824855, + "loss": 3.6821, + "step": 758 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009836838426686863, + "loss": 3.4628, + "step": 759 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009836243504841092, + "loss": 3.6034, + "step": 760 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009835647518418498, + "loss": 3.6074, + "step": 761 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009835050467550273, + "loss": 3.6137, + "step": 762 + }, + { + "epoch": 0.22, + "learning_rate": 0.000983445235236784, + "loss": 3.4411, + "step": 763 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009833853173002861, + "loss": 3.5851, + "step": 764 + }, + { + "epoch": 0.22, + "learning_rate": 0.000983325292958723, + "loss": 3.4749, + "step": 765 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009832651622253077, + "loss": 3.5695, + "step": 766 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009832049251132762, + "loss": 3.6013, + "step": 767 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009831445816358884, + "loss": 3.5717, + "step": 768 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009830841318064273, + "loss": 3.6076, + "step": 769 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009830235756381992, + "loss": 3.499, + "step": 770 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009829629131445341, + "loss": 3.4922, + "step": 771 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009829021443387856, + "loss": 3.6222, + "step": 772 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009828412692343303, + "loss": 3.3502, + "step": 773 + }, + { + "epoch": 0.22, + "learning_rate": 0.000982780287844568, + "loss": 3.351, + "step": 774 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009827192001829227, + "loss": 3.5911, + "step": 775 + }, + { + "epoch": 0.22, + "learning_rate": 0.000982658006262841, + "loss": 3.5716, + "step": 776 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009825967060977932, + "loss": 3.5324, + "step": 777 + }, + { + "epoch": 0.23, + "learning_rate": 0.000982535299701273, + "loss": 3.6293, + "step": 778 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009824737870867976, + "loss": 3.5073, + "step": 779 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009824121682679072, + "loss": 3.5074, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 0.000982350443258166, + "loss": 3.5036, + "step": 781 + }, + { + "epoch": 0.23, + "learning_rate": 0.000982288612071161, + "loss": 3.5905, + "step": 782 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009822266747205024, + "loss": 3.5023, + "step": 783 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009821646312198249, + "loss": 3.5743, + "step": 784 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009821024815827853, + "loss": 3.5681, + "step": 785 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009820402258230642, + "loss": 3.6316, + "step": 786 + }, + { + "epoch": 0.23, + "learning_rate": 0.000981977863954366, + "loss": 3.5863, + "step": 787 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009819153959904178, + "loss": 3.6011, + "step": 788 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009818528219449705, + "loss": 3.5443, + "step": 789 + }, + { + "epoch": 0.23, + "learning_rate": 0.000981790141831798, + "loss": 3.5701, + "step": 790 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009817273556646982, + "loss": 3.5753, + "step": 791 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009816644634574913, + "loss": 3.549, + "step": 792 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009816014652240218, + "loss": 3.5605, + "step": 793 + }, + { + "epoch": 0.23, + "learning_rate": 0.000981538360978157, + "loss": 3.601, + "step": 794 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009814751507337877, + "loss": 3.5687, + "step": 795 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009814118345048285, + "loss": 3.6279, + "step": 796 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009813484123052162, + "loss": 3.5233, + "step": 797 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009812848841489119, + "loss": 3.4506, + "step": 798 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009812212500498996, + "loss": 3.6081, + "step": 799 + }, + { + "epoch": 0.23, + "learning_rate": 0.000981157510022187, + "loss": 3.6848, + "step": 800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009810936640798045, + "loss": 3.4994, + "step": 801 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009810297122368067, + "loss": 3.6364, + "step": 802 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009809656545072703, + "loss": 3.4953, + "step": 803 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009809014909052966, + "loss": 3.5484, + "step": 804 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009808372214450093, + "loss": 3.5691, + "step": 805 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009807728461405556, + "loss": 3.5722, + "step": 806 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009807083650061063, + "loss": 3.5811, + "step": 807 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009806437780558552, + "loss": 3.6131, + "step": 808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009805790853040196, + "loss": 3.5346, + "step": 809 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009805142867648398, + "loss": 3.6232, + "step": 810 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009804493824525796, + "loss": 3.5237, + "step": 811 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009803843723815262, + "loss": 3.6583, + "step": 812 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009803192565659897, + "loss": 3.5203, + "step": 813 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009802540350203038, + "loss": 3.5106, + "step": 814 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009801887077588254, + "loss": 3.6142, + "step": 815 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009801232747959348, + "loss": 3.4449, + "step": 816 + }, + { + "epoch": 0.24, + "learning_rate": 0.000980057736146035, + "loss": 3.4883, + "step": 817 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009799920918235532, + "loss": 3.4582, + "step": 818 + }, + { + "epoch": 0.24, + "learning_rate": 0.000979926341842939, + "loss": 3.638, + "step": 819 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009798604862186655, + "loss": 3.4349, + "step": 820 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009797945249652295, + "loss": 3.5413, + "step": 821 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009797284580971503, + "loss": 3.5257, + "step": 822 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009796622856289713, + "loss": 3.4987, + "step": 823 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009795960075752585, + "loss": 3.6352, + "step": 824 + }, + { + "epoch": 0.24, + "learning_rate": 0.000979529623950601, + "loss": 3.5039, + "step": 825 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009794631347696122, + "loss": 3.3518, + "step": 826 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009793965400469273, + "loss": 3.6784, + "step": 827 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009793298397972057, + "loss": 3.5734, + "step": 828 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009792630340351301, + "loss": 3.5829, + "step": 829 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009791961227754056, + "loss": 3.5014, + "step": 830 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009791291060327612, + "loss": 3.5623, + "step": 831 + }, + { + "epoch": 0.24, + "learning_rate": 0.000979061983821949, + "loss": 3.6146, + "step": 832 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009789947561577445, + "loss": 3.5619, + "step": 833 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009789274230549456, + "loss": 3.4901, + "step": 834 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009788599845283745, + "loss": 3.5296, + "step": 835 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009787924405928758, + "loss": 3.4983, + "step": 836 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009787247912633177, + "loss": 3.4982, + "step": 837 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009786570365545916, + "loss": 3.5671, + "step": 838 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009785891764816116, + "loss": 3.4727, + "step": 839 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009785212110593158, + "loss": 3.4206, + "step": 840 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009784531403026649, + "loss": 3.5059, + "step": 841 + }, + { + "epoch": 0.24, + "learning_rate": 0.000978384964226643, + "loss": 3.6528, + "step": 842 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009783166828462573, + "loss": 3.56, + "step": 843 + }, + { + "epoch": 0.24, + "learning_rate": 0.0009782482961765383, + "loss": 3.4675, + "step": 844 + }, + { + "epoch": 0.24, + "learning_rate": 0.000978179804232539, + "loss": 3.5227, + "step": 845 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009781112070293373, + "loss": 3.5736, + "step": 846 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009780425045820323, + "loss": 3.658, + "step": 847 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009779736969057473, + "loss": 3.6315, + "step": 848 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009779047840156288, + "loss": 3.5286, + "step": 849 + }, + { + "epoch": 0.25, + "learning_rate": 0.000977835765926846, + "loss": 3.5504, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009777666426545916, + "loss": 3.5619, + "step": 851 + }, + { + "epoch": 0.25, + "learning_rate": 0.000977697414214081, + "loss": 3.4639, + "step": 852 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009776280806205536, + "loss": 3.585, + "step": 853 + }, + { + "epoch": 0.25, + "learning_rate": 0.000977558641889271, + "loss": 3.6519, + "step": 854 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009774890980355187, + "loss": 3.4163, + "step": 855 + }, + { + "epoch": 0.25, + "learning_rate": 0.000977419449074605, + "loss": 3.5057, + "step": 856 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009773496950218612, + "loss": 3.5909, + "step": 857 + }, + { + "epoch": 0.25, + "learning_rate": 0.000977279835892642, + "loss": 3.6187, + "step": 858 + }, + { + "epoch": 0.25, + "learning_rate": 0.000977209871702325, + "loss": 3.5689, + "step": 859 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009771398024663112, + "loss": 3.5449, + "step": 860 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009770696282000244, + "loss": 3.5003, + "step": 861 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009769993489189119, + "loss": 3.5458, + "step": 862 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009769289646384437, + "loss": 3.4318, + "step": 863 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009768584753741135, + "loss": 3.5175, + "step": 864 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009767878811414372, + "loss": 3.5169, + "step": 865 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009767171819559549, + "loss": 3.5585, + "step": 866 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009766463778332285, + "loss": 3.4212, + "step": 867 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009765754687888445, + "loss": 3.5118, + "step": 868 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009765044548384113, + "loss": 3.5994, + "step": 869 + }, + { + "epoch": 0.25, + "learning_rate": 0.000976433335997561, + "loss": 3.4767, + "step": 870 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009763621122819484, + "loss": 3.5178, + "step": 871 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009762907837072519, + "loss": 3.5422, + "step": 872 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009762193502891725, + "loss": 3.4347, + "step": 873 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009761478120434345, + "loss": 3.4119, + "step": 874 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009760761689857852, + "loss": 3.6649, + "step": 875 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009760044211319952, + "loss": 3.6256, + "step": 876 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009759325684978576, + "loss": 3.4753, + "step": 877 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009758606110991892, + "loss": 3.3769, + "step": 878 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009757885489518296, + "loss": 3.5747, + "step": 879 + }, + { + "epoch": 0.25, + "learning_rate": 0.0009757163820716416, + "loss": 3.5021, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009756441104745106, + "loss": 3.6369, + "step": 881 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009755717341763455, + "loss": 3.5371, + "step": 882 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009754992531930782, + "loss": 3.5898, + "step": 883 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009754266675406634, + "loss": 3.59, + "step": 884 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009753539772350791, + "loss": 3.4418, + "step": 885 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009752811822923262, + "loss": 3.5543, + "step": 886 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009752082827284288, + "loss": 3.6001, + "step": 887 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009751352785594337, + "loss": 3.503, + "step": 888 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009750621698014111, + "loss": 3.4875, + "step": 889 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009749889564704537, + "loss": 3.5998, + "step": 890 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009749156385826781, + "loss": 3.4877, + "step": 891 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009748422161542228, + "loss": 3.4938, + "step": 892 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009747686892012504, + "loss": 3.4152, + "step": 893 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009746950577399458, + "loss": 3.5186, + "step": 894 + }, + { + "epoch": 0.26, + "learning_rate": 0.000974621321786517, + "loss": 3.5221, + "step": 895 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009745474813571953, + "loss": 3.5841, + "step": 896 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009744735364682345, + "loss": 3.7238, + "step": 897 + }, + { + "epoch": 0.26, + "learning_rate": 0.000974399487135912, + "loss": 3.6349, + "step": 898 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009743253333765279, + "loss": 3.5319, + "step": 899 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009742510752064051, + "loss": 3.4815, + "step": 900 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009741767126418898, + "loss": 3.452, + "step": 901 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009741022456993509, + "loss": 3.6086, + "step": 902 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009740276743951806, + "loss": 3.43, + "step": 903 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009739529987457936, + "loss": 3.5281, + "step": 904 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009738782187676281, + "loss": 3.3784, + "step": 905 + }, + { + "epoch": 0.26, + "learning_rate": 0.000973803334477145, + "loss": 3.445, + "step": 906 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009737283458908281, + "loss": 3.6006, + "step": 907 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009736532530251842, + "loss": 3.525, + "step": 908 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009735780558967434, + "loss": 3.5538, + "step": 909 + }, + { + "epoch": 0.26, + "learning_rate": 0.000973502754522058, + "loss": 3.4658, + "step": 910 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009734273489177041, + "loss": 3.6239, + "step": 911 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009733518391002803, + "loss": 3.3687, + "step": 912 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009732762250864078, + "loss": 3.5682, + "step": 913 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009732005068927314, + "loss": 3.526, + "step": 914 + }, + { + "epoch": 0.26, + "learning_rate": 0.0009731246845359185, + "loss": 3.6061, + "step": 915 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009730487580326594, + "loss": 3.4868, + "step": 916 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009729727273996675, + "loss": 3.4171, + "step": 917 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009728965926536793, + "loss": 3.4232, + "step": 918 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009728203538114533, + "loss": 3.288, + "step": 919 + }, + { + "epoch": 0.27, + "learning_rate": 0.000972744010889772, + "loss": 3.5914, + "step": 920 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009726675639054403, + "loss": 3.5839, + "step": 921 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009725910128752861, + "loss": 3.5542, + "step": 922 + }, + { + "epoch": 0.27, + "learning_rate": 0.00097251435781616, + "loss": 3.4124, + "step": 923 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009724375987449358, + "loss": 3.383, + "step": 924 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009723607356785102, + "loss": 3.4318, + "step": 925 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009722837686338024, + "loss": 3.5786, + "step": 926 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009722066976277548, + "loss": 3.371, + "step": 927 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009721295226773329, + "loss": 3.5054, + "step": 928 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009720522437995245, + "loss": 3.5444, + "step": 929 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009719748610113408, + "loss": 3.4402, + "step": 930 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009718973743298154, + "loss": 3.4786, + "step": 931 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009718197837720053, + "loss": 3.5061, + "step": 932 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009717420893549903, + "loss": 3.627, + "step": 933 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009716642910958721, + "loss": 3.6156, + "step": 934 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009715863890117769, + "loss": 3.5177, + "step": 935 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009715083831198521, + "loss": 3.5053, + "step": 936 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009714302734372694, + "loss": 3.4878, + "step": 937 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009713520599812222, + "loss": 3.4091, + "step": 938 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009712737427689276, + "loss": 3.497, + "step": 939 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009711953218176249, + "loss": 3.4804, + "step": 940 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009711167971445764, + "loss": 3.4217, + "step": 941 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009710381687670675, + "loss": 3.5062, + "step": 942 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009709594367024062, + "loss": 3.5981, + "step": 943 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009708806009679235, + "loss": 3.6186, + "step": 944 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009708016615809729, + "loss": 3.4613, + "step": 945 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009707226185589311, + "loss": 3.5123, + "step": 946 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009706434719191972, + "loss": 3.5741, + "step": 947 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009705642216791935, + "loss": 3.4768, + "step": 948 + }, + { + "epoch": 0.27, + "learning_rate": 0.0009704848678563648, + "loss": 3.5631, + "step": 949 + }, + { + "epoch": 0.28, + "learning_rate": 0.000970405410468179, + "loss": 3.5149, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009703258495321266, + "loss": 3.5053, + "step": 951 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009702461850657208, + "loss": 3.606, + "step": 952 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009701664170864979, + "loss": 3.5679, + "step": 953 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009700865456120167, + "loss": 3.5625, + "step": 954 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009700065706598588, + "loss": 3.5396, + "step": 955 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009699264922476289, + "loss": 3.5066, + "step": 956 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009698463103929542, + "loss": 3.4495, + "step": 957 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009697660251134846, + "loss": 3.5757, + "step": 958 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009696856364268928, + "loss": 3.4091, + "step": 959 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009696051443508745, + "loss": 3.3879, + "step": 960 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009695245489031478, + "loss": 3.5407, + "step": 961 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009694438501014539, + "loss": 3.5848, + "step": 962 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009693630479635566, + "loss": 3.4625, + "step": 963 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009692821425072427, + "loss": 3.4761, + "step": 964 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009692011337503211, + "loss": 3.5363, + "step": 965 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009691200217106237, + "loss": 3.4911, + "step": 966 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009690388064060059, + "loss": 3.5524, + "step": 967 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009689574878543446, + "loss": 3.5366, + "step": 968 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009688760660735403, + "loss": 3.5017, + "step": 969 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009687945410815158, + "loss": 3.4439, + "step": 970 + }, + { + "epoch": 0.28, + "learning_rate": 0.000968712912896217, + "loss": 3.6677, + "step": 971 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009686311815356121, + "loss": 3.5207, + "step": 972 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009685493470176922, + "loss": 3.6109, + "step": 973 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009684674093604713, + "loss": 3.4524, + "step": 974 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009683853685819856, + "loss": 3.4903, + "step": 975 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009683032247002945, + "loss": 3.5888, + "step": 976 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009682209777334798, + "loss": 3.5445, + "step": 977 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009681386276996462, + "loss": 3.5005, + "step": 978 + }, + { + "epoch": 0.28, + "learning_rate": 0.000968056174616921, + "loss": 3.4943, + "step": 979 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009679736185034539, + "loss": 3.5839, + "step": 980 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009678909593774179, + "loss": 3.5211, + "step": 981 + }, + { + "epoch": 0.28, + "learning_rate": 0.000967808197257008, + "loss": 3.3511, + "step": 982 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009677253321604425, + "loss": 3.5873, + "step": 983 + }, + { + "epoch": 0.28, + "learning_rate": 0.0009676423641059617, + "loss": 3.4395, + "step": 984 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009675592931118293, + "loss": 3.6192, + "step": 985 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009674761191963311, + "loss": 3.5369, + "step": 986 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009673928423777756, + "loss": 3.5729, + "step": 987 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009673094626744943, + "loss": 3.5083, + "step": 988 + }, + { + "epoch": 0.29, + "learning_rate": 0.000967225980104841, + "loss": 3.5692, + "step": 989 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009671423946871924, + "loss": 3.5435, + "step": 990 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009670587064399476, + "loss": 3.4407, + "step": 991 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009669749153815285, + "loss": 3.4946, + "step": 992 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009668910215303797, + "loss": 3.4737, + "step": 993 + }, + { + "epoch": 0.29, + "learning_rate": 0.000966807024904968, + "loss": 3.4256, + "step": 994 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009667229255237835, + "loss": 3.4798, + "step": 995 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009666387234053385, + "loss": 3.4783, + "step": 996 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009665544185681677, + "loss": 3.4432, + "step": 997 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009664700110308287, + "loss": 3.4962, + "step": 998 + }, + { + "epoch": 0.29, + "learning_rate": 0.000966385500811902, + "loss": 3.3048, + "step": 999 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009663008879299902, + "loss": 3.5994, + "step": 1000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009662161724037187, + "loss": 3.5069, + "step": 1001 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009661313542517354, + "loss": 3.5633, + "step": 1002 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009660464334927112, + "loss": 3.5597, + "step": 1003 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009659614101453389, + "loss": 3.4308, + "step": 1004 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009658762842283342, + "loss": 3.5643, + "step": 1005 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009657910557604358, + "loss": 3.45, + "step": 1006 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009657057247604042, + "loss": 3.5006, + "step": 1007 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009656202912470232, + "loss": 3.5166, + "step": 1008 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009655347552390986, + "loss": 3.5072, + "step": 1009 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009654491167554591, + "loss": 3.4849, + "step": 1010 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009653633758149558, + "loss": 3.4952, + "step": 1011 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009652775324364624, + "loss": 3.4285, + "step": 1012 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009651915866388752, + "loss": 3.617, + "step": 1013 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009651055384411128, + "loss": 3.4667, + "step": 1014 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009650193878621169, + "loss": 3.423, + "step": 1015 + }, + { + "epoch": 0.29, + "learning_rate": 0.000964933134920851, + "loss": 3.5756, + "step": 1016 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009648467796363017, + "loss": 3.5222, + "step": 1017 + }, + { + "epoch": 0.29, + "learning_rate": 0.0009647603220274781, + "loss": 3.5495, + "step": 1018 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009646737621134112, + "loss": 3.4985, + "step": 1019 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009645870999131554, + "loss": 3.5168, + "step": 1020 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009645003354457871, + "loss": 3.4323, + "step": 1021 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009644134687304052, + "loss": 3.4265, + "step": 1022 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009643264997861312, + "loss": 3.584, + "step": 1023 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009642394286321091, + "loss": 3.4916, + "step": 1024 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009641522552875055, + "loss": 3.3122, + "step": 1025 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009640649797715095, + "loss": 3.5789, + "step": 1026 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009639776021033326, + "loss": 3.4873, + "step": 1027 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009638901223022085, + "loss": 3.5818, + "step": 1028 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009638025403873939, + "loss": 3.401, + "step": 1029 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009637148563781677, + "loss": 3.6326, + "step": 1030 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009636270702938313, + "loss": 3.641, + "step": 1031 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009635391821537087, + "loss": 3.5108, + "step": 1032 + }, + { + "epoch": 0.3, + "learning_rate": 0.000963451191977146, + "loss": 3.4928, + "step": 1033 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009633630997835124, + "loss": 3.4518, + "step": 1034 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009632749055921987, + "loss": 3.5628, + "step": 1035 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009631866094226189, + "loss": 3.4975, + "step": 1036 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009630982112942092, + "loss": 3.557, + "step": 1037 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009630097112264283, + "loss": 3.4622, + "step": 1038 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009629211092387569, + "loss": 3.6069, + "step": 1039 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009628324053506988, + "loss": 3.43, + "step": 1040 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009627435995817798, + "loss": 3.4051, + "step": 1041 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009626546919515482, + "loss": 3.4932, + "step": 1042 + }, + { + "epoch": 0.3, + "learning_rate": 0.000962565682479575, + "loss": 3.3411, + "step": 1043 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009624765711854531, + "loss": 3.6247, + "step": 1044 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009623873580887981, + "loss": 3.4938, + "step": 1045 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009622980432092483, + "loss": 3.5675, + "step": 1046 + }, + { + "epoch": 0.3, + "learning_rate": 0.000962208626566464, + "loss": 3.4404, + "step": 1047 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009621191081801277, + "loss": 3.4603, + "step": 1048 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009620294880699449, + "loss": 3.5026, + "step": 1049 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009619397662556434, + "loss": 3.4933, + "step": 1050 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009618499427569729, + "loss": 3.423, + "step": 1051 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009617600175937057, + "loss": 3.5696, + "step": 1052 + }, + { + "epoch": 0.3, + "learning_rate": 0.0009616699907856368, + "loss": 3.4475, + "step": 1053 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009615798623525831, + "loss": 3.3406, + "step": 1054 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009614896323143844, + "loss": 3.5829, + "step": 1055 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009613993006909022, + "loss": 3.5474, + "step": 1056 + }, + { + "epoch": 0.31, + "learning_rate": 0.000961308867502021, + "loss": 3.4575, + "step": 1057 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009612183327676472, + "loss": 3.3231, + "step": 1058 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009611276965077098, + "loss": 3.631, + "step": 1059 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009610369587421603, + "loss": 3.4946, + "step": 1060 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009609461194909719, + "loss": 3.4722, + "step": 1061 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009608551787741408, + "loss": 3.3482, + "step": 1062 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009607641366116855, + "loss": 3.4333, + "step": 1063 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009606729930236463, + "loss": 3.3754, + "step": 1064 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009605817480300863, + "loss": 3.3337, + "step": 1065 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009604904016510907, + "loss": 3.4779, + "step": 1066 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009603989539067673, + "loss": 3.5222, + "step": 1067 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009603074048172458, + "loss": 3.6245, + "step": 1068 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009602157544026784, + "loss": 3.5671, + "step": 1069 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009601240026832398, + "loss": 3.457, + "step": 1070 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009600321496791268, + "loss": 3.5599, + "step": 1071 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009599401954105583, + "loss": 3.5857, + "step": 1072 + }, + { + "epoch": 0.31, + "learning_rate": 0.000959848139897776, + "loss": 3.3973, + "step": 1073 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009597559831610434, + "loss": 3.495, + "step": 1074 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009596637252206466, + "loss": 3.508, + "step": 1075 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009595713660968937, + "loss": 3.4039, + "step": 1076 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009594789058101153, + "loss": 3.543, + "step": 1077 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009593863443806643, + "loss": 3.4228, + "step": 1078 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009592936818289158, + "loss": 3.4424, + "step": 1079 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009592009181752667, + "loss": 3.5694, + "step": 1080 + }, + { + "epoch": 0.31, + "learning_rate": 0.000959108053440137, + "loss": 3.4883, + "step": 1081 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009590150876439685, + "loss": 3.4609, + "step": 1082 + }, + { + "epoch": 0.31, + "learning_rate": 0.000958922020807225, + "loss": 3.5559, + "step": 1083 + }, + { + "epoch": 0.31, + "learning_rate": 0.000958828852950393, + "loss": 3.4133, + "step": 1084 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009587355840939812, + "loss": 3.4119, + "step": 1085 + }, + { + "epoch": 0.31, + "learning_rate": 0.00095864221425852, + "loss": 3.5892, + "step": 1086 + }, + { + "epoch": 0.31, + "learning_rate": 0.0009585487434645627, + "loss": 3.477, + "step": 1087 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009584551717326846, + "loss": 3.4365, + "step": 1088 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009583614990834828, + "loss": 3.5156, + "step": 1089 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009582677255375773, + "loss": 3.5673, + "step": 1090 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009581738511156099, + "loss": 3.5105, + "step": 1091 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009580798758382445, + "loss": 3.4694, + "step": 1092 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009579857997261676, + "loss": 3.5583, + "step": 1093 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009578916228000875, + "loss": 3.5915, + "step": 1094 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009577973450807352, + "loss": 3.4807, + "step": 1095 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009577029665888631, + "loss": 3.5021, + "step": 1096 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009576084873452465, + "loss": 3.487, + "step": 1097 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009575139073706827, + "loss": 3.5604, + "step": 1098 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009574192266859909, + "loss": 3.4013, + "step": 1099 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009573244453120127, + "loss": 3.3565, + "step": 1100 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009572295632696119, + "loss": 3.5115, + "step": 1101 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009571345805796744, + "loss": 3.3657, + "step": 1102 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009570394972631081, + "loss": 3.5658, + "step": 1103 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009569443133408433, + "loss": 3.4801, + "step": 1104 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009568490288338324, + "loss": 3.4802, + "step": 1105 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009567536437630498, + "loss": 3.5203, + "step": 1106 + }, + { + "epoch": 0.32, + "learning_rate": 0.000956658158149492, + "loss": 3.3927, + "step": 1107 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009565625720141779, + "loss": 3.5339, + "step": 1108 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009564668853781482, + "loss": 3.4401, + "step": 1109 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009563710982624661, + "loss": 3.3405, + "step": 1110 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009562752106882167, + "loss": 3.5112, + "step": 1111 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009561792226765072, + "loss": 3.4346, + "step": 1112 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009560831342484667, + "loss": 3.542, + "step": 1113 + }, + { + "epoch": 0.32, + "learning_rate": 0.000955986945425247, + "loss": 3.4283, + "step": 1114 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009558906562280214, + "loss": 3.4636, + "step": 1115 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009557942666779855, + "loss": 3.4943, + "step": 1116 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009556977767963573, + "loss": 3.3695, + "step": 1117 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009556011866043763, + "loss": 3.298, + "step": 1118 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009555044961233047, + "loss": 3.4682, + "step": 1119 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009554077053744262, + "loss": 3.6107, + "step": 1120 + }, + { + "epoch": 0.32, + "learning_rate": 0.000955310814379047, + "loss": 3.4354, + "step": 1121 + }, + { + "epoch": 0.32, + "learning_rate": 0.000955213823158495, + "loss": 3.4328, + "step": 1122 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009551167317341208, + "loss": 3.4753, + "step": 1123 + }, + { + "epoch": 0.33, + "learning_rate": 0.000955019540127296, + "loss": 3.3754, + "step": 1124 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009549222483594154, + "loss": 3.5392, + "step": 1125 + }, + { + "epoch": 0.33, + "learning_rate": 0.000954824856451895, + "loss": 3.3951, + "step": 1126 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009547273644261733, + "loss": 3.4474, + "step": 1127 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009546297723037108, + "loss": 3.585, + "step": 1128 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009545320801059899, + "loss": 3.4031, + "step": 1129 + }, + { + "epoch": 0.33, + "learning_rate": 0.000954434287854515, + "loss": 3.4119, + "step": 1130 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009543363955708124, + "loss": 3.4096, + "step": 1131 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009542384032764311, + "loss": 3.3013, + "step": 1132 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009541403109929412, + "loss": 3.3416, + "step": 1133 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009540421187419354, + "loss": 3.3955, + "step": 1134 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009539438265450282, + "loss": 3.4547, + "step": 1135 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009538454344238563, + "loss": 3.5079, + "step": 1136 + }, + { + "epoch": 0.33, + "learning_rate": 0.000953746942400078, + "loss": 3.4254, + "step": 1137 + }, + { + "epoch": 0.33, + "learning_rate": 0.000953648350495374, + "loss": 3.3355, + "step": 1138 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009535496587314467, + "loss": 3.5066, + "step": 1139 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009534508671300208, + "loss": 3.5104, + "step": 1140 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009533519757128426, + "loss": 3.5015, + "step": 1141 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009532529845016804, + "loss": 3.5309, + "step": 1142 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009531538935183251, + "loss": 3.5318, + "step": 1143 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009530547027845885, + "loss": 3.5455, + "step": 1144 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009529554123223053, + "loss": 3.5305, + "step": 1145 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009528560221533318, + "loss": 3.4937, + "step": 1146 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009527565322995459, + "loss": 3.5614, + "step": 1147 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009526569427828482, + "loss": 3.3454, + "step": 1148 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009525572536251606, + "loss": 3.6448, + "step": 1149 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009524574648484272, + "loss": 3.6655, + "step": 1150 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009523575764746138, + "loss": 3.4615, + "step": 1151 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009522575885257085, + "loss": 3.4586, + "step": 1152 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009521575010237213, + "loss": 3.4774, + "step": 1153 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009520573139906835, + "loss": 3.3822, + "step": 1154 + }, + { + "epoch": 0.33, + "learning_rate": 0.000951957027448649, + "loss": 3.3402, + "step": 1155 + }, + { + "epoch": 0.33, + "learning_rate": 0.0009518566414196933, + "loss": 3.5972, + "step": 1156 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009517561559259138, + "loss": 3.4594, + "step": 1157 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009516555709894299, + "loss": 3.3494, + "step": 1158 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009515548866323829, + "loss": 3.4584, + "step": 1159 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009514541028769359, + "loss": 3.4955, + "step": 1160 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009513532197452737, + "loss": 3.442, + "step": 1161 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009512522372596033, + "loss": 3.4369, + "step": 1162 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009511511554421536, + "loss": 3.3733, + "step": 1163 + }, + { + "epoch": 0.34, + "learning_rate": 0.000951049974315175, + "loss": 3.5298, + "step": 1164 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009509486939009399, + "loss": 3.406, + "step": 1165 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009508473142217429, + "loss": 3.4008, + "step": 1166 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009507458352999001, + "loss": 3.5708, + "step": 1167 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009506442571577494, + "loss": 3.401, + "step": 1168 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009505425798176506, + "loss": 3.5604, + "step": 1169 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009504408033019856, + "loss": 3.4011, + "step": 1170 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009503389276331578, + "loss": 3.4842, + "step": 1171 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009502369528335925, + "loss": 3.4707, + "step": 1172 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009501348789257373, + "loss": 3.426, + "step": 1173 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009500327059320606, + "loss": 3.4284, + "step": 1174 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009499304338750535, + "loss": 3.4284, + "step": 1175 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009498280627772286, + "loss": 3.3262, + "step": 1176 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009497255926611204, + "loss": 3.4692, + "step": 1177 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009496230235492848, + "loss": 3.6114, + "step": 1178 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009495203554643002, + "loss": 3.506, + "step": 1179 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009494175884287659, + "loss": 3.4448, + "step": 1180 + }, + { + "epoch": 0.34, + "learning_rate": 0.000949314722465304, + "loss": 3.4518, + "step": 1181 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009492117575965576, + "loss": 3.452, + "step": 1182 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009491086938451916, + "loss": 3.4753, + "step": 1183 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009490055312338932, + "loss": 3.4168, + "step": 1184 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009489022697853709, + "loss": 3.4701, + "step": 1185 + }, + { + "epoch": 0.34, + "learning_rate": 0.000948798909522355, + "loss": 3.3463, + "step": 1186 + }, + { + "epoch": 0.34, + "learning_rate": 0.000948695450467598, + "loss": 3.4065, + "step": 1187 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009485918926438735, + "loss": 3.648, + "step": 1188 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009484882360739771, + "loss": 3.337, + "step": 1189 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009483844807807265, + "loss": 3.5844, + "step": 1190 + }, + { + "epoch": 0.34, + "learning_rate": 0.0009482806267869604, + "loss": 3.5108, + "step": 1191 + }, + { + "epoch": 0.35, + "learning_rate": 0.00094817667411554, + "loss": 3.5523, + "step": 1192 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009480726227893474, + "loss": 3.4023, + "step": 1193 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009479684728312873, + "loss": 3.4591, + "step": 1194 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009478642242642856, + "loss": 3.4727, + "step": 1195 + }, + { + "epoch": 0.35, + "learning_rate": 0.00094775987711129, + "loss": 3.5113, + "step": 1196 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009476554313952697, + "loss": 3.527, + "step": 1197 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009475508871392157, + "loss": 3.4104, + "step": 1198 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009474462443661414, + "loss": 3.5622, + "step": 1199 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009473415030990805, + "loss": 3.4857, + "step": 1200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009472366633610896, + "loss": 3.3896, + "step": 1201 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009471317251752464, + "loss": 3.4251, + "step": 1202 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009470266885646504, + "loss": 3.5522, + "step": 1203 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009469215535524227, + "loss": 3.3946, + "step": 1204 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009468163201617061, + "loss": 3.3417, + "step": 1205 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009467109884156653, + "loss": 3.5147, + "step": 1206 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009466055583374862, + "loss": 3.3857, + "step": 1207 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009465000299503766, + "loss": 3.4718, + "step": 1208 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009463944032775659, + "loss": 3.3237, + "step": 1209 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009462886783423052, + "loss": 3.5157, + "step": 1210 + }, + { + "epoch": 0.35, + "learning_rate": 0.000946182855167867, + "loss": 3.4439, + "step": 1211 + }, + { + "epoch": 0.35, + "learning_rate": 0.000946076933777546, + "loss": 3.3567, + "step": 1212 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009459709141946578, + "loss": 3.3312, + "step": 1213 + }, + { + "epoch": 0.35, + "learning_rate": 0.00094586479644254, + "loss": 3.3966, + "step": 1214 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009457585805445517, + "loss": 3.3479, + "step": 1215 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009456522665240737, + "loss": 3.5239, + "step": 1216 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009455458544045084, + "loss": 3.4368, + "step": 1217 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009454393442092795, + "loss": 3.4658, + "step": 1218 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009453327359618329, + "loss": 3.4498, + "step": 1219 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009452260296856354, + "loss": 3.5219, + "step": 1220 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009451192254041758, + "loss": 3.3854, + "step": 1221 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009450123231409645, + "loss": 3.5392, + "step": 1222 + }, + { + "epoch": 0.35, + "learning_rate": 0.000944905322919533, + "loss": 3.4415, + "step": 1223 + }, + { + "epoch": 0.35, + "learning_rate": 0.000944798224763435, + "loss": 3.4649, + "step": 1224 + }, + { + "epoch": 0.35, + "learning_rate": 0.0009446910286962454, + "loss": 3.4086, + "step": 1225 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009445837347415607, + "loss": 3.5799, + "step": 1226 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009444763429229988, + "loss": 3.4461, + "step": 1227 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009443688532641994, + "loss": 3.4643, + "step": 1228 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009442612657888236, + "loss": 3.4334, + "step": 1229 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009441535805205541, + "loss": 3.4699, + "step": 1230 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009440457974830952, + "loss": 3.4442, + "step": 1231 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009439379167001725, + "loss": 3.4856, + "step": 1232 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009438299381955332, + "loss": 3.5447, + "step": 1233 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009437218619929461, + "loss": 3.5375, + "step": 1234 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009436136881162014, + "loss": 3.4859, + "step": 1235 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009435054165891108, + "loss": 3.5139, + "step": 1236 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009433970474355078, + "loss": 3.431, + "step": 1237 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009432885806792468, + "loss": 3.5373, + "step": 1238 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009431800163442042, + "loss": 3.5694, + "step": 1239 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009430713544542775, + "loss": 3.5188, + "step": 1240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009429625950333863, + "loss": 3.4209, + "step": 1241 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009428537381054707, + "loss": 3.5914, + "step": 1242 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009427447836944931, + "loss": 3.4998, + "step": 1243 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009426357318244371, + "loss": 3.375, + "step": 1244 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009425265825193077, + "loss": 3.5494, + "step": 1245 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009424173358031313, + "loss": 3.4496, + "step": 1246 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009423079916999557, + "loss": 3.4364, + "step": 1247 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009421985502338503, + "loss": 3.3708, + "step": 1248 + }, + { + "epoch": 0.36, + "learning_rate": 0.000942089011428906, + "loss": 3.4589, + "step": 1249 + }, + { + "epoch": 0.36, + "learning_rate": 0.000941979375309235, + "loss": 3.5386, + "step": 1250 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009418696418989707, + "loss": 3.3466, + "step": 1251 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009417598112222684, + "loss": 3.4504, + "step": 1252 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009416498833033043, + "loss": 3.4788, + "step": 1253 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009415398581662766, + "loss": 3.5419, + "step": 1254 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009414297358354041, + "loss": 3.5052, + "step": 1255 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009413195163349278, + "loss": 3.5594, + "step": 1256 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009412091996891096, + "loss": 3.4582, + "step": 1257 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009410987859222328, + "loss": 3.5344, + "step": 1258 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009409882750586027, + "loss": 3.4305, + "step": 1259 + }, + { + "epoch": 0.36, + "learning_rate": 0.0009408776671225448, + "loss": 3.3858, + "step": 1260 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009407669621384071, + "loss": 3.4205, + "step": 1261 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009406561601305583, + "loss": 3.3421, + "step": 1262 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009405452611233888, + "loss": 3.4428, + "step": 1263 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009404342651413102, + "loss": 3.4048, + "step": 1264 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009403231722087552, + "loss": 3.4089, + "step": 1265 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009402119823501786, + "loss": 3.533, + "step": 1266 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009401006955900555, + "loss": 3.4595, + "step": 1267 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009399893119528832, + "loss": 3.5276, + "step": 1268 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009398778314631801, + "loss": 3.4688, + "step": 1269 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009397662541454854, + "loss": 3.4219, + "step": 1270 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009396545800243603, + "loss": 3.515, + "step": 1271 + }, + { + "epoch": 0.37, + "learning_rate": 0.000939542809124387, + "loss": 3.5304, + "step": 1272 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009394309414701691, + "loss": 3.4831, + "step": 1273 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009393189770863313, + "loss": 3.3917, + "step": 1274 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009392069159975199, + "loss": 3.384, + "step": 1275 + }, + { + "epoch": 0.37, + "learning_rate": 0.000939094758228402, + "loss": 3.4211, + "step": 1276 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009389825038036665, + "loss": 3.3417, + "step": 1277 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009388701527480237, + "loss": 3.3621, + "step": 1278 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009387577050862043, + "loss": 3.521, + "step": 1279 + }, + { + "epoch": 0.37, + "learning_rate": 0.000938645160842961, + "loss": 3.4802, + "step": 1280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009385325200430678, + "loss": 3.3666, + "step": 1281 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009384197827113195, + "loss": 3.4192, + "step": 1282 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009383069488725323, + "loss": 3.373, + "step": 1283 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009381940185515439, + "loss": 3.4617, + "step": 1284 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009380809917732131, + "loss": 3.4004, + "step": 1285 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009379678685624196, + "loss": 3.3708, + "step": 1286 + }, + { + "epoch": 0.37, + "learning_rate": 0.000937854648944065, + "loss": 3.3608, + "step": 1287 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009377413329430715, + "loss": 3.2966, + "step": 1288 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009376279205843827, + "loss": 3.3409, + "step": 1289 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009375144118929637, + "loss": 3.532, + "step": 1290 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009374008068938003, + "loss": 3.3268, + "step": 1291 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009372871056118998, + "loss": 3.4235, + "step": 1292 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009371733080722911, + "loss": 3.2505, + "step": 1293 + }, + { + "epoch": 0.37, + "learning_rate": 0.0009370594143000233, + "loss": 3.3253, + "step": 1294 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009369454243201676, + "loss": 3.4299, + "step": 1295 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009368313381578157, + "loss": 3.4081, + "step": 1296 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009367171558380812, + "loss": 3.4095, + "step": 1297 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009366028773860979, + "loss": 3.4344, + "step": 1298 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009364885028270219, + "loss": 3.4723, + "step": 1299 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009363740321860294, + "loss": 3.5192, + "step": 1300 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009362594654883185, + "loss": 3.4867, + "step": 1301 + }, + { + "epoch": 0.38, + "learning_rate": 0.000936144802759108, + "loss": 3.5584, + "step": 1302 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009360300440236383, + "loss": 3.4309, + "step": 1303 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009359151893071701, + "loss": 3.4978, + "step": 1304 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009358002386349862, + "loss": 3.2717, + "step": 1305 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009356851920323901, + "loss": 3.3344, + "step": 1306 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009355700495247061, + "loss": 3.5188, + "step": 1307 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009354548111372802, + "loss": 3.4985, + "step": 1308 + }, + { + "epoch": 0.38, + "learning_rate": 0.000935339476895479, + "loss": 3.4764, + "step": 1309 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009352240468246907, + "loss": 3.29, + "step": 1310 + }, + { + "epoch": 0.38, + "learning_rate": 0.000935108520950324, + "loss": 3.3709, + "step": 1311 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009349928992978093, + "loss": 3.4172, + "step": 1312 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009348771818925974, + "loss": 3.3744, + "step": 1313 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009347613687601611, + "loss": 3.4147, + "step": 1314 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009346454599259931, + "loss": 3.421, + "step": 1315 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009345294554156084, + "loss": 3.4694, + "step": 1316 + }, + { + "epoch": 0.38, + "learning_rate": 0.000934413355254542, + "loss": 3.4997, + "step": 1317 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009342971594683506, + "loss": 3.3398, + "step": 1318 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009341808680826118, + "loss": 3.4477, + "step": 1319 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009340644811229242, + "loss": 3.5037, + "step": 1320 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009339479986149075, + "loss": 3.5472, + "step": 1321 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009338314205842022, + "loss": 3.469, + "step": 1322 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009337147470564702, + "loss": 3.3621, + "step": 1323 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009335979780573941, + "loss": 3.3787, + "step": 1324 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009334811136126778, + "loss": 3.4651, + "step": 1325 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009333641537480459, + "loss": 3.4207, + "step": 1326 + }, + { + "epoch": 0.38, + "learning_rate": 0.000933247098489244, + "loss": 3.3333, + "step": 1327 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009331299478620394, + "loss": 3.427, + "step": 1328 + }, + { + "epoch": 0.38, + "learning_rate": 0.0009330127018922195, + "loss": 3.5332, + "step": 1329 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009328953606055928, + "loss": 3.4831, + "step": 1330 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009327779240279894, + "loss": 3.3182, + "step": 1331 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009326603921852599, + "loss": 3.4366, + "step": 1332 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009325427651032758, + "loss": 3.4651, + "step": 1333 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009324250428079301, + "loss": 3.4332, + "step": 1334 + }, + { + "epoch": 0.39, + "learning_rate": 0.000932307225325136, + "loss": 3.3028, + "step": 1335 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009321893126808283, + "loss": 3.4168, + "step": 1336 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009320713049009624, + "loss": 3.4024, + "step": 1337 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009319532020115146, + "loss": 3.2594, + "step": 1338 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009318350040384824, + "loss": 3.3419, + "step": 1339 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009317167110078842, + "loss": 3.3499, + "step": 1340 + }, + { + "epoch": 0.39, + "learning_rate": 0.000931598322945759, + "loss": 3.4765, + "step": 1341 + }, + { + "epoch": 0.39, + "learning_rate": 0.000931479839878167, + "loss": 3.4306, + "step": 1342 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009313612618311896, + "loss": 3.5119, + "step": 1343 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009312425888309281, + "loss": 3.4875, + "step": 1344 + }, + { + "epoch": 0.39, + "learning_rate": 0.000931123820903506, + "loss": 3.5151, + "step": 1345 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009310049580750666, + "loss": 3.4533, + "step": 1346 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009308860003717749, + "loss": 3.4276, + "step": 1347 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009307669478198162, + "loss": 3.444, + "step": 1348 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009306478004453969, + "loss": 3.4755, + "step": 1349 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009305285582747444, + "loss": 3.4241, + "step": 1350 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009304092213341068, + "loss": 3.3772, + "step": 1351 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009302897896497532, + "loss": 3.3596, + "step": 1352 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009301702632479734, + "loss": 3.5062, + "step": 1353 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009300506421550781, + "loss": 3.3361, + "step": 1354 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009299309263973987, + "loss": 3.5178, + "step": 1355 + }, + { + "epoch": 0.39, + "learning_rate": 0.000929811116001288, + "loss": 3.346, + "step": 1356 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009296912109931188, + "loss": 3.4259, + "step": 1357 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009295712113992856, + "loss": 3.5577, + "step": 1358 + }, + { + "epoch": 0.39, + "learning_rate": 0.000929451117246203, + "loss": 3.2834, + "step": 1359 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009293309285603067, + "loss": 3.4307, + "step": 1360 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009292106453680531, + "loss": 3.2813, + "step": 1361 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009290902676959197, + "loss": 3.2926, + "step": 1362 + }, + { + "epoch": 0.39, + "learning_rate": 0.0009289697955704047, + "loss": 3.3531, + "step": 1363 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009288492290180266, + "loss": 3.4795, + "step": 1364 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009287285680653254, + "loss": 3.399, + "step": 1365 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009286078127388617, + "loss": 3.3518, + "step": 1366 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009284869630652164, + "loss": 3.4574, + "step": 1367 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009283660190709914, + "loss": 3.5092, + "step": 1368 + }, + { + "epoch": 0.4, + "learning_rate": 0.00092824498078281, + "loss": 3.5326, + "step": 1369 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009281238482273153, + "loss": 3.3758, + "step": 1370 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009280026214311716, + "loss": 3.402, + "step": 1371 + }, + { + "epoch": 0.4, + "learning_rate": 0.000927881300421064, + "loss": 3.3841, + "step": 1372 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009277598852236984, + "loss": 3.4685, + "step": 1373 + }, + { + "epoch": 0.4, + "learning_rate": 0.000927638375865801, + "loss": 3.3931, + "step": 1374 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009275167723741193, + "loss": 3.4629, + "step": 1375 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009273950747754209, + "loss": 3.5442, + "step": 1376 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009272732830964948, + "loss": 3.3796, + "step": 1377 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009271513973641501, + "loss": 3.4243, + "step": 1378 + }, + { + "epoch": 0.4, + "learning_rate": 0.000927029417605217, + "loss": 3.333, + "step": 1379 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009269073438465462, + "loss": 3.3831, + "step": 1380 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009267851761150093, + "loss": 3.371, + "step": 1381 + }, + { + "epoch": 0.4, + "learning_rate": 0.000926662914437498, + "loss": 3.3735, + "step": 1382 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009265405588409257, + "loss": 3.5322, + "step": 1383 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009264181093522257, + "loss": 3.4954, + "step": 1384 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009262955659983521, + "loss": 3.3605, + "step": 1385 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009261729288062795, + "loss": 3.4737, + "step": 1386 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009260501978030038, + "loss": 3.3139, + "step": 1387 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009259273730155409, + "loss": 3.4509, + "step": 1388 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009258044544709274, + "loss": 3.4176, + "step": 1389 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009256814421962212, + "loss": 3.4684, + "step": 1390 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009255583362184998, + "loss": 3.4119, + "step": 1391 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009254351365648623, + "loss": 3.4409, + "step": 1392 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009253118432624279, + "loss": 3.4422, + "step": 1393 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009251884563383362, + "loss": 3.4088, + "step": 1394 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009250649758197481, + "loss": 3.4316, + "step": 1395 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009249414017338444, + "loss": 3.5577, + "step": 1396 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009248177341078272, + "loss": 3.3473, + "step": 1397 + }, + { + "epoch": 0.4, + "learning_rate": 0.0009246939729689185, + "loss": 3.3608, + "step": 1398 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009245701183443612, + "loss": 3.4354, + "step": 1399 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009244461702614189, + "loss": 3.4208, + "step": 1400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009243221287473755, + "loss": 3.3903, + "step": 1401 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009241979938295358, + "loss": 3.5084, + "step": 1402 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009240737655352247, + "loss": 3.4489, + "step": 1403 + }, + { + "epoch": 0.41, + "learning_rate": 0.000923949443891788, + "loss": 3.4788, + "step": 1404 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009238250289265921, + "loss": 3.4446, + "step": 1405 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009237005206670238, + "loss": 3.2476, + "step": 1406 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009235759191404904, + "loss": 3.4619, + "step": 1407 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009234512243744197, + "loss": 3.3443, + "step": 1408 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009233264363962601, + "loss": 3.458, + "step": 1409 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009232015552334807, + "loss": 3.4374, + "step": 1410 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009230765809135708, + "loss": 3.4712, + "step": 1411 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009229515134640405, + "loss": 3.4655, + "step": 1412 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009228263529124198, + "loss": 3.3235, + "step": 1413 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009227010992862603, + "loss": 3.4861, + "step": 1414 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009225757526131328, + "loss": 3.4776, + "step": 1415 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009224503129206294, + "loss": 3.4416, + "step": 1416 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009223247802363628, + "loss": 3.414, + "step": 1417 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009221991545879653, + "loss": 3.3897, + "step": 1418 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009220734360030907, + "loss": 3.4098, + "step": 1419 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009219476245094125, + "loss": 3.3968, + "step": 1420 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009218217201346251, + "loss": 3.4156, + "step": 1421 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009216957229064429, + "loss": 3.4565, + "step": 1422 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009215696328526012, + "loss": 3.3153, + "step": 1423 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009214434500008555, + "loss": 3.4571, + "step": 1424 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009213171743789819, + "loss": 3.3284, + "step": 1425 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009211908060147764, + "loss": 3.4729, + "step": 1426 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009210643449360563, + "loss": 3.4779, + "step": 1427 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009209377911706584, + "loss": 3.3601, + "step": 1428 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009208111447464406, + "loss": 3.5096, + "step": 1429 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009206844056912809, + "loss": 3.4168, + "step": 1430 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009205575740330774, + "loss": 3.5034, + "step": 1431 + }, + { + "epoch": 0.41, + "learning_rate": 0.0009204306497997492, + "loss": 3.4343, + "step": 1432 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009203036330192354, + "loss": 3.3579, + "step": 1433 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009201765237194954, + "loss": 3.3178, + "step": 1434 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009200493219285091, + "loss": 3.5167, + "step": 1435 + }, + { + "epoch": 0.42, + "learning_rate": 0.000919922027674277, + "loss": 3.3586, + "step": 1436 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009197946409848195, + "loss": 3.3971, + "step": 1437 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009196671618881775, + "loss": 3.4838, + "step": 1438 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009195395904124127, + "loss": 3.3883, + "step": 1439 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009194119265856063, + "loss": 3.3505, + "step": 1440 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009192841704358603, + "loss": 3.4158, + "step": 1441 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009191563219912973, + "loss": 3.3792, + "step": 1442 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009190283812800596, + "loss": 3.3501, + "step": 1443 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009189003483303103, + "loss": 3.4569, + "step": 1444 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009187722231702326, + "loss": 3.3888, + "step": 1445 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009186440058280298, + "loss": 3.3838, + "step": 1446 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009185156963319261, + "loss": 3.4199, + "step": 1447 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009183872947101652, + "loss": 3.4749, + "step": 1448 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009182588009910118, + "loss": 3.3924, + "step": 1449 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009181302152027503, + "loss": 3.4622, + "step": 1450 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009180015373736856, + "loss": 3.3308, + "step": 1451 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009178727675321432, + "loss": 3.4488, + "step": 1452 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009177439057064682, + "loss": 3.3675, + "step": 1453 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009176149519250264, + "loss": 3.437, + "step": 1454 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009174859062162038, + "loss": 3.4502, + "step": 1455 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009173567686084063, + "loss": 3.4229, + "step": 1456 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009172275391300604, + "loss": 3.5385, + "step": 1457 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009170982178096128, + "loss": 3.4238, + "step": 1458 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009169688046755302, + "loss": 3.3664, + "step": 1459 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009168392997562999, + "loss": 3.5207, + "step": 1460 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009167097030804288, + "loss": 3.3588, + "step": 1461 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009165800146764445, + "loss": 3.5378, + "step": 1462 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009164502345728945, + "loss": 3.3924, + "step": 1463 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009163203627983468, + "loss": 3.472, + "step": 1464 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009161903993813892, + "loss": 3.4022, + "step": 1465 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009160603443506299, + "loss": 3.404, + "step": 1466 + }, + { + "epoch": 0.42, + "learning_rate": 0.0009159301977346975, + "loss": 3.314, + "step": 1467 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009157999595622399, + "loss": 3.2916, + "step": 1468 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009156696298619264, + "loss": 3.3927, + "step": 1469 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009155392086624454, + "loss": 3.3874, + "step": 1470 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009154086959925061, + "loss": 3.5102, + "step": 1471 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009152780918808372, + "loss": 3.4116, + "step": 1472 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009151473963561883, + "loss": 3.5719, + "step": 1473 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009150166094473283, + "loss": 3.4166, + "step": 1474 + }, + { + "epoch": 0.43, + "learning_rate": 0.000914885731183047, + "loss": 3.4619, + "step": 1475 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009147547615921536, + "loss": 3.4612, + "step": 1476 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009146237007034781, + "loss": 3.5615, + "step": 1477 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009144925485458701, + "loss": 3.4768, + "step": 1478 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009143613051481994, + "loss": 3.3689, + "step": 1479 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009142299705393559, + "loss": 3.4848, + "step": 1480 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009140985447482497, + "loss": 3.479, + "step": 1481 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009139670278038108, + "loss": 3.27, + "step": 1482 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009138354197349892, + "loss": 3.5017, + "step": 1483 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009137037205707553, + "loss": 3.4301, + "step": 1484 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009135719303400993, + "loss": 3.51, + "step": 1485 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009134400490720313, + "loss": 3.493, + "step": 1486 + }, + { + "epoch": 0.43, + "learning_rate": 0.000913308076795582, + "loss": 3.3665, + "step": 1487 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009131760135398015, + "loss": 3.2569, + "step": 1488 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009130438593337602, + "loss": 3.4917, + "step": 1489 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009129116142065486, + "loss": 3.3882, + "step": 1490 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009127792781872769, + "loss": 3.4547, + "step": 1491 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009126468513050758, + "loss": 3.5479, + "step": 1492 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009125143335890957, + "loss": 3.326, + "step": 1493 + }, + { + "epoch": 0.43, + "learning_rate": 0.000912381725068507, + "loss": 3.3763, + "step": 1494 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009122490257724999, + "loss": 3.4186, + "step": 1495 + }, + { + "epoch": 0.43, + "learning_rate": 0.000912116235730285, + "loss": 3.3406, + "step": 1496 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009119833549710928, + "loss": 3.4783, + "step": 1497 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009118503835241734, + "loss": 3.3811, + "step": 1498 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009117173214187971, + "loss": 3.4911, + "step": 1499 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009115841686842544, + "loss": 3.4621, + "step": 1500 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009114509253498553, + "loss": 3.4161, + "step": 1501 + }, + { + "epoch": 0.43, + "learning_rate": 0.0009113175914449301, + "loss": 3.4388, + "step": 1502 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009111841669988288, + "loss": 3.4246, + "step": 1503 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009110506520409213, + "loss": 3.3159, + "step": 1504 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009109170466005979, + "loss": 3.5645, + "step": 1505 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009107833507072679, + "loss": 3.3976, + "step": 1506 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009106495643903616, + "loss": 3.3902, + "step": 1507 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009105156876793286, + "loss": 3.2418, + "step": 1508 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009103817206036382, + "loss": 3.4133, + "step": 1509 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009102476631927801, + "loss": 3.3553, + "step": 1510 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009101135154762635, + "loss": 3.4636, + "step": 1511 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009099792774836178, + "loss": 3.3586, + "step": 1512 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009098449492443921, + "loss": 3.3726, + "step": 1513 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009097105307881552, + "loss": 3.3504, + "step": 1514 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009095760221444959, + "loss": 3.3367, + "step": 1515 + }, + { + "epoch": 0.44, + "learning_rate": 0.000909441423343023, + "loss": 3.4277, + "step": 1516 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009093067344133652, + "loss": 3.4782, + "step": 1517 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009091719553851706, + "loss": 3.3788, + "step": 1518 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009090370862881074, + "loss": 3.4783, + "step": 1519 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009089021271518639, + "loss": 3.2593, + "step": 1520 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009087670780061475, + "loss": 3.4685, + "step": 1521 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009086319388806863, + "loss": 3.448, + "step": 1522 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009084967098052275, + "loss": 3.3183, + "step": 1523 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009083613908095385, + "loss": 3.4089, + "step": 1524 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009082259819234062, + "loss": 3.3306, + "step": 1525 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009080904831766375, + "loss": 3.485, + "step": 1526 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009079548945990592, + "loss": 3.442, + "step": 1527 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009078192162205174, + "loss": 3.4291, + "step": 1528 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009076834480708786, + "loss": 3.4991, + "step": 1529 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009075475901800285, + "loss": 3.3433, + "step": 1530 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009074116425778729, + "loss": 3.4155, + "step": 1531 + }, + { + "epoch": 0.44, + "learning_rate": 0.000907275605294337, + "loss": 3.4234, + "step": 1532 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009071394783593663, + "loss": 3.4906, + "step": 1533 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009070032618029255, + "loss": 3.4373, + "step": 1534 + }, + { + "epoch": 0.44, + "learning_rate": 0.0009068669556549993, + "loss": 3.3876, + "step": 1535 + }, + { + "epoch": 0.44, + "learning_rate": 0.000906730559945592, + "loss": 3.3738, + "step": 1536 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009065940747047277, + "loss": 3.4782, + "step": 1537 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009064574999624502, + "loss": 3.4552, + "step": 1538 + }, + { + "epoch": 0.45, + "learning_rate": 0.000906320835748823, + "loss": 3.5532, + "step": 1539 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009061840820939291, + "loss": 3.4128, + "step": 1540 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009060472390278717, + "loss": 3.4418, + "step": 1541 + }, + { + "epoch": 0.45, + "learning_rate": 0.000905910306580773, + "loss": 3.4463, + "step": 1542 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009057732847827754, + "loss": 3.5065, + "step": 1543 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009056361736640407, + "loss": 3.3871, + "step": 1544 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009054989732547506, + "loss": 3.3726, + "step": 1545 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009053616835851062, + "loss": 3.3427, + "step": 1546 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009052243046853283, + "loss": 3.3802, + "step": 1547 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009050868365856575, + "loss": 3.4166, + "step": 1548 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009049492793163538, + "loss": 3.3535, + "step": 1549 + }, + { + "epoch": 0.45, + "learning_rate": 0.000904811632907697, + "loss": 3.3599, + "step": 1550 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009046738973899866, + "loss": 3.5453, + "step": 1551 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009045360727935414, + "loss": 3.3058, + "step": 1552 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009043981591487002, + "loss": 3.3802, + "step": 1553 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009042601564858213, + "loss": 3.5476, + "step": 1554 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009041220648352819, + "loss": 3.4963, + "step": 1555 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009039838842274801, + "loss": 3.3665, + "step": 1556 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009038456146928325, + "loss": 3.3512, + "step": 1557 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009037072562617757, + "loss": 3.4731, + "step": 1558 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009035688089647659, + "loss": 3.386, + "step": 1559 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009034302728322787, + "loss": 3.2112, + "step": 1560 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009032916478948092, + "loss": 3.4905, + "step": 1561 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009031529341828724, + "loss": 3.3176, + "step": 1562 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009030141317270026, + "loss": 3.3519, + "step": 1563 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009028752405577535, + "loss": 3.4219, + "step": 1564 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009027362607056986, + "loss": 3.3804, + "step": 1565 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009025971922014307, + "loss": 3.3938, + "step": 1566 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009024580350755623, + "loss": 3.3151, + "step": 1567 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009023187893587255, + "loss": 3.5547, + "step": 1568 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009021794550815713, + "loss": 3.4192, + "step": 1569 + }, + { + "epoch": 0.45, + "learning_rate": 0.000902040032274771, + "loss": 3.392, + "step": 1570 + }, + { + "epoch": 0.45, + "learning_rate": 0.0009019005209690147, + "loss": 3.4099, + "step": 1571 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009017609211950127, + "loss": 3.2996, + "step": 1572 + }, + { + "epoch": 0.46, + "learning_rate": 0.000901621232983494, + "loss": 3.4289, + "step": 1573 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009014814563652077, + "loss": 3.3132, + "step": 1574 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009013415913709217, + "loss": 3.3513, + "step": 1575 + }, + { + "epoch": 0.46, + "learning_rate": 0.000901201638031424, + "loss": 3.5028, + "step": 1576 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009010615963775219, + "loss": 3.3635, + "step": 1577 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009009214664400419, + "loss": 3.3513, + "step": 1578 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009007812482498301, + "loss": 3.4214, + "step": 1579 + }, + { + "epoch": 0.46, + "learning_rate": 0.000900640941837752, + "loss": 3.3444, + "step": 1580 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009005005472346923, + "loss": 3.3484, + "step": 1581 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009003600644715557, + "loss": 3.5227, + "step": 1582 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009002194935792656, + "loss": 3.3583, + "step": 1583 + }, + { + "epoch": 0.46, + "learning_rate": 0.0009000788345887654, + "loss": 3.2395, + "step": 1584 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008999380875310175, + "loss": 3.434, + "step": 1585 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008997972524370037, + "loss": 3.3589, + "step": 1586 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008996563293377254, + "loss": 3.3679, + "step": 1587 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008995153182642032, + "loss": 3.3344, + "step": 1588 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008993742192474771, + "loss": 3.3964, + "step": 1589 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008992330323186068, + "loss": 3.4193, + "step": 1590 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008990917575086708, + "loss": 3.4544, + "step": 1591 + }, + { + "epoch": 0.46, + "learning_rate": 0.000898950394848767, + "loss": 3.4655, + "step": 1592 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008988089443700131, + "loss": 3.4745, + "step": 1593 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008986674061035456, + "loss": 3.4333, + "step": 1594 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008985257800805209, + "loss": 3.3412, + "step": 1595 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008983840663321141, + "loss": 3.3424, + "step": 1596 + }, + { + "epoch": 0.46, + "learning_rate": 0.00089824226488952, + "loss": 3.32, + "step": 1597 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008981003757839527, + "loss": 3.4338, + "step": 1598 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008979583990466454, + "loss": 3.2909, + "step": 1599 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008978163347088507, + "loss": 3.3318, + "step": 1600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008976741828018402, + "loss": 3.3918, + "step": 1601 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008975319433569055, + "loss": 3.3622, + "step": 1602 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008973896164053569, + "loss": 3.4138, + "step": 1603 + }, + { + "epoch": 0.46, + "learning_rate": 0.0008972472019785243, + "loss": 3.3027, + "step": 1604 + }, + { + "epoch": 0.46, + "learning_rate": 0.000897104700107756, + "loss": 3.4346, + "step": 1605 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008969621108244208, + "loss": 3.4899, + "step": 1606 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008968194341599056, + "loss": 3.3582, + "step": 1607 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008966766701456176, + "loss": 3.2857, + "step": 1608 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008965338188129824, + "loss": 3.4243, + "step": 1609 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008963908801934451, + "loss": 3.3265, + "step": 1610 + }, + { + "epoch": 0.47, + "learning_rate": 0.00089624785431847, + "loss": 3.3795, + "step": 1611 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008961047412195409, + "loss": 3.3041, + "step": 1612 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008959615409281603, + "loss": 3.3915, + "step": 1613 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008958182534758501, + "loss": 3.4096, + "step": 1614 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008956748788941514, + "loss": 3.441, + "step": 1615 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008955314172146247, + "loss": 3.4681, + "step": 1616 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008953878684688492, + "loss": 3.4491, + "step": 1617 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008952442326884239, + "loss": 3.3392, + "step": 1618 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008951005099049663, + "loss": 3.4295, + "step": 1619 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008949567001501134, + "loss": 3.3247, + "step": 1620 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008948128034555211, + "loss": 3.3867, + "step": 1621 + }, + { + "epoch": 0.47, + "learning_rate": 0.000894668819852865, + "loss": 3.3997, + "step": 1622 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008945247493738391, + "loss": 3.3472, + "step": 1623 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008943805920501571, + "loss": 3.3823, + "step": 1624 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008942363479135516, + "loss": 3.453, + "step": 1625 + }, + { + "epoch": 0.47, + "learning_rate": 0.000894092016995774, + "loss": 3.4222, + "step": 1626 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008939475993285956, + "loss": 3.2948, + "step": 1627 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008938030949438059, + "loss": 3.3695, + "step": 1628 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008936585038732142, + "loss": 3.2836, + "step": 1629 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008935138261486482, + "loss": 3.4491, + "step": 1630 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008933690618019552, + "loss": 3.3722, + "step": 1631 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008932242108650015, + "loss": 3.4862, + "step": 1632 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008930792733696725, + "loss": 3.3029, + "step": 1633 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008929342493478721, + "loss": 3.3983, + "step": 1634 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008927891388315241, + "loss": 3.296, + "step": 1635 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008926439418525707, + "loss": 3.3845, + "step": 1636 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008924986584429732, + "loss": 3.4169, + "step": 1637 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008923532886347125, + "loss": 3.4701, + "step": 1638 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008922078324597878, + "loss": 3.3467, + "step": 1639 + }, + { + "epoch": 0.47, + "learning_rate": 0.0008920622899502178, + "loss": 3.4706, + "step": 1640 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008919166611380396, + "loss": 3.3655, + "step": 1641 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008917709460553101, + "loss": 3.3584, + "step": 1642 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008916251447341046, + "loss": 3.516, + "step": 1643 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008914792572065178, + "loss": 3.4284, + "step": 1644 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008913332835046628, + "loss": 3.3717, + "step": 1645 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008911872236606723, + "loss": 3.3326, + "step": 1646 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008910410777066975, + "loss": 3.276, + "step": 1647 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008908948456749089, + "loss": 3.4644, + "step": 1648 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008907485275974958, + "loss": 3.4554, + "step": 1649 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008906021235066664, + "loss": 3.4571, + "step": 1650 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008904556334346477, + "loss": 3.3694, + "step": 1651 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008903090574136858, + "loss": 3.3984, + "step": 1652 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008901623954760459, + "loss": 3.3359, + "step": 1653 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008900156476540119, + "loss": 3.2889, + "step": 1654 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008898688139798865, + "loss": 3.3656, + "step": 1655 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008897218944859915, + "loss": 3.2288, + "step": 1656 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008895748892046674, + "loss": 3.3871, + "step": 1657 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008894277981682739, + "loss": 3.3536, + "step": 1658 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008892806214091893, + "loss": 3.5329, + "step": 1659 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008891333589598107, + "loss": 3.4646, + "step": 1660 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008889860108525543, + "loss": 3.3202, + "step": 1661 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008888385771198552, + "loss": 3.4292, + "step": 1662 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008886910577941672, + "loss": 3.3812, + "step": 1663 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008885434529079625, + "loss": 3.3298, + "step": 1664 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008883957624937332, + "loss": 3.3088, + "step": 1665 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008882479865839893, + "loss": 3.4496, + "step": 1666 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008881001252112599, + "loss": 3.2832, + "step": 1667 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008879521784080932, + "loss": 3.4611, + "step": 1668 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008878041462070556, + "loss": 3.3892, + "step": 1669 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008876560286407329, + "loss": 3.3901, + "step": 1670 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008875078257417294, + "loss": 3.4996, + "step": 1671 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008873595375426681, + "loss": 3.4846, + "step": 1672 + }, + { + "epoch": 0.48, + "learning_rate": 0.0008872111640761911, + "loss": 3.3232, + "step": 1673 + }, + { + "epoch": 0.48, + "learning_rate": 0.000887062705374959, + "loss": 3.4312, + "step": 1674 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008869141614716511, + "loss": 3.3542, + "step": 1675 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008867655323989656, + "loss": 3.3957, + "step": 1676 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008866168181896197, + "loss": 3.4165, + "step": 1677 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008864680188763488, + "loss": 3.4353, + "step": 1678 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008863191344919074, + "loss": 3.4652, + "step": 1679 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008861701650690686, + "loss": 3.424, + "step": 1680 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008860211106406244, + "loss": 3.3843, + "step": 1681 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008858719712393851, + "loss": 3.3123, + "step": 1682 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008857227468981804, + "loss": 3.4461, + "step": 1683 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008855734376498576, + "loss": 3.4767, + "step": 1684 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008854240435272841, + "loss": 3.3446, + "step": 1685 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008852745645633448, + "loss": 3.4318, + "step": 1686 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008851250007909439, + "loss": 3.2335, + "step": 1687 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008849753522430037, + "loss": 3.3963, + "step": 1688 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008848256189524662, + "loss": 3.4311, + "step": 1689 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008846758009522908, + "loss": 3.4011, + "step": 1690 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008845258982754566, + "loss": 3.4417, + "step": 1691 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008843759109549606, + "loss": 3.4263, + "step": 1692 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008842258390238188, + "loss": 3.3227, + "step": 1693 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008840756825150657, + "loss": 3.4103, + "step": 1694 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008839254414617546, + "loss": 3.331, + "step": 1695 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008837751158969571, + "loss": 3.417, + "step": 1696 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008836247058537638, + "loss": 3.3434, + "step": 1697 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008834742113652834, + "loss": 3.3571, + "step": 1698 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008833236324646437, + "loss": 3.3045, + "step": 1699 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008831729691849906, + "loss": 3.4677, + "step": 1700 + }, + { + "epoch": 0.49, + "learning_rate": 0.000883022221559489, + "loss": 3.288, + "step": 1701 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008828713896213222, + "loss": 3.4299, + "step": 1702 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008827204734036919, + "loss": 3.3239, + "step": 1703 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008825694729398186, + "loss": 3.5415, + "step": 1704 + }, + { + "epoch": 0.49, + "learning_rate": 0.000882418388262941, + "loss": 3.4223, + "step": 1705 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008822672194063171, + "loss": 3.378, + "step": 1706 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008821159664032224, + "loss": 3.4145, + "step": 1707 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008819646292869515, + "loss": 3.3313, + "step": 1708 + }, + { + "epoch": 0.49, + "learning_rate": 0.0008818132080908176, + "loss": 3.4405, + "step": 1709 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008816617028481523, + "loss": 3.3754, + "step": 1710 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008815101135923055, + "loss": 3.4556, + "step": 1711 + }, + { + "epoch": 0.5, + "learning_rate": 0.000881358440356646, + "loss": 3.376, + "step": 1712 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008812066831745602, + "loss": 3.5598, + "step": 1713 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008810548420794543, + "loss": 3.3608, + "step": 1714 + }, + { + "epoch": 0.5, + "learning_rate": 0.000880902917104752, + "loss": 3.3658, + "step": 1715 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008807509082838957, + "loss": 3.3063, + "step": 1716 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008805988156503464, + "loss": 3.3708, + "step": 1717 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008804466392375833, + "loss": 3.3033, + "step": 1718 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008802943790791045, + "loss": 3.3869, + "step": 1719 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008801420352084259, + "loss": 3.4691, + "step": 1720 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008799896076590823, + "loss": 3.4929, + "step": 1721 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008798370964646268, + "loss": 3.3101, + "step": 1722 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008796845016586308, + "loss": 3.3632, + "step": 1723 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008795318232746842, + "loss": 3.4584, + "step": 1724 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008793790613463954, + "loss": 3.519, + "step": 1725 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008792262159073911, + "loss": 3.4006, + "step": 1726 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008790732869913161, + "loss": 3.3995, + "step": 1727 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008789202746318341, + "loss": 3.3605, + "step": 1728 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008787671788626268, + "loss": 3.3151, + "step": 1729 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008786139997173944, + "loss": 3.3407, + "step": 1730 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008784607372298552, + "loss": 3.4603, + "step": 1731 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008783073914337465, + "loss": 3.3278, + "step": 1732 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008781539623628232, + "loss": 3.3586, + "step": 1733 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008780004500508588, + "loss": 3.3956, + "step": 1734 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008778468545316454, + "loss": 3.4038, + "step": 1735 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008776931758389928, + "loss": 3.3723, + "step": 1736 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008775394140067299, + "loss": 3.352, + "step": 1737 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008773855690687031, + "loss": 3.5563, + "step": 1738 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008772316410587779, + "loss": 3.359, + "step": 1739 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008770776300108373, + "loss": 3.4804, + "step": 1740 + }, + { + "epoch": 0.5, + "learning_rate": 0.000876923535958783, + "loss": 3.3439, + "step": 1741 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008767693589365352, + "loss": 3.4438, + "step": 1742 + }, + { + "epoch": 0.5, + "learning_rate": 0.0008766150989780317, + "loss": 3.3398, + "step": 1743 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008764607561172292, + "loss": 3.3372, + "step": 1744 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008763063303881022, + "loss": 3.4553, + "step": 1745 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008761518218246436, + "loss": 3.4436, + "step": 1746 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008759972304608648, + "loss": 3.4692, + "step": 1747 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008758425563307951, + "loss": 3.2915, + "step": 1748 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008756877994684818, + "loss": 3.4361, + "step": 1749 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008755329599079911, + "loss": 3.2118, + "step": 1750 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008753780376834069, + "loss": 3.6013, + "step": 1751 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008752230328288313, + "loss": 3.4064, + "step": 1752 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008750679453783847, + "loss": 3.3342, + "step": 1753 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008749127753662059, + "loss": 3.2794, + "step": 1754 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008747575228264515, + "loss": 3.39, + "step": 1755 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008746021877932965, + "loss": 3.4412, + "step": 1756 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008744467703009339, + "loss": 3.4493, + "step": 1757 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008742912703835749, + "loss": 3.307, + "step": 1758 + }, + { + "epoch": 0.51, + "learning_rate": 0.000874135688075449, + "loss": 3.3551, + "step": 1759 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008739800234108036, + "loss": 3.4491, + "step": 1760 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008738242764239046, + "loss": 3.4067, + "step": 1761 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008736684471490352, + "loss": 3.2745, + "step": 1762 + }, + { + "epoch": 0.51, + "learning_rate": 0.000873512535620498, + "loss": 3.3658, + "step": 1763 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008733565418726125, + "loss": 3.4044, + "step": 1764 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008732004659397169, + "loss": 3.3933, + "step": 1765 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008730443078561674, + "loss": 3.3054, + "step": 1766 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008728880676563383, + "loss": 3.4028, + "step": 1767 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008727317453746218, + "loss": 3.4266, + "step": 1768 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008725753410454284, + "loss": 3.4602, + "step": 1769 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008724188547031866, + "loss": 3.2936, + "step": 1770 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008722622863823427, + "loss": 3.3524, + "step": 1771 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008721056361173616, + "loss": 3.3057, + "step": 1772 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008719489039427256, + "loss": 3.3193, + "step": 1773 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008717920898929355, + "loss": 3.3594, + "step": 1774 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008716351940025097, + "loss": 3.3979, + "step": 1775 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008714782163059852, + "loss": 3.3464, + "step": 1776 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008713211568379166, + "loss": 3.3244, + "step": 1777 + }, + { + "epoch": 0.51, + "learning_rate": 0.0008711640156328764, + "loss": 3.2849, + "step": 1778 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008710067927254555, + "loss": 3.3964, + "step": 1779 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008708494881502622, + "loss": 3.3725, + "step": 1780 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008706921019419236, + "loss": 3.4193, + "step": 1781 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008705346341350839, + "loss": 3.5048, + "step": 1782 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008703770847644058, + "loss": 3.3834, + "step": 1783 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008702194538645698, + "loss": 3.3296, + "step": 1784 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008700617414702745, + "loss": 3.5018, + "step": 1785 + }, + { + "epoch": 0.52, + "learning_rate": 0.000869903947616236, + "loss": 3.3917, + "step": 1786 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008697460723371891, + "loss": 3.4325, + "step": 1787 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008695881156678856, + "loss": 3.2443, + "step": 1788 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008694300776430958, + "loss": 3.4001, + "step": 1789 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008692719582976078, + "loss": 3.5383, + "step": 1790 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008691137576662275, + "loss": 3.4524, + "step": 1791 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008689554757837792, + "loss": 3.3572, + "step": 1792 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008687971126851041, + "loss": 3.4432, + "step": 1793 + }, + { + "epoch": 0.52, + "learning_rate": 0.000868638668405062, + "loss": 3.5079, + "step": 1794 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008684801429785305, + "loss": 3.3333, + "step": 1795 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008683215364404051, + "loss": 3.291, + "step": 1796 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008681628488255986, + "loss": 3.4844, + "step": 1797 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008680040801690425, + "loss": 3.4243, + "step": 1798 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008678452305056854, + "loss": 3.236, + "step": 1799 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008676862998704944, + "loss": 3.2824, + "step": 1800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008675272882984536, + "loss": 3.4751, + "step": 1801 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008673681958245657, + "loss": 3.3383, + "step": 1802 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008672090224838508, + "loss": 3.3328, + "step": 1803 + }, + { + "epoch": 0.52, + "learning_rate": 0.000867049768311347, + "loss": 3.4494, + "step": 1804 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008668904333421097, + "loss": 3.3132, + "step": 1805 + }, + { + "epoch": 0.52, + "learning_rate": 0.000866731017611213, + "loss": 3.292, + "step": 1806 + }, + { + "epoch": 0.52, + "learning_rate": 0.000866571521153748, + "loss": 3.278, + "step": 1807 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008664119440048236, + "loss": 3.3565, + "step": 1808 + }, + { + "epoch": 0.52, + "learning_rate": 0.000866252286199567, + "loss": 3.3865, + "step": 1809 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008660925477731226, + "loss": 3.2417, + "step": 1810 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008659327287606531, + "loss": 3.4384, + "step": 1811 + }, + { + "epoch": 0.52, + "learning_rate": 0.0008657728291973383, + "loss": 3.5091, + "step": 1812 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008656128491183762, + "loss": 3.323, + "step": 1813 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008654527885589823, + "loss": 3.395, + "step": 1814 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008652926475543898, + "loss": 3.2598, + "step": 1815 + }, + { + "epoch": 0.53, + "learning_rate": 0.00086513242613985, + "loss": 3.5224, + "step": 1816 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008649721243506313, + "loss": 3.4678, + "step": 1817 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008648117422220203, + "loss": 3.3788, + "step": 1818 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008646512797893209, + "loss": 3.3157, + "step": 1819 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008644907370878549, + "loss": 3.3316, + "step": 1820 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008643301141529619, + "loss": 3.5024, + "step": 1821 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008641694110199987, + "loss": 3.4628, + "step": 1822 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008640086277243402, + "loss": 3.403, + "step": 1823 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008638477643013787, + "loss": 3.4095, + "step": 1824 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008636868207865244, + "loss": 3.3465, + "step": 1825 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008635257972152046, + "loss": 3.3619, + "step": 1826 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008633646936228649, + "loss": 3.289, + "step": 1827 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008632035100449682, + "loss": 3.1592, + "step": 1828 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008630422465169947, + "loss": 3.439, + "step": 1829 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008628809030744427, + "loss": 3.4329, + "step": 1830 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008627194797528277, + "loss": 3.3465, + "step": 1831 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008625579765876832, + "loss": 3.4276, + "step": 1832 + }, + { + "epoch": 0.53, + "learning_rate": 0.00086239639361456, + "loss": 3.4684, + "step": 1833 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008622347308690263, + "loss": 3.3125, + "step": 1834 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008620729883866685, + "loss": 3.5461, + "step": 1835 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008619111662030896, + "loss": 3.3789, + "step": 1836 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008617492643539109, + "loss": 3.4293, + "step": 1837 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008615872828747711, + "loss": 3.2324, + "step": 1838 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008614252218013263, + "loss": 3.4566, + "step": 1839 + }, + { + "epoch": 0.53, + "learning_rate": 0.00086126308116925, + "loss": 3.2924, + "step": 1840 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008611008610142335, + "loss": 3.1984, + "step": 1841 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008609385613719854, + "loss": 3.3983, + "step": 1842 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008607761822782317, + "loss": 3.458, + "step": 1843 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008606137237687165, + "loss": 3.295, + "step": 1844 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008604511858792005, + "loss": 3.2751, + "step": 1845 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008602885686454627, + "loss": 3.2126, + "step": 1846 + }, + { + "epoch": 0.53, + "learning_rate": 0.0008601258721032988, + "loss": 3.4474, + "step": 1847 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008599630962885226, + "loss": 3.4366, + "step": 1848 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008598002412369647, + "loss": 3.335, + "step": 1849 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008596373069844741, + "loss": 3.3877, + "step": 1850 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008594742935669164, + "loss": 3.3503, + "step": 1851 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008593112010201747, + "loss": 3.3764, + "step": 1852 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008591480293801499, + "loss": 3.3217, + "step": 1853 + }, + { + "epoch": 0.54, + "learning_rate": 0.00085898477868276, + "loss": 3.3967, + "step": 1854 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008588214489639407, + "loss": 3.3666, + "step": 1855 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008586580402596447, + "loss": 3.4239, + "step": 1856 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008584945526058425, + "loss": 3.3969, + "step": 1857 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008583309860385216, + "loss": 3.3483, + "step": 1858 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008581673405936872, + "loss": 3.3936, + "step": 1859 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008580036163073615, + "loss": 3.4937, + "step": 1860 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008578398132155845, + "loss": 3.2547, + "step": 1861 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008576759313544131, + "loss": 3.3604, + "step": 1862 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008575119707599219, + "loss": 3.2823, + "step": 1863 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008573479314682026, + "loss": 3.4998, + "step": 1864 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008571838135153644, + "loss": 3.1938, + "step": 1865 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008570196169375337, + "loss": 3.282, + "step": 1866 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008568553417708542, + "loss": 3.3254, + "step": 1867 + }, + { + "epoch": 0.54, + "learning_rate": 0.000856690988051487, + "loss": 3.3979, + "step": 1868 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008565265558156101, + "loss": 3.2725, + "step": 1869 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008563620450994196, + "loss": 3.313, + "step": 1870 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008561974559391279, + "loss": 3.2979, + "step": 1871 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008560327883709656, + "loss": 3.4767, + "step": 1872 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008558680424311796, + "loss": 3.3036, + "step": 1873 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008557032181560351, + "loss": 3.4406, + "step": 1874 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008555383155818135, + "loss": 3.4227, + "step": 1875 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008553733347448143, + "loss": 3.3977, + "step": 1876 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008552082756813537, + "loss": 3.3401, + "step": 1877 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008550431384277652, + "loss": 3.2511, + "step": 1878 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008548779230204, + "loss": 3.4007, + "step": 1879 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008547126294956255, + "loss": 3.3354, + "step": 1880 + }, + { + "epoch": 0.54, + "learning_rate": 0.0008545472578898274, + "loss": 3.4504, + "step": 1881 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008543818082394077, + "loss": 3.3961, + "step": 1882 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008542162805807862, + "loss": 3.2174, + "step": 1883 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008540506749503996, + "loss": 3.2711, + "step": 1884 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008538849913847017, + "loss": 3.345, + "step": 1885 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008537192299201637, + "loss": 3.2601, + "step": 1886 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008535533905932737, + "loss": 3.4235, + "step": 1887 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008533874734405371, + "loss": 3.4112, + "step": 1888 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008532214784984762, + "loss": 3.3361, + "step": 1889 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008530554058036308, + "loss": 3.3305, + "step": 1890 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008528892553925576, + "loss": 3.4485, + "step": 1891 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008527230273018303, + "loss": 3.3027, + "step": 1892 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008525567215680398, + "loss": 3.2698, + "step": 1893 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008523903382277941, + "loss": 3.4226, + "step": 1894 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008522238773177184, + "loss": 3.3046, + "step": 1895 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008520573388744548, + "loss": 3.3348, + "step": 1896 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008518907229346626, + "loss": 3.3518, + "step": 1897 + }, + { + "epoch": 0.55, + "learning_rate": 0.000851724029535018, + "loss": 3.4021, + "step": 1898 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008515572587122143, + "loss": 3.3515, + "step": 1899 + }, + { + "epoch": 0.55, + "learning_rate": 0.000851390410502962, + "loss": 3.3212, + "step": 1900 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008512234849439886, + "loss": 3.4275, + "step": 1901 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008510564820720383, + "loss": 3.2734, + "step": 1902 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008508894019238726, + "loss": 3.5278, + "step": 1903 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008507222445362701, + "loss": 3.5169, + "step": 1904 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008505550099460264, + "loss": 3.468, + "step": 1905 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008503876981899536, + "loss": 3.3539, + "step": 1906 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008502203093048815, + "loss": 3.2993, + "step": 1907 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008500528433276562, + "loss": 3.3819, + "step": 1908 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008498853002951414, + "loss": 3.3609, + "step": 1909 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008497176802442175, + "loss": 3.4236, + "step": 1910 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008495499832117815, + "loss": 3.3728, + "step": 1911 + }, + { + "epoch": 0.55, + "learning_rate": 0.000849382209234748, + "loss": 3.2567, + "step": 1912 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008492143583500479, + "loss": 3.4208, + "step": 1913 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008490464305946295, + "loss": 3.2458, + "step": 1914 + }, + { + "epoch": 0.55, + "learning_rate": 0.0008488784260054579, + "loss": 3.4283, + "step": 1915 + }, + { + "epoch": 0.55, + "learning_rate": 0.000848710344619515, + "loss": 3.3848, + "step": 1916 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008485421864737996, + "loss": 3.4151, + "step": 1917 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008483739516053274, + "loss": 3.2856, + "step": 1918 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008482056400511315, + "loss": 3.3593, + "step": 1919 + }, + { + "epoch": 0.56, + "learning_rate": 0.000848037251848261, + "loss": 3.3285, + "step": 1920 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008478687870337824, + "loss": 3.4107, + "step": 1921 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008477002456447791, + "loss": 3.4317, + "step": 1922 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008475316277183509, + "loss": 3.3094, + "step": 1923 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008473629332916153, + "loss": 3.3118, + "step": 1924 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008471941624017057, + "loss": 3.5174, + "step": 1925 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008470253150857729, + "loss": 3.3851, + "step": 1926 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008468563913809843, + "loss": 3.489, + "step": 1927 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008466873913245243, + "loss": 3.4461, + "step": 1928 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008465183149535939, + "loss": 3.2474, + "step": 1929 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008463491623054109, + "loss": 3.3681, + "step": 1930 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008461799334172104, + "loss": 3.3447, + "step": 1931 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008460106283262432, + "loss": 3.3607, + "step": 1932 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008458412470697781, + "loss": 3.5179, + "step": 1933 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008456717896851, + "loss": 3.3276, + "step": 1934 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008455022562095104, + "loss": 3.3412, + "step": 1935 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008453326466803281, + "loss": 3.4028, + "step": 1936 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008451629611348881, + "loss": 3.4494, + "step": 1937 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008449931996105427, + "loss": 3.3164, + "step": 1938 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008448233621446603, + "loss": 3.3388, + "step": 1939 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008446534487746265, + "loss": 3.3252, + "step": 1940 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008444834595378434, + "loss": 3.2484, + "step": 1941 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008443133944717298, + "loss": 3.5031, + "step": 1942 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008441432536137213, + "loss": 3.4053, + "step": 1943 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008439730370012699, + "loss": 3.3728, + "step": 1944 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008438027446718449, + "loss": 3.339, + "step": 1945 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008436323766629314, + "loss": 3.2442, + "step": 1946 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008434619330120319, + "loss": 3.3336, + "step": 1947 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008432914137566651, + "loss": 3.2944, + "step": 1948 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008431208189343669, + "loss": 3.2119, + "step": 1949 + }, + { + "epoch": 0.56, + "learning_rate": 0.0008429501485826889, + "loss": 3.3018, + "step": 1950 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008427794027392002, + "loss": 3.3234, + "step": 1951 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008426085814414859, + "loss": 3.2862, + "step": 1952 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008424376847271482, + "loss": 3.3701, + "step": 1953 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008422667126338057, + "loss": 3.3421, + "step": 1954 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008420956651990937, + "loss": 3.3834, + "step": 1955 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008419245424606638, + "loss": 3.3293, + "step": 1956 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008417533444561843, + "loss": 3.3661, + "step": 1957 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008415820712233402, + "loss": 3.2568, + "step": 1958 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008414107227998329, + "loss": 3.364, + "step": 1959 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008412392992233806, + "loss": 3.3625, + "step": 1960 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008410678005317177, + "loss": 3.3368, + "step": 1961 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008408962267625954, + "loss": 3.4848, + "step": 1962 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008407245779537812, + "loss": 3.3013, + "step": 1963 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008405528541430594, + "loss": 3.3423, + "step": 1964 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008403810553682306, + "loss": 3.2173, + "step": 1965 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008402091816671121, + "loss": 3.4899, + "step": 1966 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008400372330775372, + "loss": 3.4638, + "step": 1967 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008398652096373565, + "loss": 3.3639, + "step": 1968 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008396931113844362, + "loss": 3.4823, + "step": 1969 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008395209383566596, + "loss": 3.3618, + "step": 1970 + }, + { + "epoch": 0.57, + "learning_rate": 0.000839348690591926, + "loss": 3.397, + "step": 1971 + }, + { + "epoch": 0.57, + "learning_rate": 0.000839176368128152, + "loss": 3.2737, + "step": 1972 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008390039710032692, + "loss": 3.3572, + "step": 1973 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008388314992552271, + "loss": 3.2798, + "step": 1974 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008386589529219908, + "loss": 3.3979, + "step": 1975 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008384863320415419, + "loss": 3.4746, + "step": 1976 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008383136366518787, + "loss": 3.4024, + "step": 1977 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008381408667910157, + "loss": 3.4492, + "step": 1978 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008379680224969837, + "loss": 3.3682, + "step": 1979 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008377951038078302, + "loss": 3.2377, + "step": 1980 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008376221107616186, + "loss": 3.3355, + "step": 1981 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008374490433964292, + "loss": 3.4091, + "step": 1982 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008372759017503584, + "loss": 3.3204, + "step": 1983 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008371026858615188, + "loss": 3.3658, + "step": 1984 + }, + { + "epoch": 0.57, + "learning_rate": 0.0008369293957680396, + "loss": 3.2496, + "step": 1985 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008367560315080663, + "loss": 3.3147, + "step": 1986 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008365825931197603, + "loss": 3.3209, + "step": 1987 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008364090806413003, + "loss": 3.3813, + "step": 1988 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008362354941108802, + "loss": 3.2619, + "step": 1989 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008360618335667108, + "loss": 3.4472, + "step": 1990 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008358880990470192, + "loss": 3.2791, + "step": 1991 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008357142905900485, + "loss": 3.5078, + "step": 1992 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008355404082340585, + "loss": 3.2592, + "step": 1993 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008353664520173248, + "loss": 3.3373, + "step": 1994 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008351924219781393, + "loss": 3.544, + "step": 1995 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008350183181548106, + "loss": 3.3556, + "step": 1996 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008348441405856633, + "loss": 3.3404, + "step": 1997 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008346698893090379, + "loss": 3.3787, + "step": 1998 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008344955643632917, + "loss": 3.2588, + "step": 1999 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008343211657867978, + "loss": 3.3134, + "step": 2000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008341466936179455, + "loss": 3.3613, + "step": 2001 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008339721478951408, + "loss": 3.3776, + "step": 2002 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008337975286568052, + "loss": 3.3187, + "step": 2003 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008336228359413769, + "loss": 3.2765, + "step": 2004 + }, + { + "epoch": 0.58, + "learning_rate": 0.00083344806978731, + "loss": 3.4083, + "step": 2005 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008332732302330748, + "loss": 3.3402, + "step": 2006 + }, + { + "epoch": 0.58, + "learning_rate": 0.000833098317317158, + "loss": 3.4671, + "step": 2007 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008329233310780622, + "loss": 3.1967, + "step": 2008 + }, + { + "epoch": 0.58, + "learning_rate": 0.000832748271554306, + "loss": 3.2754, + "step": 2009 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008325731387844247, + "loss": 3.3834, + "step": 2010 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008323979328069688, + "loss": 3.4381, + "step": 2011 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008322226536605062, + "loss": 3.3755, + "step": 2012 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008320473013836196, + "loss": 3.3647, + "step": 2013 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008318718760149085, + "loss": 3.2551, + "step": 2014 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008316963775929885, + "loss": 3.3266, + "step": 2015 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008315208061564909, + "loss": 3.3726, + "step": 2016 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008313451617440637, + "loss": 3.3969, + "step": 2017 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008311694443943702, + "loss": 3.3384, + "step": 2018 + }, + { + "epoch": 0.58, + "learning_rate": 0.0008309936541460904, + "loss": 3.4492, + "step": 2019 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008308177910379199, + "loss": 3.4375, + "step": 2020 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008306418551085706, + "loss": 3.3988, + "step": 2021 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008304658463967704, + "loss": 3.3608, + "step": 2022 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008302897649412632, + "loss": 3.2904, + "step": 2023 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008301136107808086, + "loss": 3.4792, + "step": 2024 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008299373839541828, + "loss": 3.3866, + "step": 2025 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008297610845001775, + "loss": 3.5166, + "step": 2026 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008295847124576007, + "loss": 3.3389, + "step": 2027 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008294082678652761, + "loss": 3.3418, + "step": 2028 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008292317507620437, + "loss": 3.308, + "step": 2029 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008290551611867593, + "loss": 3.399, + "step": 2030 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008288784991782945, + "loss": 3.3189, + "step": 2031 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008287017647755368, + "loss": 3.2981, + "step": 2032 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008285249580173901, + "loss": 3.3544, + "step": 2033 + }, + { + "epoch": 0.59, + "learning_rate": 0.000828348078942774, + "loss": 3.28, + "step": 2034 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008281711275906237, + "loss": 3.2638, + "step": 2035 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008279941039998906, + "loss": 3.3358, + "step": 2036 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008278170082095422, + "loss": 3.3871, + "step": 2037 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008276398402585614, + "loss": 3.4146, + "step": 2038 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008274626001859475, + "loss": 3.3006, + "step": 2039 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008272852880307154, + "loss": 3.41, + "step": 2040 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008271079038318955, + "loss": 3.3944, + "step": 2041 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008269304476285349, + "loss": 3.2374, + "step": 2042 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008267529194596959, + "loss": 3.3536, + "step": 2043 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008265753193644568, + "loss": 3.3419, + "step": 2044 + }, + { + "epoch": 0.59, + "learning_rate": 0.000826397647381912, + "loss": 3.328, + "step": 2045 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008262199035511713, + "loss": 3.4855, + "step": 2046 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008260420879113605, + "loss": 3.412, + "step": 2047 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008258642005016213, + "loss": 3.3747, + "step": 2048 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008256862413611113, + "loss": 3.3767, + "step": 2049 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008255082105290033, + "loss": 3.4244, + "step": 2050 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008253301080444864, + "loss": 3.3287, + "step": 2051 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008251519339467657, + "loss": 3.3744, + "step": 2052 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008249736882750614, + "loss": 3.2847, + "step": 2053 + }, + { + "epoch": 0.59, + "learning_rate": 0.0008247953710686098, + "loss": 3.3765, + "step": 2054 + }, + { + "epoch": 0.6, + "learning_rate": 0.000824616982366663, + "loss": 3.3201, + "step": 2055 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008244385222084889, + "loss": 3.3447, + "step": 2056 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008242599906333707, + "loss": 3.4563, + "step": 2057 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008240813876806079, + "loss": 3.38, + "step": 2058 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008239027133895153, + "loss": 3.4187, + "step": 2059 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008237239677994234, + "loss": 3.2578, + "step": 2060 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008235451509496788, + "loss": 3.3162, + "step": 2061 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008233662628796435, + "loss": 3.5046, + "step": 2062 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008231873036286947, + "loss": 3.327, + "step": 2063 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008230082732362264, + "loss": 3.3191, + "step": 2064 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008228291717416471, + "loss": 3.3624, + "step": 2065 + }, + { + "epoch": 0.6, + "learning_rate": 0.000822649999184382, + "loss": 3.245, + "step": 2066 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008224707556038711, + "loss": 3.3217, + "step": 2067 + }, + { + "epoch": 0.6, + "learning_rate": 0.00082229144103957, + "loss": 3.3457, + "step": 2068 + }, + { + "epoch": 0.6, + "learning_rate": 0.000822112055530951, + "loss": 3.3784, + "step": 2069 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008219325991175009, + "loss": 3.293, + "step": 2070 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008217530718387223, + "loss": 3.3497, + "step": 2071 + }, + { + "epoch": 0.6, + "learning_rate": 0.000821573473734134, + "loss": 3.4371, + "step": 2072 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008213938048432696, + "loss": 3.2978, + "step": 2073 + }, + { + "epoch": 0.6, + "learning_rate": 0.000821214065205679, + "loss": 3.2809, + "step": 2074 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008210342548609273, + "loss": 3.2783, + "step": 2075 + }, + { + "epoch": 0.6, + "learning_rate": 0.000820854373848595, + "loss": 3.2221, + "step": 2076 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008206744222082785, + "loss": 3.3784, + "step": 2077 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008204943999795896, + "loss": 3.4696, + "step": 2078 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008203143072021555, + "loss": 3.3715, + "step": 2079 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008201341439156194, + "loss": 3.3256, + "step": 2080 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008199539101596393, + "loss": 3.386, + "step": 2081 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008197736059738894, + "loss": 3.1622, + "step": 2082 + }, + { + "epoch": 0.6, + "learning_rate": 0.000819593231398059, + "loss": 3.2572, + "step": 2083 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008194127864718529, + "loss": 3.4193, + "step": 2084 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008192322712349917, + "loss": 3.3013, + "step": 2085 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008190516857272113, + "loss": 3.3038, + "step": 2086 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008188710299882628, + "loss": 3.3601, + "step": 2087 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008186903040579131, + "loss": 3.4277, + "step": 2088 + }, + { + "epoch": 0.6, + "learning_rate": 0.0008185095079759444, + "loss": 3.166, + "step": 2089 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008183286417821548, + "loss": 3.3043, + "step": 2090 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008181477055163567, + "loss": 3.3465, + "step": 2091 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008179666992183791, + "loss": 3.3567, + "step": 2092 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008177856229280659, + "loss": 3.198, + "step": 2093 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008176044766852766, + "loss": 3.3882, + "step": 2094 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008174232605298856, + "loss": 3.3909, + "step": 2095 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008172419745017834, + "loss": 3.2309, + "step": 2096 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008170606186408755, + "loss": 3.2033, + "step": 2097 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008168791929870824, + "loss": 3.373, + "step": 2098 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008166976975803411, + "loss": 3.3108, + "step": 2099 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008165161324606026, + "loss": 3.3767, + "step": 2100 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008163344976678342, + "loss": 3.291, + "step": 2101 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008161527932420181, + "loss": 3.2666, + "step": 2102 + }, + { + "epoch": 0.61, + "learning_rate": 0.000815971019223152, + "loss": 3.33, + "step": 2103 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008157891756512488, + "loss": 3.4123, + "step": 2104 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008156072625663369, + "loss": 3.3861, + "step": 2105 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008154252800084595, + "loss": 3.3821, + "step": 2106 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008152432280176758, + "loss": 3.3206, + "step": 2107 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008150611066340599, + "loss": 3.4567, + "step": 2108 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008148789158977012, + "loss": 3.2937, + "step": 2109 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008146966558487043, + "loss": 3.3507, + "step": 2110 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008145143265271894, + "loss": 3.2633, + "step": 2111 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008143319279732914, + "loss": 3.2777, + "step": 2112 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008141494602271609, + "loss": 3.3884, + "step": 2113 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008139669233289634, + "loss": 3.2869, + "step": 2114 + }, + { + "epoch": 0.61, + "learning_rate": 0.00081378431731888, + "loss": 3.3211, + "step": 2115 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008136016422371065, + "loss": 3.3305, + "step": 2116 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008134188981238546, + "loss": 3.3877, + "step": 2117 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008132360850193506, + "loss": 3.2927, + "step": 2118 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008130532029638361, + "loss": 3.3749, + "step": 2119 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008128702519975679, + "loss": 3.1715, + "step": 2120 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008126872321608184, + "loss": 3.2586, + "step": 2121 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008125041434938746, + "loss": 3.412, + "step": 2122 + }, + { + "epoch": 0.61, + "learning_rate": 0.0008123209860370388, + "loss": 3.3046, + "step": 2123 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008121377598306283, + "loss": 3.3415, + "step": 2124 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008119544649149761, + "loss": 3.2916, + "step": 2125 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008117711013304298, + "loss": 3.4712, + "step": 2126 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008115876691173522, + "loss": 3.3304, + "step": 2127 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008114041683161213, + "loss": 3.2743, + "step": 2128 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008112205989671302, + "loss": 3.3253, + "step": 2129 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008110369611107869, + "loss": 3.3865, + "step": 2130 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008108532547875149, + "loss": 3.2456, + "step": 2131 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008106694800377523, + "loss": 3.1915, + "step": 2132 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008104856369019524, + "loss": 3.2667, + "step": 2133 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008103017254205839, + "loss": 3.2965, + "step": 2134 + }, + { + "epoch": 0.62, + "learning_rate": 0.00081011774563413, + "loss": 3.3934, + "step": 2135 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008099336975830896, + "loss": 3.317, + "step": 2136 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008097495813079756, + "loss": 3.3609, + "step": 2137 + }, + { + "epoch": 0.62, + "learning_rate": 0.000809565396849317, + "loss": 3.2618, + "step": 2138 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008093811442476573, + "loss": 3.2941, + "step": 2139 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008091968235435551, + "loss": 3.4445, + "step": 2140 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008090124347775836, + "loss": 3.182, + "step": 2141 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008088279779903318, + "loss": 3.4259, + "step": 2142 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008086434532224028, + "loss": 3.3834, + "step": 2143 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008084588605144154, + "loss": 3.3397, + "step": 2144 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008082741999070029, + "loss": 3.2279, + "step": 2145 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008080894714408135, + "loss": 3.2908, + "step": 2146 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008079046751565107, + "loss": 3.4242, + "step": 2147 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008077198110947725, + "loss": 3.3703, + "step": 2148 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008075348792962924, + "loss": 3.381, + "step": 2149 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008073498798017785, + "loss": 3.254, + "step": 2150 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008071648126519532, + "loss": 3.4739, + "step": 2151 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008069796778875548, + "loss": 3.3563, + "step": 2152 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008067944755493359, + "loss": 3.2915, + "step": 2153 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008066092056780643, + "loss": 3.2568, + "step": 2154 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008064238683145223, + "loss": 3.3293, + "step": 2155 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008062384634995072, + "loss": 3.2936, + "step": 2156 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008060529912738315, + "loss": 3.2912, + "step": 2157 + }, + { + "epoch": 0.62, + "learning_rate": 0.0008058674516783217, + "loss": 3.3809, + "step": 2158 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008056818447538202, + "loss": 3.3233, + "step": 2159 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008054961705411834, + "loss": 3.3607, + "step": 2160 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008053104290812827, + "loss": 3.3175, + "step": 2161 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008051246204150046, + "loss": 3.3384, + "step": 2162 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008049387445832499, + "loss": 3.3329, + "step": 2163 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008047528016269349, + "loss": 3.2286, + "step": 2164 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008045667915869899, + "loss": 3.4024, + "step": 2165 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008043807145043603, + "loss": 3.2363, + "step": 2166 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008041945704200064, + "loss": 3.3358, + "step": 2167 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008040083593749034, + "loss": 3.3777, + "step": 2168 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008038220814100403, + "loss": 3.4601, + "step": 2169 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008036357365664219, + "loss": 3.2846, + "step": 2170 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008034493248850674, + "loss": 3.2696, + "step": 2171 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008032628464070105, + "loss": 3.3914, + "step": 2172 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008030763011732995, + "loss": 3.1885, + "step": 2173 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008028896892249979, + "loss": 3.3052, + "step": 2174 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008027030106031835, + "loss": 3.2597, + "step": 2175 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008025162653489491, + "loss": 3.3002, + "step": 2176 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008023294535034016, + "loss": 3.2688, + "step": 2177 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008021425751076629, + "loss": 3.312, + "step": 2178 + }, + { + "epoch": 0.63, + "learning_rate": 0.00080195563020287, + "loss": 3.3452, + "step": 2179 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008017686188301737, + "loss": 3.4421, + "step": 2180 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008015815410307398, + "loss": 3.3151, + "step": 2181 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008013943968457491, + "loss": 3.3593, + "step": 2182 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008012071863163964, + "loss": 3.3991, + "step": 2183 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008010199094838914, + "loss": 3.4062, + "step": 2184 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008008325663894586, + "loss": 3.3301, + "step": 2185 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008006451570743365, + "loss": 3.2703, + "step": 2186 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008004576815797786, + "loss": 3.386, + "step": 2187 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008002701399470531, + "loss": 3.364, + "step": 2188 + }, + { + "epoch": 0.63, + "learning_rate": 0.0008000825322174423, + "loss": 3.3026, + "step": 2189 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007998948584322434, + "loss": 3.3486, + "step": 2190 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007997071186327682, + "loss": 3.2615, + "step": 2191 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007995193128603426, + "loss": 3.3764, + "step": 2192 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007993314411563075, + "loss": 3.3229, + "step": 2193 + }, + { + "epoch": 0.64, + "learning_rate": 0.000799143503562018, + "loss": 3.3121, + "step": 2194 + }, + { + "epoch": 0.64, + "learning_rate": 0.000798955500118844, + "loss": 3.1774, + "step": 2195 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007987674308681694, + "loss": 3.3891, + "step": 2196 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007985792958513931, + "loss": 3.3273, + "step": 2197 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007983910951099283, + "loss": 3.2368, + "step": 2198 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007982028286852024, + "loss": 3.3407, + "step": 2199 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007980144966186578, + "loss": 3.2784, + "step": 2200 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007978260989517508, + "loss": 3.1925, + "step": 2201 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007976376357259526, + "loss": 3.1958, + "step": 2202 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007974491069827484, + "loss": 3.3584, + "step": 2203 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007972605127636383, + "loss": 3.3274, + "step": 2204 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007970718531101365, + "loss": 3.3039, + "step": 2205 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007968831280637713, + "loss": 3.3252, + "step": 2206 + }, + { + "epoch": 0.64, + "learning_rate": 0.000796694337666086, + "loss": 3.2968, + "step": 2207 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007965054819586381, + "loss": 3.36, + "step": 2208 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007963165609829995, + "loss": 3.3232, + "step": 2209 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007961275747807562, + "loss": 3.4068, + "step": 2210 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007959385233935086, + "loss": 3.3558, + "step": 2211 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007957494068628718, + "loss": 3.4937, + "step": 2212 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007955602252304752, + "loss": 3.187, + "step": 2213 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007953709785379621, + "loss": 3.4378, + "step": 2214 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007951816668269904, + "loss": 3.3885, + "step": 2215 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007949922901392322, + "loss": 3.2606, + "step": 2216 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007948028485163743, + "loss": 3.4462, + "step": 2217 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007946133420001171, + "loss": 3.2161, + "step": 2218 + }, + { + "epoch": 0.64, + "learning_rate": 0.000794423770632176, + "loss": 3.2039, + "step": 2219 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007942341344542801, + "loss": 3.4182, + "step": 2220 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007940444335081732, + "loss": 3.2639, + "step": 2221 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007938546678356131, + "loss": 3.4901, + "step": 2222 + }, + { + "epoch": 0.64, + "learning_rate": 0.000793664837478372, + "loss": 3.2681, + "step": 2223 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007934749424782361, + "loss": 3.2701, + "step": 2224 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007932849828770061, + "loss": 3.347, + "step": 2225 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007930949587164968, + "loss": 3.3469, + "step": 2226 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007929048700385371, + "loss": 3.3277, + "step": 2227 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007927147168849704, + "loss": 3.4065, + "step": 2228 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007925244992976537, + "loss": 3.4147, + "step": 2229 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007923342173184594, + "loss": 3.2844, + "step": 2230 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007921438709892724, + "loss": 3.2478, + "step": 2231 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007919534603519934, + "loss": 3.3189, + "step": 2232 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007917629854485357, + "loss": 3.1947, + "step": 2233 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007915724463208282, + "loss": 3.4606, + "step": 2234 + }, + { + "epoch": 0.65, + "learning_rate": 0.000791381843010813, + "loss": 3.4158, + "step": 2235 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007911911755604466, + "loss": 3.365, + "step": 2236 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007910004440116996, + "loss": 3.3101, + "step": 2237 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007908096484065569, + "loss": 3.3731, + "step": 2238 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007906187887870172, + "loss": 3.4101, + "step": 2239 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007904278651950933, + "loss": 3.3002, + "step": 2240 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007902368776728125, + "loss": 3.4901, + "step": 2241 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007900458262622156, + "loss": 3.2595, + "step": 2242 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007898547110053581, + "loss": 3.2746, + "step": 2243 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007896635319443087, + "loss": 3.2214, + "step": 2244 + }, + { + "epoch": 0.65, + "learning_rate": 0.000789472289121151, + "loss": 3.3826, + "step": 2245 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007892809825779821, + "loss": 3.2913, + "step": 2246 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007890896123569135, + "loss": 3.2191, + "step": 2247 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007888981785000704, + "loss": 3.3184, + "step": 2248 + }, + { + "epoch": 0.65, + "learning_rate": 0.000788706681049592, + "loss": 3.2472, + "step": 2249 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007885151200476319, + "loss": 3.3683, + "step": 2250 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007883234955363572, + "loss": 3.3096, + "step": 2251 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007881318075579492, + "loss": 3.3168, + "step": 2252 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007879400561546033, + "loss": 3.4886, + "step": 2253 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007877482413685286, + "loss": 3.2121, + "step": 2254 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007875563632419484, + "loss": 3.336, + "step": 2255 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007873644218170996, + "loss": 3.3045, + "step": 2256 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007871724171362336, + "loss": 3.3418, + "step": 2257 + }, + { + "epoch": 0.65, + "learning_rate": 0.000786980349241615, + "loss": 3.166, + "step": 2258 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007867882181755231, + "loss": 3.3934, + "step": 2259 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007865960239802503, + "loss": 3.4106, + "step": 2260 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007864037666981036, + "loss": 3.3376, + "step": 2261 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007862114463714034, + "loss": 3.2463, + "step": 2262 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007860190630424842, + "loss": 3.2966, + "step": 2263 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007858266167536944, + "loss": 3.286, + "step": 2264 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007856341075473961, + "loss": 3.3309, + "step": 2265 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007854415354659654, + "loss": 3.3617, + "step": 2266 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007852489005517922, + "loss": 3.3939, + "step": 2267 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007850562028472801, + "loss": 3.4517, + "step": 2268 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007848634423948469, + "loss": 3.3077, + "step": 2269 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007846706192369235, + "loss": 3.3066, + "step": 2270 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007844777334159554, + "loss": 3.3159, + "step": 2271 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007842847849744013, + "loss": 3.3739, + "step": 2272 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007840917739547342, + "loss": 3.2778, + "step": 2273 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007838987003994405, + "loss": 3.4168, + "step": 2274 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007837055643510202, + "loss": 3.3508, + "step": 2275 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007835123658519878, + "loss": 3.3684, + "step": 2276 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007833191049448706, + "loss": 3.3352, + "step": 2277 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007831257816722104, + "loss": 3.294, + "step": 2278 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007829323960765625, + "loss": 3.2529, + "step": 2279 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007827389482004954, + "loss": 3.3472, + "step": 2280 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007825454380865922, + "loss": 3.3397, + "step": 2281 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007823518657774492, + "loss": 3.2644, + "step": 2282 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007821582313156763, + "loss": 3.406, + "step": 2283 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007819645347438973, + "loss": 3.283, + "step": 2284 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007817707761047497, + "loss": 3.4015, + "step": 2285 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007815769554408845, + "loss": 3.3053, + "step": 2286 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007813830727949663, + "loss": 3.3879, + "step": 2287 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007811891282096737, + "loss": 3.2801, + "step": 2288 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007809951217276985, + "loss": 3.2409, + "step": 2289 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007808010533917464, + "loss": 3.3723, + "step": 2290 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007806069232445368, + "loss": 3.3152, + "step": 2291 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007804127313288023, + "loss": 3.4493, + "step": 2292 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007802184776872894, + "loss": 3.2221, + "step": 2293 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007800241623627582, + "loss": 3.2853, + "step": 2294 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007798297853979823, + "loss": 3.2532, + "step": 2295 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007796353468357489, + "loss": 3.2091, + "step": 2296 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007794408467188586, + "loss": 3.3899, + "step": 2297 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007792462850901258, + "loss": 3.3981, + "step": 2298 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007790516619923783, + "loss": 3.2673, + "step": 2299 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007788569774684575, + "loss": 3.3157, + "step": 2300 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007786622315612182, + "loss": 3.3404, + "step": 2301 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007784674243135289, + "loss": 3.4063, + "step": 2302 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007782725557682713, + "loss": 3.3032, + "step": 2303 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007780776259683411, + "loss": 3.3349, + "step": 2304 + }, + { + "epoch": 0.67, + "learning_rate": 0.000777882634956647, + "loss": 3.1847, + "step": 2305 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007776875827761113, + "loss": 3.3071, + "step": 2306 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007774924694696698, + "loss": 3.5699, + "step": 2307 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007772972950802718, + "loss": 3.2079, + "step": 2308 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007771020596508799, + "loss": 3.3501, + "step": 2309 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007769067632244707, + "loss": 3.2777, + "step": 2310 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007767114058440332, + "loss": 3.4477, + "step": 2311 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007765159875525706, + "loss": 3.2204, + "step": 2312 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007763205083930995, + "loss": 3.264, + "step": 2313 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007761249684086493, + "loss": 3.3216, + "step": 2314 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007759293676422636, + "loss": 3.3178, + "step": 2315 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007757337061369989, + "loss": 3.2545, + "step": 2316 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007755379839359248, + "loss": 3.3306, + "step": 2317 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007753422010821248, + "loss": 3.3021, + "step": 2318 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007751463576186957, + "loss": 3.2546, + "step": 2319 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007749504535887472, + "loss": 3.3511, + "step": 2320 + }, + { + "epoch": 0.67, + "learning_rate": 0.000774754489035403, + "loss": 3.1737, + "step": 2321 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007745584640017995, + "loss": 3.267, + "step": 2322 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007743623785310866, + "loss": 3.2648, + "step": 2323 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007741662326664278, + "loss": 3.3597, + "step": 2324 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007739700264509992, + "loss": 3.3107, + "step": 2325 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007737737599279913, + "loss": 3.3417, + "step": 2326 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007735774331406066, + "loss": 3.3739, + "step": 2327 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007733810461320619, + "loss": 3.3832, + "step": 2328 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007731845989455866, + "loss": 3.3474, + "step": 2329 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007729880916244236, + "loss": 3.3403, + "step": 2330 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007727915242118292, + "loss": 3.3854, + "step": 2331 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007725948967510726, + "loss": 3.2718, + "step": 2332 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007723982092854365, + "loss": 3.3024, + "step": 2333 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007722014618582166, + "loss": 3.3894, + "step": 2334 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007720046545127218, + "loss": 3.336, + "step": 2335 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007718077872922742, + "loss": 3.3541, + "step": 2336 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007716108602402094, + "loss": 3.2757, + "step": 2337 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007714138733998758, + "loss": 3.3654, + "step": 2338 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007712168268146351, + "loss": 3.2871, + "step": 2339 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007710197205278619, + "loss": 3.2296, + "step": 2340 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007708225545829446, + "loss": 3.2833, + "step": 2341 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007706253290232838, + "loss": 3.3375, + "step": 2342 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007704280438922943, + "loss": 3.2294, + "step": 2343 + }, + { + "epoch": 0.68, + "learning_rate": 0.000770230699233403, + "loss": 3.3513, + "step": 2344 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007700332950900504, + "loss": 3.2919, + "step": 2345 + }, + { + "epoch": 0.68, + "learning_rate": 0.00076983583150569, + "loss": 3.2693, + "step": 2346 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007696383085237886, + "loss": 3.2395, + "step": 2347 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007694407261878258, + "loss": 3.2252, + "step": 2348 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007692430845412945, + "loss": 3.3333, + "step": 2349 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007690453836277002, + "loss": 3.3296, + "step": 2350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007688476234905622, + "loss": 3.3576, + "step": 2351 + }, + { + "epoch": 0.68, + "learning_rate": 0.000768649804173412, + "loss": 3.3986, + "step": 2352 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007684519257197946, + "loss": 3.2361, + "step": 2353 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007682539881732681, + "loss": 3.4595, + "step": 2354 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007680559915774033, + "loss": 3.2328, + "step": 2355 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007678579359757842, + "loss": 3.2864, + "step": 2356 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007676598214120078, + "loss": 3.3988, + "step": 2357 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007674616479296841, + "loss": 3.2249, + "step": 2358 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007672634155724356, + "loss": 3.3609, + "step": 2359 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007670651243838986, + "loss": 3.3224, + "step": 2360 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007668667744077216, + "loss": 3.2143, + "step": 2361 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007666683656875664, + "loss": 3.2994, + "step": 2362 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007664698982671078, + "loss": 3.3185, + "step": 2363 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007662713721900331, + "loss": 3.1976, + "step": 2364 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007660727875000431, + "loss": 3.2574, + "step": 2365 + }, + { + "epoch": 0.69, + "learning_rate": 0.000765874144240851, + "loss": 3.2942, + "step": 2366 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007656754424561834, + "loss": 3.3466, + "step": 2367 + }, + { + "epoch": 0.69, + "learning_rate": 0.000765476682189779, + "loss": 3.2743, + "step": 2368 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007652778634853903, + "loss": 3.3277, + "step": 2369 + }, + { + "epoch": 0.69, + "learning_rate": 0.000765078986386782, + "loss": 3.2839, + "step": 2370 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007648800509377318, + "loss": 3.1942, + "step": 2371 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007646810571820304, + "loss": 3.2388, + "step": 2372 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007644820051634812, + "loss": 3.426, + "step": 2373 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007642828949259007, + "loss": 3.3784, + "step": 2374 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007640837265131177, + "loss": 3.3261, + "step": 2375 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007638844999689743, + "loss": 3.307, + "step": 2376 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007636852153373251, + "loss": 3.2962, + "step": 2377 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007634858726620373, + "loss": 3.2425, + "step": 2378 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007632864719869917, + "loss": 3.3222, + "step": 2379 + }, + { + "epoch": 0.69, + "learning_rate": 0.000763087013356081, + "loss": 3.185, + "step": 2380 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007628874968132111, + "loss": 3.3223, + "step": 2381 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007626879224023002, + "loss": 3.2877, + "step": 2382 + }, + { + "epoch": 0.69, + "learning_rate": 0.00076248829016728, + "loss": 3.3245, + "step": 2383 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007622886001520944, + "loss": 3.2963, + "step": 2384 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007620888524006999, + "loss": 3.1869, + "step": 2385 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007618890469570661, + "loss": 3.2662, + "step": 2386 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007616891838651749, + "loss": 3.2476, + "step": 2387 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007614892631690216, + "loss": 3.228, + "step": 2388 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007612892849126132, + "loss": 3.2759, + "step": 2389 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007610892491399701, + "loss": 3.3322, + "step": 2390 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007608891558951249, + "loss": 3.2079, + "step": 2391 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007606890052221232, + "loss": 3.232, + "step": 2392 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007604887971650233, + "loss": 3.3401, + "step": 2393 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007602885317678957, + "loss": 3.1664, + "step": 2394 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007600882090748238, + "loss": 3.2771, + "step": 2395 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007598878291299036, + "loss": 3.3346, + "step": 2396 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007596873919772438, + "loss": 3.3885, + "step": 2397 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007594868976609657, + "loss": 3.2252, + "step": 2398 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007592863462252024, + "loss": 3.1511, + "step": 2399 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007590857377141009, + "loss": 3.3808, + "step": 2400 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007588850721718199, + "loss": 3.3095, + "step": 2401 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007586843496425309, + "loss": 3.3894, + "step": 2402 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007584835701704176, + "loss": 3.3223, + "step": 2403 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007582827337996768, + "loss": 3.3342, + "step": 2404 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007580818405745176, + "loss": 3.3465, + "step": 2405 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007578808905391615, + "loss": 3.3457, + "step": 2406 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007576798837378425, + "loss": 3.3582, + "step": 2407 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007574788202148071, + "loss": 3.2792, + "step": 2408 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007572777000143145, + "loss": 3.3602, + "step": 2409 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007570765231806362, + "loss": 3.254, + "step": 2410 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007568752897580562, + "loss": 3.4319, + "step": 2411 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007566739997908709, + "loss": 3.3807, + "step": 2412 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007564726533233892, + "loss": 3.2483, + "step": 2413 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007562712503999327, + "loss": 3.4165, + "step": 2414 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007560697910648347, + "loss": 3.2272, + "step": 2415 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007558682753624419, + "loss": 3.3466, + "step": 2416 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007556667033371123, + "loss": 3.3784, + "step": 2417 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007554650750332175, + "loss": 3.165, + "step": 2418 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007552633904951405, + "loss": 3.3042, + "step": 2419 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007550616497672771, + "loss": 3.3435, + "step": 2420 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007548598528940353, + "loss": 3.2307, + "step": 2421 + }, + { + "epoch": 0.7, + "learning_rate": 0.000754657999919836, + "loss": 3.3242, + "step": 2422 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007544560908891116, + "loss": 3.255, + "step": 2423 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007542541258463075, + "loss": 3.1781, + "step": 2424 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007540521048358814, + "loss": 3.2379, + "step": 2425 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007538500279023026, + "loss": 3.366, + "step": 2426 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007536478950900537, + "loss": 3.3713, + "step": 2427 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007534457064436289, + "loss": 3.3321, + "step": 2428 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007532434620075349, + "loss": 3.2991, + "step": 2429 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007530411618262906, + "loss": 3.3414, + "step": 2430 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007528388059444278, + "loss": 3.4424, + "step": 2431 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007526363944064895, + "loss": 3.3561, + "step": 2432 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007524339272570316, + "loss": 3.4258, + "step": 2433 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007522314045406223, + "loss": 3.1827, + "step": 2434 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007520288263018418, + "loss": 3.2712, + "step": 2435 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007518261925852823, + "loss": 3.2915, + "step": 2436 + }, + { + "epoch": 0.71, + "learning_rate": 0.000751623503435549, + "loss": 3.3503, + "step": 2437 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007514207588972583, + "loss": 3.2194, + "step": 2438 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007512179590150396, + "loss": 3.2236, + "step": 2439 + }, + { + "epoch": 0.71, + "learning_rate": 0.000751015103833534, + "loss": 3.2936, + "step": 2440 + }, + { + "epoch": 0.71, + "learning_rate": 0.000750812193397395, + "loss": 3.2713, + "step": 2441 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007506092277512884, + "loss": 3.28, + "step": 2442 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007504062069398918, + "loss": 3.2698, + "step": 2443 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007502031310078949, + "loss": 3.378, + "step": 2444 + }, + { + "epoch": 0.71, + "learning_rate": 0.00075, + "loss": 3.3048, + "step": 2445 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007497968139609213, + "loss": 3.4182, + "step": 2446 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007495935729353849, + "loss": 3.3861, + "step": 2447 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007493902769681292, + "loss": 3.325, + "step": 2448 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007491869261039046, + "loss": 3.3921, + "step": 2449 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007489835203874738, + "loss": 3.3489, + "step": 2450 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007487800598636116, + "loss": 3.1312, + "step": 2451 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007485765445771042, + "loss": 3.307, + "step": 2452 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007483729745727508, + "loss": 3.3065, + "step": 2453 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007481693498953621, + "loss": 3.4184, + "step": 2454 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007479656705897607, + "loss": 3.3657, + "step": 2455 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007477619367007815, + "loss": 3.3791, + "step": 2456 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007475581482732716, + "loss": 3.3281, + "step": 2457 + }, + { + "epoch": 0.71, + "learning_rate": 0.00074735430535209, + "loss": 3.4043, + "step": 2458 + }, + { + "epoch": 0.71, + "learning_rate": 0.000747150407982107, + "loss": 3.3448, + "step": 2459 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007469464562082059, + "loss": 3.3861, + "step": 2460 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007467424500752813, + "loss": 3.1929, + "step": 2461 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007465383896282403, + "loss": 3.2409, + "step": 2462 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007463342749120013, + "loss": 3.2858, + "step": 2463 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007461301059714953, + "loss": 3.3522, + "step": 2464 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007459258828516645, + "loss": 3.2691, + "step": 2465 + }, + { + "epoch": 0.71, + "learning_rate": 0.000745721605597464, + "loss": 3.377, + "step": 2466 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007455172742538599, + "loss": 3.3386, + "step": 2467 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007453128888658307, + "loss": 3.3165, + "step": 2468 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007451084494783667, + "loss": 3.207, + "step": 2469 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007449039561364701, + "loss": 3.395, + "step": 2470 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007446994088851548, + "loss": 3.2555, + "step": 2471 + }, + { + "epoch": 0.72, + "learning_rate": 0.000744494807769447, + "loss": 3.3541, + "step": 2472 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007442901528343841, + "loss": 3.3464, + "step": 2473 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007440854441250159, + "loss": 3.4178, + "step": 2474 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007438806816864039, + "loss": 3.418, + "step": 2475 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007436758655636212, + "loss": 3.2953, + "step": 2476 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007434709958017531, + "loss": 3.2259, + "step": 2477 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007432660724458963, + "loss": 3.1261, + "step": 2478 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007430610955411597, + "loss": 3.3843, + "step": 2479 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007428560651326637, + "loss": 3.269, + "step": 2480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007426509812655407, + "loss": 3.3477, + "step": 2481 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007424458439849342, + "loss": 3.2765, + "step": 2482 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007422406533360007, + "loss": 3.4115, + "step": 2483 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007420354093639073, + "loss": 3.3335, + "step": 2484 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007418301121138335, + "loss": 3.2718, + "step": 2485 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007416247616309701, + "loss": 3.2432, + "step": 2486 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007414193579605198, + "loss": 3.3722, + "step": 2487 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007412139011476973, + "loss": 3.1986, + "step": 2488 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007410083912377286, + "loss": 3.3292, + "step": 2489 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007408028282758515, + "loss": 3.2684, + "step": 2490 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007405972123073153, + "loss": 3.3755, + "step": 2491 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007403915433773815, + "loss": 3.2879, + "step": 2492 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007401858215313228, + "loss": 3.3021, + "step": 2493 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007399800468144234, + "loss": 3.3069, + "step": 2494 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007397742192719798, + "loss": 3.305, + "step": 2495 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007395683389492995, + "loss": 3.2709, + "step": 2496 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007393624058917019, + "loss": 3.3354, + "step": 2497 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007391564201445181, + "loss": 3.2793, + "step": 2498 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007389503817530904, + "loss": 3.2333, + "step": 2499 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007387442907627732, + "loss": 3.2478, + "step": 2500 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007385381472189321, + "loss": 3.2216, + "step": 2501 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007383319511669443, + "loss": 3.3405, + "step": 2502 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007381257026521988, + "loss": 3.2993, + "step": 2503 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007379194017200958, + "loss": 3.2292, + "step": 2504 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007377130484160476, + "loss": 3.3309, + "step": 2505 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007375066427854774, + "loss": 3.4123, + "step": 2506 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007373001848738202, + "loss": 3.2293, + "step": 2507 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007370936747265226, + "loss": 3.2575, + "step": 2508 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007368871123890425, + "loss": 3.2899, + "step": 2509 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007366804979068493, + "loss": 3.2131, + "step": 2510 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007364738313254243, + "loss": 3.3758, + "step": 2511 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007362671126902594, + "loss": 3.3223, + "step": 2512 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007360603420468589, + "loss": 3.206, + "step": 2513 + }, + { + "epoch": 0.73, + "learning_rate": 0.000735853519440738, + "loss": 3.3188, + "step": 2514 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007356466449174235, + "loss": 3.3324, + "step": 2515 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007354397185224535, + "loss": 3.2082, + "step": 2516 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007352327403013779, + "loss": 3.2798, + "step": 2517 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007350257102997573, + "loss": 3.2976, + "step": 2518 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007348186285631646, + "loss": 3.187, + "step": 2519 + }, + { + "epoch": 0.73, + "learning_rate": 0.000734611495137183, + "loss": 3.2669, + "step": 2520 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007344043100674083, + "loss": 3.2487, + "step": 2521 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007341970733994466, + "loss": 3.3945, + "step": 2522 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007339897851789162, + "loss": 3.2723, + "step": 2523 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007337824454514462, + "loss": 3.2243, + "step": 2524 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007335750542626771, + "loss": 3.3568, + "step": 2525 + }, + { + "epoch": 0.73, + "learning_rate": 0.000733367611658261, + "loss": 3.179, + "step": 2526 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007331601176838612, + "loss": 3.2023, + "step": 2527 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007329525723851519, + "loss": 3.3097, + "step": 2528 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007327449758078193, + "loss": 3.4292, + "step": 2529 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007325373279975602, + "loss": 3.2049, + "step": 2530 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007323296290000835, + "loss": 3.362, + "step": 2531 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007321218788611086, + "loss": 3.2865, + "step": 2532 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007319140776263664, + "loss": 3.3078, + "step": 2533 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007317062253415993, + "loss": 3.2789, + "step": 2534 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007314983220525604, + "loss": 3.3022, + "step": 2535 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007312903678050146, + "loss": 3.2268, + "step": 2536 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007310823626447378, + "loss": 3.2673, + "step": 2537 + }, + { + "epoch": 0.74, + "learning_rate": 0.000730874306617517, + "loss": 3.2883, + "step": 2538 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007306661997691504, + "loss": 3.2493, + "step": 2539 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007304580421454476, + "loss": 3.2458, + "step": 2540 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007302498337922293, + "loss": 3.2342, + "step": 2541 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007300415747553271, + "loss": 3.3653, + "step": 2542 + }, + { + "epoch": 0.74, + "learning_rate": 0.000729833265080584, + "loss": 3.2753, + "step": 2543 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007296249048138543, + "loss": 3.1613, + "step": 2544 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007294164940010031, + "loss": 3.2398, + "step": 2545 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007292080326879067, + "loss": 3.3029, + "step": 2546 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007289995209204529, + "loss": 3.3486, + "step": 2547 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007287909587445398, + "loss": 3.2953, + "step": 2548 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007285823462060776, + "loss": 3.3083, + "step": 2549 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007283736833509867, + "loss": 3.314, + "step": 2550 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007281649702251993, + "loss": 3.1833, + "step": 2551 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007279562068746581, + "loss": 3.3149, + "step": 2552 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007277473933453169, + "loss": 3.2666, + "step": 2553 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007275385296831413, + "loss": 3.3772, + "step": 2554 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007273296159341069, + "loss": 3.4231, + "step": 2555 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007271206521442007, + "loss": 3.3128, + "step": 2556 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007269116383594211, + "loss": 3.3952, + "step": 2557 + }, + { + "epoch": 0.74, + "learning_rate": 0.000726702574625777, + "loss": 3.2771, + "step": 2558 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007264934609892888, + "loss": 3.214, + "step": 2559 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007262842974959873, + "loss": 3.2437, + "step": 2560 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007260750841919144, + "loss": 3.2546, + "step": 2561 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007258658211231235, + "loss": 3.2326, + "step": 2562 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007256565083356785, + "loss": 3.3274, + "step": 2563 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007254471458756543, + "loss": 3.2817, + "step": 2564 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007252377337891364, + "loss": 3.301, + "step": 2565 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007250282721222222, + "loss": 3.1949, + "step": 2566 + }, + { + "epoch": 0.74, + "learning_rate": 0.000724818760921019, + "loss": 3.2885, + "step": 2567 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007246092002316456, + "loss": 3.2824, + "step": 2568 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007243995901002312, + "loss": 3.3139, + "step": 2569 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007241899305729162, + "loss": 3.1892, + "step": 2570 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007239802216958522, + "loss": 3.3741, + "step": 2571 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007237704635152011, + "loss": 3.3037, + "step": 2572 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007235606560771359, + "loss": 3.3483, + "step": 2573 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007233507994278403, + "loss": 3.2818, + "step": 2574 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007231408936135091, + "loss": 3.1891, + "step": 2575 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007229309386803476, + "loss": 3.3037, + "step": 2576 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007227209346745723, + "loss": 3.3566, + "step": 2577 + }, + { + "epoch": 0.75, + "learning_rate": 0.00072251088164241, + "loss": 3.2835, + "step": 2578 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007223007796300986, + "loss": 3.2505, + "step": 2579 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007220906286838868, + "loss": 3.2368, + "step": 2580 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007218804288500342, + "loss": 3.3388, + "step": 2581 + }, + { + "epoch": 0.75, + "learning_rate": 0.000721670180174811, + "loss": 3.2866, + "step": 2582 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007214598827044979, + "loss": 3.3073, + "step": 2583 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007212495364853866, + "loss": 3.3432, + "step": 2584 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007210391415637796, + "loss": 3.2473, + "step": 2585 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007208286979859899, + "loss": 3.2712, + "step": 2586 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007206182057983415, + "loss": 3.2972, + "step": 2587 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007204076650471689, + "loss": 3.2631, + "step": 2588 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007201970757788173, + "loss": 3.2795, + "step": 2589 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007199864380396424, + "loss": 3.3941, + "step": 2590 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007197757518760112, + "loss": 3.3492, + "step": 2591 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007195650173343007, + "loss": 3.2969, + "step": 2592 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007193542344608987, + "loss": 3.367, + "step": 2593 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007191434033022038, + "loss": 3.2465, + "step": 2594 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007189325239046253, + "loss": 3.2793, + "step": 2595 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007187215963145827, + "loss": 3.3054, + "step": 2596 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007185106205785067, + "loss": 3.3007, + "step": 2597 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007182995967428379, + "loss": 3.3126, + "step": 2598 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007180885248540283, + "loss": 3.2236, + "step": 2599 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007178774049585397, + "loss": 3.0986, + "step": 2600 + }, + { + "epoch": 0.75, + "learning_rate": 0.000717666237102845, + "loss": 3.2269, + "step": 2601 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007174550213334273, + "loss": 3.2539, + "step": 2602 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007172437576967805, + "loss": 3.4594, + "step": 2603 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007170324462394091, + "loss": 3.2076, + "step": 2604 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007168210870078277, + "loss": 3.4187, + "step": 2605 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007166096800485618, + "loss": 3.2836, + "step": 2606 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007163982254081475, + "loss": 3.2759, + "step": 2607 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007161867231331309, + "loss": 3.2833, + "step": 2608 + }, + { + "epoch": 0.76, + "learning_rate": 0.000715975173270069, + "loss": 3.3048, + "step": 2609 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007157635758655294, + "loss": 3.3058, + "step": 2610 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007155519309660896, + "loss": 3.2224, + "step": 2611 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007153402386183378, + "loss": 3.2768, + "step": 2612 + }, + { + "epoch": 0.76, + "learning_rate": 0.000715128498868873, + "loss": 3.3039, + "step": 2613 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007149167117643043, + "loss": 3.3904, + "step": 2614 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007147048773512513, + "loss": 3.2719, + "step": 2615 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007144929956763437, + "loss": 3.4837, + "step": 2616 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007142810667862223, + "loss": 3.1921, + "step": 2617 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007140690907275376, + "loss": 3.3359, + "step": 2618 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007138570675469511, + "loss": 3.1794, + "step": 2619 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007136449972911339, + "loss": 3.3222, + "step": 2620 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007134328800067684, + "loss": 3.3043, + "step": 2621 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007132207157405462, + "loss": 3.3152, + "step": 2622 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007130085045391706, + "loss": 3.3887, + "step": 2623 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007127962464493542, + "loss": 3.2894, + "step": 2624 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007125839415178203, + "loss": 3.1888, + "step": 2625 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007123715897913025, + "loss": 3.3921, + "step": 2626 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007121591913165446, + "loss": 3.3132, + "step": 2627 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007119467461403009, + "loss": 3.2523, + "step": 2628 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007117342543093357, + "loss": 3.4282, + "step": 2629 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007115217158704237, + "loss": 3.3204, + "step": 2630 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007113091308703497, + "loss": 3.2632, + "step": 2631 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007110964993559094, + "loss": 3.2652, + "step": 2632 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007108838213739078, + "loss": 3.239, + "step": 2633 + }, + { + "epoch": 0.76, + "learning_rate": 0.000710671096971161, + "loss": 3.4205, + "step": 2634 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007104583261944943, + "loss": 3.2946, + "step": 2635 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007102455090907445, + "loss": 3.3062, + "step": 2636 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007100326457067575, + "loss": 3.3325, + "step": 2637 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007098197360893898, + "loss": 3.3041, + "step": 2638 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007096067802855082, + "loss": 3.3117, + "step": 2639 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007093937783419896, + "loss": 3.356, + "step": 2640 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007091807303057207, + "loss": 3.1935, + "step": 2641 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007089676362235991, + "loss": 3.1475, + "step": 2642 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007087544961425316, + "loss": 3.2449, + "step": 2643 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007085413101094358, + "loss": 3.3046, + "step": 2644 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007083280781712394, + "loss": 3.3148, + "step": 2645 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007081148003748799, + "loss": 3.275, + "step": 2646 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007079014767673047, + "loss": 3.3395, + "step": 2647 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007076881073954721, + "loss": 3.3111, + "step": 2648 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007074746923063497, + "loss": 3.3318, + "step": 2649 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007072612315469155, + "loss": 3.2956, + "step": 2650 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007070477251641575, + "loss": 3.2945, + "step": 2651 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007068341732050737, + "loss": 3.3266, + "step": 2652 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007066205757166722, + "loss": 3.3074, + "step": 2653 + }, + { + "epoch": 0.77, + "learning_rate": 0.000706406932745971, + "loss": 3.332, + "step": 2654 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007061932443399985, + "loss": 3.3611, + "step": 2655 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007059795105457925, + "loss": 3.1384, + "step": 2656 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007057657314104011, + "loss": 3.1879, + "step": 2657 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007055519069808827, + "loss": 3.2258, + "step": 2658 + }, + { + "epoch": 0.77, + "learning_rate": 0.000705338037304305, + "loss": 3.3975, + "step": 2659 + }, + { + "epoch": 0.77, + "learning_rate": 0.000705124122427746, + "loss": 3.1883, + "step": 2660 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007049101623982937, + "loss": 3.3707, + "step": 2661 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007046961572630462, + "loss": 3.3252, + "step": 2662 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007044821070691113, + "loss": 3.4178, + "step": 2663 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007042680118636065, + "loss": 3.3224, + "step": 2664 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007040538716936597, + "loss": 3.3855, + "step": 2665 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007038396866064083, + "loss": 3.4197, + "step": 2666 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007036254566489997, + "loss": 3.2673, + "step": 2667 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007034111818685913, + "loss": 3.2082, + "step": 2668 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007031968623123502, + "loss": 3.4433, + "step": 2669 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007029824980274535, + "loss": 3.4111, + "step": 2670 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007027680890610881, + "loss": 3.3303, + "step": 2671 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007025536354604508, + "loss": 3.1658, + "step": 2672 + }, + { + "epoch": 0.77, + "learning_rate": 0.000702339137272748, + "loss": 3.2231, + "step": 2673 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007021245945451961, + "loss": 3.2974, + "step": 2674 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007019100073250212, + "loss": 3.2037, + "step": 2675 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007016953756594594, + "loss": 3.3163, + "step": 2676 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007014806995957562, + "loss": 3.2365, + "step": 2677 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007012659791811674, + "loss": 3.3193, + "step": 2678 + }, + { + "epoch": 0.78, + "learning_rate": 0.000701051214462958, + "loss": 3.2385, + "step": 2679 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007008364054884032, + "loss": 3.3654, + "step": 2680 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007006215523047879, + "loss": 3.3026, + "step": 2681 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007004066549594063, + "loss": 3.2484, + "step": 2682 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007001917134995627, + "loss": 3.1977, + "step": 2683 + }, + { + "epoch": 0.78, + "learning_rate": 0.000699976727972571, + "loss": 3.392, + "step": 2684 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006997616984257552, + "loss": 3.2801, + "step": 2685 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006995466249064482, + "loss": 3.3344, + "step": 2686 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006993315074619931, + "loss": 3.3117, + "step": 2687 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006991163461397425, + "loss": 3.1371, + "step": 2688 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006989011409870591, + "loss": 3.3533, + "step": 2689 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006986858920513145, + "loss": 3.2598, + "step": 2690 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006984705993798904, + "loss": 3.3226, + "step": 2691 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006982552630201782, + "loss": 3.2958, + "step": 2692 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006980398830195785, + "loss": 3.2262, + "step": 2693 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006978244594255019, + "loss": 3.1237, + "step": 2694 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006976089922853685, + "loss": 3.332, + "step": 2695 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006973934816466078, + "loss": 3.2218, + "step": 2696 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006971779275566593, + "loss": 3.217, + "step": 2697 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006969623300629716, + "loss": 3.2229, + "step": 2698 + }, + { + "epoch": 0.78, + "learning_rate": 0.000696746689213003, + "loss": 3.2771, + "step": 2699 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006965310050542215, + "loss": 3.2674, + "step": 2700 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006963152776341043, + "loss": 3.3478, + "step": 2701 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006960995070001387, + "loss": 3.3323, + "step": 2702 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006958836931998209, + "loss": 3.282, + "step": 2703 + }, + { + "epoch": 0.78, + "learning_rate": 0.000695667836280657, + "loss": 3.1812, + "step": 2704 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006954519362901621, + "loss": 3.1915, + "step": 2705 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006952359932758616, + "loss": 3.2424, + "step": 2706 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006950200072852896, + "loss": 3.2786, + "step": 2707 + }, + { + "epoch": 0.78, + "learning_rate": 0.00069480397836599, + "loss": 3.3386, + "step": 2708 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006945879065655164, + "loss": 3.3075, + "step": 2709 + }, + { + "epoch": 0.78, + "learning_rate": 0.0006943717919314311, + "loss": 3.2141, + "step": 2710 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006941556345113066, + "loss": 3.2514, + "step": 2711 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006939394343527243, + "loss": 3.2487, + "step": 2712 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006937231915032751, + "loss": 3.2856, + "step": 2713 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006935069060105599, + "loss": 3.1706, + "step": 2714 + }, + { + "epoch": 0.79, + "learning_rate": 0.000693290577922188, + "loss": 3.3809, + "step": 2715 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006930742072857787, + "loss": 3.4066, + "step": 2716 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006928577941489606, + "loss": 3.2659, + "step": 2717 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006926413385593715, + "loss": 3.4152, + "step": 2718 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006924248405646589, + "loss": 3.2901, + "step": 2719 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006922083002124789, + "loss": 3.3123, + "step": 2720 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006919917175504977, + "loss": 3.3301, + "step": 2721 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006917750926263903, + "loss": 3.324, + "step": 2722 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006915584254878416, + "loss": 3.2039, + "step": 2723 + }, + { + "epoch": 0.79, + "learning_rate": 0.000691341716182545, + "loss": 3.309, + "step": 2724 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006911249647582037, + "loss": 3.5544, + "step": 2725 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006909081712625299, + "loss": 3.2108, + "step": 2726 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006906913357432454, + "loss": 3.1977, + "step": 2727 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006904744582480812, + "loss": 3.1041, + "step": 2728 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006902575388247772, + "loss": 3.3276, + "step": 2729 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006900405775210828, + "loss": 3.4057, + "step": 2730 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006898235743847563, + "loss": 3.2728, + "step": 2731 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006896065294635657, + "loss": 3.26, + "step": 2732 + }, + { + "epoch": 0.79, + "learning_rate": 0.000689389442805288, + "loss": 3.2951, + "step": 2733 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006891723144577094, + "loss": 3.2983, + "step": 2734 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006889551444686249, + "loss": 3.195, + "step": 2735 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006887379328858392, + "loss": 3.2447, + "step": 2736 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006885206797571659, + "loss": 3.166, + "step": 2737 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006883033851304279, + "loss": 3.2477, + "step": 2738 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006880860490534568, + "loss": 3.2551, + "step": 2739 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006878686715740942, + "loss": 3.1548, + "step": 2740 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006876512527401897, + "loss": 3.2234, + "step": 2741 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006874337925996028, + "loss": 3.1728, + "step": 2742 + }, + { + "epoch": 0.79, + "learning_rate": 0.000687216291200202, + "loss": 3.1858, + "step": 2743 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006869987485898643, + "loss": 3.2024, + "step": 2744 + }, + { + "epoch": 0.79, + "learning_rate": 0.0006867811648164768, + "loss": 3.2454, + "step": 2745 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006865635399279347, + "loss": 3.2286, + "step": 2746 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006863458739721428, + "loss": 3.2516, + "step": 2747 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006861281669970143, + "loss": 3.294, + "step": 2748 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006859104190504725, + "loss": 3.4409, + "step": 2749 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006856926301804488, + "loss": 3.3722, + "step": 2750 + }, + { + "epoch": 0.8, + "learning_rate": 0.000685474800434884, + "loss": 3.3042, + "step": 2751 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006852569298617277, + "loss": 3.3402, + "step": 2752 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006850390185089385, + "loss": 3.2451, + "step": 2753 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006848210664244846, + "loss": 3.3127, + "step": 2754 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006846030736563422, + "loss": 3.2795, + "step": 2755 + }, + { + "epoch": 0.8, + "learning_rate": 0.000684385040252497, + "loss": 3.2501, + "step": 2756 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006841669662609437, + "loss": 3.2865, + "step": 2757 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006839488517296855, + "loss": 3.3314, + "step": 2758 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006837306967067349, + "loss": 3.2642, + "step": 2759 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006835125012401133, + "loss": 3.3216, + "step": 2760 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006832942653778509, + "loss": 3.1626, + "step": 2761 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006830759891679866, + "loss": 3.173, + "step": 2762 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006828576726585686, + "loss": 3.4248, + "step": 2763 + }, + { + "epoch": 0.8, + "learning_rate": 0.000682639315897654, + "loss": 3.2822, + "step": 2764 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006824209189333081, + "loss": 3.2014, + "step": 2765 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006822024818136056, + "loss": 3.5576, + "step": 2766 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006819840045866301, + "loss": 3.4001, + "step": 2767 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006817654873004737, + "loss": 3.2767, + "step": 2768 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006815469300032373, + "loss": 3.2805, + "step": 2769 + }, + { + "epoch": 0.8, + "learning_rate": 0.000681328332743031, + "loss": 3.3265, + "step": 2770 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006811096955679734, + "loss": 3.4123, + "step": 2771 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006808910185261918, + "loss": 3.2895, + "step": 2772 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006806723016658228, + "loss": 3.3089, + "step": 2773 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006804535450350111, + "loss": 3.1922, + "step": 2774 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006802347486819104, + "loss": 3.2314, + "step": 2775 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006800159126546833, + "loss": 3.273, + "step": 2776 + }, + { + "epoch": 0.8, + "learning_rate": 0.000679797037001501, + "loss": 3.3195, + "step": 2777 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006795781217705435, + "loss": 3.2962, + "step": 2778 + }, + { + "epoch": 0.8, + "learning_rate": 0.0006793591670099994, + "loss": 3.305, + "step": 2779 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006791401727680662, + "loss": 3.3173, + "step": 2780 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006789211390929496, + "loss": 3.2163, + "step": 2781 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006787020660328646, + "loss": 3.3347, + "step": 2782 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006784829536360347, + "loss": 3.1824, + "step": 2783 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006782638019506917, + "loss": 3.252, + "step": 2784 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006780446110250765, + "loss": 3.2716, + "step": 2785 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006778253809074384, + "loss": 3.3218, + "step": 2786 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006776061116460353, + "loss": 3.2824, + "step": 2787 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006773868032891335, + "loss": 3.2143, + "step": 2788 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006771674558850088, + "loss": 3.2272, + "step": 2789 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006769480694819447, + "loss": 3.345, + "step": 2790 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006767286441282336, + "loss": 3.3204, + "step": 2791 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006765091798721763, + "loss": 3.2337, + "step": 2792 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006762896767620826, + "loss": 3.2564, + "step": 2793 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006760701348462703, + "loss": 3.2738, + "step": 2794 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006758505541730661, + "loss": 3.4115, + "step": 2795 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006756309347908052, + "loss": 3.1912, + "step": 2796 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006754112767478313, + "loss": 3.2432, + "step": 2797 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006751915800924963, + "loss": 3.315, + "step": 2798 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006749718448731611, + "loss": 3.2842, + "step": 2799 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006747520711381948, + "loss": 3.1246, + "step": 2800 + }, + { + "epoch": 0.81, + "learning_rate": 0.000674532258935975, + "loss": 3.1839, + "step": 2801 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006743124083148878, + "loss": 3.2736, + "step": 2802 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006740925193233281, + "loss": 3.2739, + "step": 2803 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006738725920096983, + "loss": 3.3954, + "step": 2804 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006736526264224101, + "loss": 3.3439, + "step": 2805 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006734326226098835, + "loss": 3.2505, + "step": 2806 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006732125806205468, + "loss": 3.356, + "step": 2807 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006729925005028365, + "loss": 3.2247, + "step": 2808 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006727723823051979, + "loss": 3.4164, + "step": 2809 + }, + { + "epoch": 0.81, + "learning_rate": 0.000672552226076084, + "loss": 3.1969, + "step": 2810 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006723320318639572, + "loss": 3.2439, + "step": 2811 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006721117997172874, + "loss": 3.2822, + "step": 2812 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006718915296845531, + "loss": 3.2659, + "step": 2813 + }, + { + "epoch": 0.81, + "learning_rate": 0.0006716712218142413, + "loss": 3.1938, + "step": 2814 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006714508761548472, + "loss": 3.3545, + "step": 2815 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006712304927548742, + "loss": 3.2272, + "step": 2816 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006710100716628344, + "loss": 3.2744, + "step": 2817 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006707896129272477, + "loss": 3.2233, + "step": 2818 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006705691165966426, + "loss": 3.4276, + "step": 2819 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006703485827195557, + "loss": 3.241, + "step": 2820 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006701280113445324, + "loss": 3.2323, + "step": 2821 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006699074025201253, + "loss": 3.2669, + "step": 2822 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006696867562948961, + "loss": 3.2367, + "step": 2823 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006694660727174146, + "loss": 3.328, + "step": 2824 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006692453518362586, + "loss": 3.2613, + "step": 2825 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006690245937000144, + "loss": 3.1883, + "step": 2826 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006688037983572762, + "loss": 3.2014, + "step": 2827 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006685829658566462, + "loss": 3.3016, + "step": 2828 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006683620962467358, + "loss": 3.2019, + "step": 2829 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006681411895761633, + "loss": 3.3221, + "step": 2830 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006679202458935559, + "loss": 3.3052, + "step": 2831 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006676992652475486, + "loss": 3.328, + "step": 2832 + }, + { + "epoch": 0.82, + "learning_rate": 0.000667478247686785, + "loss": 3.2886, + "step": 2833 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006672571932599164, + "loss": 3.1662, + "step": 2834 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006670361020156024, + "loss": 3.1904, + "step": 2835 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006668149740025104, + "loss": 3.3312, + "step": 2836 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006665938092693165, + "loss": 3.2363, + "step": 2837 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006663726078647045, + "loss": 3.2995, + "step": 2838 + }, + { + "epoch": 0.82, + "learning_rate": 0.000666151369837366, + "loss": 3.3215, + "step": 2839 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006659300952360011, + "loss": 3.2241, + "step": 2840 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006657087841093179, + "loss": 3.1086, + "step": 2841 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006654874365060325, + "loss": 3.2566, + "step": 2842 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006652660524748688, + "loss": 3.2437, + "step": 2843 + }, + { + "epoch": 0.82, + "learning_rate": 0.000665044632064559, + "loss": 3.2176, + "step": 2844 + }, + { + "epoch": 0.82, + "learning_rate": 0.000664823175323843, + "loss": 3.1715, + "step": 2845 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006646016823014694, + "loss": 3.3201, + "step": 2846 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006643801530461938, + "loss": 3.0813, + "step": 2847 + }, + { + "epoch": 0.82, + "learning_rate": 0.0006641585876067806, + "loss": 3.2653, + "step": 2848 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006639369860320016, + "loss": 3.3378, + "step": 2849 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006637153483706369, + "loss": 3.2339, + "step": 2850 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006634936746714745, + "loss": 3.2191, + "step": 2851 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006632719649833101, + "loss": 3.2473, + "step": 2852 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006630502193549475, + "loss": 3.3005, + "step": 2853 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006628284378351985, + "loss": 3.3315, + "step": 2854 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006626066204728826, + "loss": 3.3383, + "step": 2855 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006623847673168276, + "loss": 3.1044, + "step": 2856 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006621628784158685, + "loss": 3.256, + "step": 2857 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006619409538188486, + "loss": 3.1375, + "step": 2858 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006617189935746191, + "loss": 3.2039, + "step": 2859 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006614969977320388, + "loss": 3.2952, + "step": 2860 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006612749663399747, + "loss": 3.2014, + "step": 2861 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006610528994473011, + "loss": 3.2924, + "step": 2862 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006608307971029008, + "loss": 3.2879, + "step": 2863 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006606086593556636, + "loss": 3.3133, + "step": 2864 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006603864862544878, + "loss": 3.2257, + "step": 2865 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006601642778482792, + "loss": 3.1595, + "step": 2866 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006599420341859511, + "loss": 3.2108, + "step": 2867 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006597197553164251, + "loss": 3.2792, + "step": 2868 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006594974412886302, + "loss": 3.2907, + "step": 2869 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006592750921515031, + "loss": 3.3022, + "step": 2870 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006590527079539884, + "loss": 3.3651, + "step": 2871 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006588302887450386, + "loss": 3.2856, + "step": 2872 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006586078345736134, + "loss": 3.3225, + "step": 2873 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006583853454886806, + "loss": 3.3379, + "step": 2874 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006581628215392152, + "loss": 3.1857, + "step": 2875 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006579402627742008, + "loss": 3.2187, + "step": 2876 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006577176692426278, + "loss": 3.1648, + "step": 2877 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006574950409934945, + "loss": 3.1616, + "step": 2878 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006572723780758069, + "loss": 3.3054, + "step": 2879 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006570496805385787, + "loss": 3.1509, + "step": 2880 + }, + { + "epoch": 0.83, + "learning_rate": 0.000656826948430831, + "loss": 3.2289, + "step": 2881 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006566041818015927, + "loss": 3.0798, + "step": 2882 + }, + { + "epoch": 0.83, + "learning_rate": 0.0006563813806999005, + "loss": 3.2209, + "step": 2883 + }, + { + "epoch": 0.84, + "learning_rate": 0.000656158545174798, + "loss": 3.2085, + "step": 2884 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006559356752753371, + "loss": 3.1369, + "step": 2885 + }, + { + "epoch": 0.84, + "learning_rate": 0.000655712771050577, + "loss": 3.3144, + "step": 2886 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006554898325495842, + "loss": 3.0628, + "step": 2887 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006552668598214331, + "loss": 3.4049, + "step": 2888 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006550438529152055, + "loss": 3.3048, + "step": 2889 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006548208118799908, + "loss": 3.4122, + "step": 2890 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006545977367648859, + "loss": 3.2144, + "step": 2891 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006543746276189951, + "loss": 3.4288, + "step": 2892 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006541514844914299, + "loss": 3.2563, + "step": 2893 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006539283074313101, + "loss": 3.2746, + "step": 2894 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006537050964877625, + "loss": 3.282, + "step": 2895 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006534818517099209, + "loss": 3.1768, + "step": 2896 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006532585731469274, + "loss": 3.2541, + "step": 2897 + }, + { + "epoch": 0.84, + "learning_rate": 0.000653035260847931, + "loss": 3.3542, + "step": 2898 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006528119148620882, + "loss": 3.3448, + "step": 2899 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006525885352385631, + "loss": 3.3586, + "step": 2900 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006523651220265269, + "loss": 3.2899, + "step": 2901 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006521416752751586, + "loss": 3.2887, + "step": 2902 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006519181950336441, + "loss": 3.2911, + "step": 2903 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006516946813511774, + "loss": 3.3432, + "step": 2904 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006514711342769588, + "loss": 3.2196, + "step": 2905 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006512475538601968, + "loss": 3.2621, + "step": 2906 + }, + { + "epoch": 0.84, + "learning_rate": 0.000651023940150107, + "loss": 3.2837, + "step": 2907 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006508002931959125, + "loss": 3.2013, + "step": 2908 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006505766130468431, + "loss": 3.2471, + "step": 2909 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006503528997521365, + "loss": 3.2407, + "step": 2910 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006501291533610377, + "loss": 3.1515, + "step": 2911 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006499053739227989, + "loss": 3.2824, + "step": 2912 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006496815614866791, + "loss": 3.3025, + "step": 2913 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006494577161019451, + "loss": 3.13, + "step": 2914 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006492338378178708, + "loss": 3.2099, + "step": 2915 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006490099266837376, + "loss": 3.3331, + "step": 2916 + }, + { + "epoch": 0.84, + "learning_rate": 0.0006487859827488336, + "loss": 3.1048, + "step": 2917 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006485620060624545, + "loss": 3.2354, + "step": 2918 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006483379966739028, + "loss": 3.4434, + "step": 2919 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006481139546324891, + "loss": 3.2768, + "step": 2920 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006478898799875302, + "loss": 3.2981, + "step": 2921 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006476657727883506, + "loss": 3.2779, + "step": 2922 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006474416330842818, + "loss": 3.3172, + "step": 2923 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006472174609246625, + "loss": 3.3083, + "step": 2924 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006469932563588386, + "loss": 3.3294, + "step": 2925 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006467690194361628, + "loss": 3.3147, + "step": 2926 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006465447502059954, + "loss": 3.2609, + "step": 2927 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006463204487177037, + "loss": 3.1807, + "step": 2928 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006460961150206619, + "loss": 3.1629, + "step": 2929 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006458717491642514, + "loss": 3.298, + "step": 2930 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006456473511978607, + "loss": 3.3014, + "step": 2931 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006454229211708853, + "loss": 3.2526, + "step": 2932 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006451984591327278, + "loss": 3.4198, + "step": 2933 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006449739651327979, + "loss": 3.1244, + "step": 2934 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006447494392205122, + "loss": 3.3049, + "step": 2935 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006445248814452944, + "loss": 3.2626, + "step": 2936 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006443002918565754, + "loss": 3.06, + "step": 2937 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006440756705037927, + "loss": 3.3338, + "step": 2938 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006438510174363912, + "loss": 3.2738, + "step": 2939 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006436263327038224, + "loss": 3.2552, + "step": 2940 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006434016163555452, + "loss": 3.1693, + "step": 2941 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006431768684410251, + "loss": 3.2715, + "step": 2942 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006429520890097346, + "loss": 3.1939, + "step": 2943 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006427272781111533, + "loss": 3.2692, + "step": 2944 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006425024357947677, + "loss": 3.1504, + "step": 2945 + }, + { + "epoch": 0.85, + "learning_rate": 0.000642277562110071, + "loss": 3.3287, + "step": 2946 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006420526571065636, + "loss": 3.1353, + "step": 2947 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006418277208337526, + "loss": 3.1197, + "step": 2948 + }, + { + "epoch": 0.85, + "learning_rate": 0.000641602753341152, + "loss": 3.2429, + "step": 2949 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006413777546782828, + "loss": 3.2656, + "step": 2950 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006411527248946728, + "loss": 3.2815, + "step": 2951 + }, + { + "epoch": 0.85, + "learning_rate": 0.0006409276640398564, + "loss": 3.3028, + "step": 2952 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006407025721633752, + "loss": 3.381, + "step": 2953 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006404774493147777, + "loss": 3.3677, + "step": 2954 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006402522955436187, + "loss": 3.2937, + "step": 2955 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006400271108994602, + "loss": 3.1929, + "step": 2956 + }, + { + "epoch": 0.86, + "learning_rate": 0.000639801895431871, + "loss": 3.1225, + "step": 2957 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006395766491904262, + "loss": 3.3399, + "step": 2958 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006393513722247089, + "loss": 3.2407, + "step": 2959 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006391260645843074, + "loss": 3.2491, + "step": 2960 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006389007263188176, + "loss": 3.2679, + "step": 2961 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006386753574778421, + "loss": 3.2853, + "step": 2962 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006384499581109903, + "loss": 3.2665, + "step": 2963 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006382245282678781, + "loss": 3.2671, + "step": 2964 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006379990679981281, + "loss": 3.2733, + "step": 2965 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006377735773513697, + "loss": 3.1639, + "step": 2966 + }, + { + "epoch": 0.86, + "learning_rate": 0.000637548056377239, + "loss": 3.1967, + "step": 2967 + }, + { + "epoch": 0.86, + "learning_rate": 0.000637322505125379, + "loss": 3.1617, + "step": 2968 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006370969236454385, + "loss": 3.2546, + "step": 2969 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006368713119870742, + "loss": 3.253, + "step": 2970 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006366456701999483, + "loss": 3.2741, + "step": 2971 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006364199983337305, + "loss": 3.3579, + "step": 2972 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006361942964380967, + "loss": 3.2905, + "step": 2973 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006359685645627294, + "loss": 3.2896, + "step": 2974 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006357428027573174, + "loss": 3.2191, + "step": 2975 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006355170110715571, + "loss": 3.258, + "step": 2976 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006352911895551507, + "loss": 3.1467, + "step": 2977 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006350653382578068, + "loss": 3.2894, + "step": 2978 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006348394572292411, + "loss": 3.249, + "step": 2979 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006346135465191754, + "loss": 3.2811, + "step": 2980 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006343876061773385, + "loss": 3.2736, + "step": 2981 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006341616362534652, + "loss": 3.3147, + "step": 2982 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006339356367972973, + "loss": 3.1584, + "step": 2983 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006337096078585825, + "loss": 3.2012, + "step": 2984 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006334835494870759, + "loss": 3.259, + "step": 2985 + }, + { + "epoch": 0.86, + "learning_rate": 0.0006332574617325381, + "loss": 3.1947, + "step": 2986 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006330313446447368, + "loss": 3.2572, + "step": 2987 + }, + { + "epoch": 0.87, + "learning_rate": 0.000632805198273446, + "loss": 3.2683, + "step": 2988 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006325790226684459, + "loss": 3.2501, + "step": 2989 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006323528178795236, + "loss": 3.2299, + "step": 2990 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006321265839564722, + "loss": 3.2231, + "step": 2991 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006319003209490914, + "loss": 3.3582, + "step": 2992 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006316740289071872, + "loss": 3.2329, + "step": 2993 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006314477078805723, + "loss": 3.3179, + "step": 2994 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006312213579190655, + "loss": 3.2708, + "step": 2995 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006309949790724922, + "loss": 3.2385, + "step": 2996 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006307685713906834, + "loss": 3.2586, + "step": 2997 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006305421349234774, + "loss": 3.0786, + "step": 2998 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006303156697207187, + "loss": 3.266, + "step": 2999 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006300891758322575, + "loss": 3.2404, + "step": 3000 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006298626533079508, + "loss": 3.1514, + "step": 3001 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006296361021976619, + "loss": 3.2389, + "step": 3002 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006294095225512603, + "loss": 3.2675, + "step": 3003 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006291829144186218, + "loss": 3.211, + "step": 3004 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006289562778496285, + "loss": 3.3841, + "step": 3005 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006287296128941684, + "loss": 3.3493, + "step": 3006 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006285029196021365, + "loss": 3.2789, + "step": 3007 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006282761980234332, + "loss": 3.2232, + "step": 3008 + }, + { + "epoch": 0.87, + "learning_rate": 0.000628049448207966, + "loss": 3.2507, + "step": 3009 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006278226702056476, + "loss": 3.3205, + "step": 3010 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006275958640663976, + "loss": 3.3396, + "step": 3011 + }, + { + "epoch": 0.87, + "learning_rate": 0.000627369029840142, + "loss": 3.2751, + "step": 3012 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006271421675768123, + "loss": 3.3025, + "step": 3013 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006269152773263467, + "loss": 3.3544, + "step": 3014 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006266883591386891, + "loss": 3.2833, + "step": 3015 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006264614130637899, + "loss": 3.2594, + "step": 3016 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006262344391516056, + "loss": 3.234, + "step": 3017 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006260074374520988, + "loss": 3.189, + "step": 3018 + }, + { + "epoch": 0.87, + "learning_rate": 0.000625780408015238, + "loss": 3.3295, + "step": 3019 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006255533508909982, + "loss": 3.2597, + "step": 3020 + }, + { + "epoch": 0.87, + "learning_rate": 0.0006253262661293602, + "loss": 3.3978, + "step": 3021 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006250991537803111, + "loss": 3.2721, + "step": 3022 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006248720138938438, + "loss": 3.2833, + "step": 3023 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006246448465199576, + "loss": 3.2559, + "step": 3024 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006244176517086572, + "loss": 3.2385, + "step": 3025 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006241904295099545, + "loss": 3.3482, + "step": 3026 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006239631799738658, + "loss": 3.1604, + "step": 3027 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006237359031504153, + "loss": 3.1872, + "step": 3028 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006235085990896317, + "loss": 3.22, + "step": 3029 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006232812678415504, + "loss": 3.2486, + "step": 3030 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006230539094562125, + "loss": 3.2382, + "step": 3031 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006228265239836653, + "loss": 3.3039, + "step": 3032 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006225991114739622, + "loss": 3.2392, + "step": 3033 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006223716719771619, + "loss": 3.3457, + "step": 3034 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006221442055433297, + "loss": 3.3264, + "step": 3035 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006219167122225365, + "loss": 3.283, + "step": 3036 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006216891920648593, + "loss": 3.2296, + "step": 3037 + }, + { + "epoch": 0.88, + "learning_rate": 0.000621461645120381, + "loss": 3.3089, + "step": 3038 + }, + { + "epoch": 0.88, + "learning_rate": 0.00062123407143919, + "loss": 3.1844, + "step": 3039 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006210064710713811, + "loss": 3.174, + "step": 3040 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006207788440670546, + "loss": 3.2376, + "step": 3041 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006205511904763172, + "loss": 3.1327, + "step": 3042 + }, + { + "epoch": 0.88, + "learning_rate": 0.000620323510349281, + "loss": 3.2842, + "step": 3043 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006200958037360636, + "loss": 3.3093, + "step": 3044 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006198680706867892, + "loss": 3.2023, + "step": 3045 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006196403112515874, + "loss": 3.351, + "step": 3046 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006194125254805938, + "loss": 3.2677, + "step": 3047 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006191847134239495, + "loss": 3.2655, + "step": 3048 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006189568751318018, + "loss": 3.2504, + "step": 3049 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006187290106543032, + "loss": 3.2788, + "step": 3050 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006185011200416128, + "loss": 3.1929, + "step": 3051 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006182732033438945, + "loss": 3.3505, + "step": 3052 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006180452606113185, + "loss": 3.3154, + "step": 3053 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006178172918940608, + "loss": 3.2751, + "step": 3054 + }, + { + "epoch": 0.88, + "learning_rate": 0.0006175892972423029, + "loss": 3.1969, + "step": 3055 + }, + { + "epoch": 0.89, + "learning_rate": 0.000617361276706232, + "loss": 3.4048, + "step": 3056 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006171332303360411, + "loss": 3.2562, + "step": 3057 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006169051581819288, + "loss": 3.3747, + "step": 3058 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006166770602940994, + "loss": 3.1618, + "step": 3059 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006164489367227633, + "loss": 3.2322, + "step": 3060 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006162207875181353, + "loss": 3.3375, + "step": 3061 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006159926127304373, + "loss": 3.2261, + "step": 3062 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006157644124098963, + "loss": 3.2276, + "step": 3063 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006155361866067445, + "loss": 3.1939, + "step": 3064 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006153079353712201, + "loss": 3.2969, + "step": 3065 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006150796587535669, + "loss": 3.2332, + "step": 3066 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006148513568040344, + "loss": 3.3344, + "step": 3067 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006146230295728771, + "loss": 3.2634, + "step": 3068 + }, + { + "epoch": 0.89, + "learning_rate": 0.000614394677110356, + "loss": 3.2296, + "step": 3069 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006141662994667368, + "loss": 3.1233, + "step": 3070 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006139378966922908, + "loss": 3.2563, + "step": 3071 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006137094688372956, + "loss": 3.3209, + "step": 3072 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006134810159520337, + "loss": 3.1583, + "step": 3073 + }, + { + "epoch": 0.89, + "learning_rate": 0.000613252538086793, + "loss": 3.2603, + "step": 3074 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006130240352918674, + "loss": 3.2493, + "step": 3075 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006127955076175559, + "loss": 3.1237, + "step": 3076 + }, + { + "epoch": 0.89, + "learning_rate": 0.000612566955114163, + "loss": 3.3243, + "step": 3077 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006123383778319991, + "loss": 3.2638, + "step": 3078 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006121097758213793, + "loss": 3.2308, + "step": 3079 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006118811491326249, + "loss": 3.2996, + "step": 3080 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006116524978160619, + "loss": 3.3955, + "step": 3081 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006114238219220223, + "loss": 3.2582, + "step": 3082 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006111951215008437, + "loss": 3.263, + "step": 3083 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006109663966028681, + "loss": 3.1795, + "step": 3084 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006107376472784437, + "loss": 3.2425, + "step": 3085 + }, + { + "epoch": 0.89, + "learning_rate": 0.000610508873577924, + "loss": 3.1689, + "step": 3086 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006102800755516679, + "loss": 3.3359, + "step": 3087 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006100512532500393, + "loss": 3.4719, + "step": 3088 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006098224067234077, + "loss": 3.198, + "step": 3089 + }, + { + "epoch": 0.89, + "learning_rate": 0.0006095935360221476, + "loss": 3.2933, + "step": 3090 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006093646411966396, + "loss": 3.2318, + "step": 3091 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006091357222972687, + "loss": 3.217, + "step": 3092 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006089067793744258, + "loss": 3.2589, + "step": 3093 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006086778124785068, + "loss": 3.3251, + "step": 3094 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006084488216599133, + "loss": 3.1981, + "step": 3095 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006082198069690515, + "loss": 3.2685, + "step": 3096 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006079907684563333, + "loss": 3.393, + "step": 3097 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006077617061721756, + "loss": 3.2113, + "step": 3098 + }, + { + "epoch": 0.9, + "learning_rate": 0.000607532620167001, + "loss": 3.1016, + "step": 3099 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006073035104912367, + "loss": 3.2169, + "step": 3100 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006070743771953157, + "loss": 3.2454, + "step": 3101 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006068452203296754, + "loss": 3.2152, + "step": 3102 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006066160399447594, + "loss": 3.2339, + "step": 3103 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006063868360910159, + "loss": 3.227, + "step": 3104 + }, + { + "epoch": 0.9, + "learning_rate": 0.000606157608818898, + "loss": 3.1516, + "step": 3105 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006059283581788648, + "loss": 3.119, + "step": 3106 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006056990842213796, + "loss": 3.2774, + "step": 3107 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006054697869969114, + "loss": 3.3809, + "step": 3108 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006052404665559341, + "loss": 3.297, + "step": 3109 + }, + { + "epoch": 0.9, + "learning_rate": 0.000605011122948927, + "loss": 3.2112, + "step": 3110 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006047817562263744, + "loss": 3.2805, + "step": 3111 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006045523664387652, + "loss": 3.3048, + "step": 3112 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006043229536365941, + "loss": 3.3136, + "step": 3113 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006040935178703604, + "loss": 3.1619, + "step": 3114 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006038640591905685, + "loss": 3.2138, + "step": 3115 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006036345776477281, + "loss": 3.0883, + "step": 3116 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006034050732923537, + "loss": 3.3112, + "step": 3117 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006031755461749648, + "loss": 3.2973, + "step": 3118 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006029459963460861, + "loss": 3.2762, + "step": 3119 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006027164238562472, + "loss": 3.2457, + "step": 3120 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006024868287559826, + "loss": 3.36, + "step": 3121 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006022572110958319, + "loss": 3.2776, + "step": 3122 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006020275709263397, + "loss": 3.1317, + "step": 3123 + }, + { + "epoch": 0.9, + "learning_rate": 0.0006017979082980553, + "loss": 3.3039, + "step": 3124 + }, + { + "epoch": 0.91, + "learning_rate": 0.0006015682232615336, + "loss": 3.3179, + "step": 3125 + }, + { + "epoch": 0.91, + "learning_rate": 0.0006013385158673333, + "loss": 3.2328, + "step": 3126 + }, + { + "epoch": 0.91, + "learning_rate": 0.000601108786166019, + "loss": 3.2392, + "step": 3127 + }, + { + "epoch": 0.91, + "learning_rate": 0.00060087903420816, + "loss": 3.2182, + "step": 3128 + }, + { + "epoch": 0.91, + "learning_rate": 0.0006006492600443301, + "loss": 3.2096, + "step": 3129 + }, + { + "epoch": 0.91, + "learning_rate": 0.0006004194637251085, + "loss": 3.2318, + "step": 3130 + }, + { + "epoch": 0.91, + "learning_rate": 0.000600189645301079, + "loss": 3.3465, + "step": 3131 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005999598048228302, + "loss": 3.3014, + "step": 3132 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005997299423409559, + "loss": 3.0876, + "step": 3133 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005995000579060545, + "loss": 3.2957, + "step": 3134 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005992701515687291, + "loss": 3.2702, + "step": 3135 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005990402233795877, + "loss": 3.3656, + "step": 3136 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005988102733892434, + "loss": 3.1968, + "step": 3137 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005985803016483138, + "loss": 3.2172, + "step": 3138 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005983503082074213, + "loss": 3.3685, + "step": 3139 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005981202931171933, + "loss": 3.2492, + "step": 3140 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005978902564282616, + "loss": 3.3206, + "step": 3141 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005976601981912632, + "loss": 3.2116, + "step": 3142 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005974301184568395, + "loss": 3.2235, + "step": 3143 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005972000172756366, + "loss": 3.2545, + "step": 3144 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005969698946983055, + "loss": 3.2059, + "step": 3145 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005967397507755022, + "loss": 3.3206, + "step": 3146 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005965095855578869, + "loss": 3.1173, + "step": 3147 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005962793990961243, + "loss": 3.2464, + "step": 3148 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005960491914408845, + "loss": 3.1963, + "step": 3149 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005958189626428419, + "loss": 3.2827, + "step": 3150 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005955887127526754, + "loss": 3.2277, + "step": 3151 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005953584418210689, + "loss": 3.2113, + "step": 3152 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005951281498987105, + "loss": 3.2404, + "step": 3153 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005948978370362933, + "loss": 3.1788, + "step": 3154 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005946675032845148, + "loss": 3.2937, + "step": 3155 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005944371486940772, + "loss": 3.2668, + "step": 3156 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005942067733156871, + "loss": 3.2224, + "step": 3157 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005939763772000559, + "loss": 3.2991, + "step": 3158 + }, + { + "epoch": 0.91, + "learning_rate": 0.0005937459603978997, + "loss": 3.3603, + "step": 3159 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005935155229599386, + "loss": 3.2502, + "step": 3160 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005932850649368978, + "loss": 3.2464, + "step": 3161 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005930545863795067, + "loss": 3.188, + "step": 3162 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005928240873384993, + "loss": 3.1663, + "step": 3163 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005925935678646144, + "loss": 3.2398, + "step": 3164 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005923630280085947, + "loss": 3.2748, + "step": 3165 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005921324678211879, + "loss": 3.1523, + "step": 3166 + }, + { + "epoch": 0.92, + "learning_rate": 0.000591901887353146, + "loss": 3.2155, + "step": 3167 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005916712866552254, + "loss": 3.3291, + "step": 3168 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005914406657781871, + "loss": 3.2218, + "step": 3169 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005912100247727964, + "loss": 3.2184, + "step": 3170 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005909793636898231, + "loss": 3.3097, + "step": 3171 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005907486825800414, + "loss": 3.267, + "step": 3172 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005905179814942301, + "loss": 3.2012, + "step": 3173 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005902872604831719, + "loss": 3.315, + "step": 3174 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005900565195976543, + "loss": 3.2357, + "step": 3175 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005898257588884691, + "loss": 3.3202, + "step": 3176 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005895949784064127, + "loss": 3.3149, + "step": 3177 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005893641782022852, + "loss": 3.2979, + "step": 3178 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005891333583268916, + "loss": 3.3004, + "step": 3179 + }, + { + "epoch": 0.92, + "learning_rate": 0.000588902518831041, + "loss": 3.1125, + "step": 3180 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005886716597655471, + "loss": 3.3378, + "step": 3181 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005884407811812275, + "loss": 3.3147, + "step": 3182 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005882098831289043, + "loss": 3.2988, + "step": 3183 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005879789656594037, + "loss": 3.2934, + "step": 3184 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005877480288235568, + "loss": 3.4488, + "step": 3185 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005875170726721982, + "loss": 3.1786, + "step": 3186 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005872860972561671, + "loss": 3.2047, + "step": 3187 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005870551026263069, + "loss": 3.1891, + "step": 3188 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005868240888334653, + "loss": 3.218, + "step": 3189 + }, + { + "epoch": 0.92, + "learning_rate": 0.000586593055928494, + "loss": 3.2997, + "step": 3190 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005863620039622491, + "loss": 3.0456, + "step": 3191 + }, + { + "epoch": 0.92, + "learning_rate": 0.000586130932985591, + "loss": 3.2112, + "step": 3192 + }, + { + "epoch": 0.92, + "learning_rate": 0.0005858998430493841, + "loss": 3.242, + "step": 3193 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005856687342044968, + "loss": 3.2181, + "step": 3194 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005854376065018021, + "loss": 3.2327, + "step": 3195 + }, + { + "epoch": 0.93, + "learning_rate": 0.000585206459992177, + "loss": 3.2053, + "step": 3196 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005849752947265023, + "loss": 3.3143, + "step": 3197 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005847441107556632, + "loss": 3.2194, + "step": 3198 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005845129081305492, + "loss": 3.1319, + "step": 3199 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005842816869020538, + "loss": 3.2735, + "step": 3200 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005840504471210741, + "loss": 3.2891, + "step": 3201 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005838191888385121, + "loss": 3.2134, + "step": 3202 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005835879121052731, + "loss": 3.2739, + "step": 3203 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005833566169722674, + "loss": 3.1864, + "step": 3204 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005831253034904082, + "loss": 3.303, + "step": 3205 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005828939717106137, + "loss": 3.2396, + "step": 3206 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005826626216838053, + "loss": 3.1644, + "step": 3207 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005824312534609094, + "loss": 3.4487, + "step": 3208 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005821998670928556, + "loss": 3.2508, + "step": 3209 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005819684626305776, + "loss": 3.2202, + "step": 3210 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005817370401250136, + "loss": 3.2755, + "step": 3211 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005815055996271052, + "loss": 3.1585, + "step": 3212 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005812741411877982, + "loss": 3.3328, + "step": 3213 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005810426648580425, + "loss": 3.1315, + "step": 3214 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005808111706887913, + "loss": 3.316, + "step": 3215 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005805796587310028, + "loss": 3.144, + "step": 3216 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005803481290356382, + "loss": 3.2485, + "step": 3217 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005801165816536628, + "loss": 3.3051, + "step": 3218 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005798850166360461, + "loss": 3.2119, + "step": 3219 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005796534340337614, + "loss": 3.1534, + "step": 3220 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005794218338977853, + "loss": 3.3262, + "step": 3221 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005791902162790992, + "loss": 3.2959, + "step": 3222 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005789585812286877, + "loss": 3.2719, + "step": 3223 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005787269287975392, + "loss": 3.2357, + "step": 3224 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005784952590366464, + "loss": 3.235, + "step": 3225 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005782635719970055, + "loss": 3.1888, + "step": 3226 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005780318677296166, + "loss": 3.2985, + "step": 3227 + }, + { + "epoch": 0.93, + "learning_rate": 0.0005778001462854831, + "loss": 3.3284, + "step": 3228 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005775684077156132, + "loss": 3.1872, + "step": 3229 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005773366520710181, + "loss": 3.2582, + "step": 3230 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005771048794027128, + "loss": 3.2592, + "step": 3231 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005768730897617164, + "loss": 3.2764, + "step": 3232 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005766412831990513, + "loss": 3.218, + "step": 3233 + }, + { + "epoch": 0.94, + "learning_rate": 0.000576409459765744, + "loss": 3.3444, + "step": 3234 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005761776195128248, + "loss": 3.2446, + "step": 3235 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005759457624913269, + "loss": 3.1785, + "step": 3236 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005757138887522884, + "loss": 3.2727, + "step": 3237 + }, + { + "epoch": 0.94, + "learning_rate": 0.00057548199834675, + "loss": 3.301, + "step": 3238 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005752500913257568, + "loss": 3.2736, + "step": 3239 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005750181677403569, + "loss": 3.2852, + "step": 3240 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005747862276416028, + "loss": 3.1935, + "step": 3241 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005745542710805502, + "loss": 3.2986, + "step": 3242 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005743222981082582, + "loss": 3.228, + "step": 3243 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005740903087757902, + "loss": 3.2497, + "step": 3244 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005738583031342123, + "loss": 3.3076, + "step": 3245 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005736262812345951, + "loss": 3.3299, + "step": 3246 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005733942431280123, + "loss": 3.2272, + "step": 3247 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005731621888655409, + "loss": 3.2522, + "step": 3248 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005729301184982621, + "loss": 3.2882, + "step": 3249 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005726980320772602, + "loss": 3.1968, + "step": 3250 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005724659296536234, + "loss": 3.2209, + "step": 3251 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005722338112784429, + "loss": 3.1716, + "step": 3252 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005720016770028138, + "loss": 3.2655, + "step": 3253 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005717695268778344, + "loss": 3.1404, + "step": 3254 + }, + { + "epoch": 0.94, + "learning_rate": 0.000571537360954607, + "loss": 3.1403, + "step": 3255 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005713051792842371, + "loss": 3.2154, + "step": 3256 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005710729819178333, + "loss": 3.318, + "step": 3257 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005710729819178333, + "loss": 3.2924, + "step": 3258 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005708407689065082, + "loss": 3.2685, + "step": 3259 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005706085403013773, + "loss": 3.2382, + "step": 3260 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005703762961535604, + "loss": 3.3208, + "step": 3261 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005701440365141799, + "loss": 3.0944, + "step": 3262 + }, + { + "epoch": 0.94, + "learning_rate": 0.0005699117614343618, + "loss": 3.175, + "step": 3263 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005696794709652358, + "loss": 3.1948, + "step": 3264 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005694471651579346, + "loss": 3.1416, + "step": 3265 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005692148440635946, + "loss": 3.1449, + "step": 3266 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005689825077333552, + "loss": 3.3023, + "step": 3267 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005687501562183596, + "loss": 3.358, + "step": 3268 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005685177895697541, + "loss": 3.2512, + "step": 3269 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005682854078386882, + "loss": 3.2648, + "step": 3270 + }, + { + "epoch": 0.95, + "learning_rate": 0.000568053011076315, + "loss": 3.0966, + "step": 3271 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005678205993337907, + "loss": 3.2642, + "step": 3272 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005675881726622749, + "loss": 3.2958, + "step": 3273 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005673557311129306, + "loss": 3.2105, + "step": 3274 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005671232747369236, + "loss": 3.299, + "step": 3275 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005668908035854236, + "loss": 3.2727, + "step": 3276 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005666583177096029, + "loss": 3.1718, + "step": 3277 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005664258171606379, + "loss": 3.1595, + "step": 3278 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005661933019897074, + "loss": 3.1777, + "step": 3279 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005659607722479938, + "loss": 3.2947, + "step": 3280 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005657282279866826, + "loss": 3.187, + "step": 3281 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005654956692569627, + "loss": 3.1988, + "step": 3282 + }, + { + "epoch": 0.95, + "learning_rate": 0.000565263096110026, + "loss": 3.2905, + "step": 3283 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005650305085970672, + "loss": 3.283, + "step": 3284 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005647979067692851, + "loss": 3.2282, + "step": 3285 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005645652906778808, + "loss": 3.232, + "step": 3286 + }, + { + "epoch": 0.95, + "learning_rate": 0.000564332660374059, + "loss": 3.3272, + "step": 3287 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005641000159090272, + "loss": 3.3957, + "step": 3288 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005638673573339966, + "loss": 3.2655, + "step": 3289 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005636346847001806, + "loss": 3.2619, + "step": 3290 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005634019980587968, + "loss": 3.249, + "step": 3291 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005631692974610648, + "loss": 3.2037, + "step": 3292 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005629365829582077, + "loss": 3.2209, + "step": 3293 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005627038546014519, + "loss": 3.1988, + "step": 3294 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005624711124420269, + "loss": 3.33, + "step": 3295 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005622383565311646, + "loss": 3.2762, + "step": 3296 + }, + { + "epoch": 0.95, + "learning_rate": 0.0005620055869201007, + "loss": 3.1339, + "step": 3297 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005617728036600734, + "loss": 3.1705, + "step": 3298 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005615400068023238, + "loss": 3.3136, + "step": 3299 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005613071963980966, + "loss": 3.1917, + "step": 3300 + }, + { + "epoch": 0.96, + "learning_rate": 0.000561074372498639, + "loss": 3.2731, + "step": 3301 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005608415351552013, + "loss": 3.2817, + "step": 3302 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005606086844190367, + "loss": 3.3092, + "step": 3303 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005603758203414015, + "loss": 3.242, + "step": 3304 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005601429429735547, + "loss": 3.2815, + "step": 3305 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005599100523667587, + "loss": 3.2599, + "step": 3306 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005596771485722781, + "loss": 3.1389, + "step": 3307 + }, + { + "epoch": 0.96, + "learning_rate": 0.000559444231641381, + "loss": 3.2742, + "step": 3308 + }, + { + "epoch": 0.96, + "learning_rate": 0.000559211301625338, + "loss": 3.2601, + "step": 3309 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005589783585754232, + "loss": 3.3026, + "step": 3310 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005587454025429126, + "loss": 3.2131, + "step": 3311 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005585124335790857, + "loss": 3.1579, + "step": 3312 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005582794517352249, + "loss": 3.2619, + "step": 3313 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005580464570626152, + "loss": 3.3872, + "step": 3314 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005578134496125445, + "loss": 3.2571, + "step": 3315 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005575804294363034, + "loss": 3.344, + "step": 3316 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005573473965851855, + "loss": 3.3545, + "step": 3317 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005571143511104873, + "loss": 3.1947, + "step": 3318 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005568812930635076, + "loss": 3.2134, + "step": 3319 + }, + { + "epoch": 0.96, + "learning_rate": 0.000556648222495548, + "loss": 3.2624, + "step": 3320 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005564151394579136, + "loss": 3.2663, + "step": 3321 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005561820440019116, + "loss": 3.147, + "step": 3322 + }, + { + "epoch": 0.96, + "learning_rate": 0.000555948936178852, + "loss": 3.2098, + "step": 3323 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005557158160400476, + "loss": 3.2802, + "step": 3324 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005554826836368139, + "loss": 3.2689, + "step": 3325 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005552495390204691, + "loss": 3.0766, + "step": 3326 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005550163822423341, + "loss": 3.1545, + "step": 3327 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005547832133537327, + "loss": 3.347, + "step": 3328 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005545500324059907, + "loss": 3.2382, + "step": 3329 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005543168394504374, + "loss": 3.2939, + "step": 3330 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005540836345384042, + "loss": 3.2832, + "step": 3331 + }, + { + "epoch": 0.96, + "learning_rate": 0.0005538504177212251, + "loss": 3.1392, + "step": 3332 + }, + { + "epoch": 0.97, + "learning_rate": 0.000553617189050237, + "loss": 3.1596, + "step": 3333 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005533839485767795, + "loss": 3.2845, + "step": 3334 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005531506963521943, + "loss": 3.3786, + "step": 3335 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005529174324278261, + "loss": 3.2478, + "step": 3336 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005526841568550223, + "loss": 3.0776, + "step": 3337 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005524508696851322, + "loss": 3.2214, + "step": 3338 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005522175709695084, + "loss": 3.2402, + "step": 3339 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005519842607595055, + "loss": 3.2881, + "step": 3340 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005517509391064809, + "loss": 3.2784, + "step": 3341 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005515176060617945, + "loss": 3.2176, + "step": 3342 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005512842616768087, + "loss": 3.1566, + "step": 3343 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005510509060028885, + "loss": 3.2161, + "step": 3344 + }, + { + "epoch": 0.97, + "learning_rate": 0.000550817539091401, + "loss": 3.1768, + "step": 3345 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005505841609937161, + "loss": 3.2538, + "step": 3346 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005503507717612062, + "loss": 3.3809, + "step": 3347 + }, + { + "epoch": 0.97, + "learning_rate": 0.000550117371445246, + "loss": 3.3108, + "step": 3348 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005498839600972126, + "loss": 3.1988, + "step": 3349 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005496505377684857, + "loss": 3.2589, + "step": 3350 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005494171045104472, + "loss": 3.1137, + "step": 3351 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005491836603744817, + "loss": 3.3186, + "step": 3352 + }, + { + "epoch": 0.97, + "learning_rate": 0.000548950205411976, + "loss": 3.2261, + "step": 3353 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005487167396743194, + "loss": 3.3226, + "step": 3354 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005484832632129031, + "loss": 3.2397, + "step": 3355 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005482497760791214, + "loss": 3.0879, + "step": 3356 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005480162783243706, + "loss": 3.2543, + "step": 3357 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005477827700000492, + "loss": 3.3057, + "step": 3358 + }, + { + "epoch": 0.97, + "learning_rate": 0.000547549251157558, + "loss": 3.2111, + "step": 3359 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005473157218483006, + "loss": 3.3418, + "step": 3360 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005470821821236823, + "loss": 3.205, + "step": 3361 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005468486320351113, + "loss": 3.363, + "step": 3362 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005466150716339975, + "loss": 3.2649, + "step": 3363 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005463815009717533, + "loss": 3.1846, + "step": 3364 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005461479200997935, + "loss": 3.2673, + "step": 3365 + }, + { + "epoch": 0.97, + "learning_rate": 0.0005459143290695351, + "loss": 3.2658, + "step": 3366 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005456807279323971, + "loss": 3.2788, + "step": 3367 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005454471167398008, + "loss": 3.2777, + "step": 3368 + }, + { + "epoch": 0.98, + "learning_rate": 0.00054521349554317, + "loss": 3.2331, + "step": 3369 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005449798643939305, + "loss": 3.315, + "step": 3370 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005447462233435102, + "loss": 3.2811, + "step": 3371 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005445125724433393, + "loss": 3.2094, + "step": 3372 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005442789117448501, + "loss": 3.236, + "step": 3373 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005440452412994774, + "loss": 3.2111, + "step": 3374 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005438115611586575, + "loss": 3.1314, + "step": 3375 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005435778713738292, + "loss": 3.3152, + "step": 3376 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005433441719964333, + "loss": 3.183, + "step": 3377 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005431104630779133, + "loss": 3.2483, + "step": 3378 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005428767446697139, + "loss": 3.2409, + "step": 3379 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005426430168232826, + "loss": 3.1943, + "step": 3380 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005424092795900682, + "loss": 3.1581, + "step": 3381 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005421755330215223, + "loss": 3.1605, + "step": 3382 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005419417771690987, + "loss": 3.3837, + "step": 3383 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005417080120842523, + "loss": 3.1582, + "step": 3384 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005414742378184408, + "loss": 3.2823, + "step": 3385 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005412404544231235, + "loss": 3.2681, + "step": 3386 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005410066619497623, + "loss": 3.1736, + "step": 3387 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005407728604498204, + "loss": 3.238, + "step": 3388 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005405390499747633, + "loss": 3.2183, + "step": 3389 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005403052305760586, + "loss": 3.1151, + "step": 3390 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005400714023051754, + "loss": 3.3011, + "step": 3391 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005398375652135858, + "loss": 3.2663, + "step": 3392 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005396037193527626, + "loss": 3.1158, + "step": 3393 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005393698647741809, + "loss": 3.0783, + "step": 3394 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005391360015293183, + "loss": 3.0968, + "step": 3395 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005389021296696537, + "loss": 3.1548, + "step": 3396 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005386682492466681, + "loss": 3.2438, + "step": 3397 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005384343603118443, + "loss": 3.1949, + "step": 3398 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005382004629166671, + "loss": 3.2351, + "step": 3399 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005379665571126231, + "loss": 3.2981, + "step": 3400 + }, + { + "epoch": 0.98, + "learning_rate": 0.0005377326429512008, + "loss": 3.1211, + "step": 3401 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005374987204838906, + "loss": 3.2632, + "step": 3402 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005372647897621842, + "loss": 3.2347, + "step": 3403 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005370308508375758, + "loss": 3.1656, + "step": 3404 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005367969037615613, + "loss": 3.1788, + "step": 3405 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005365629485856381, + "loss": 3.2396, + "step": 3406 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005363289853613053, + "loss": 3.2726, + "step": 3407 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005360950141400643, + "loss": 3.2178, + "step": 3408 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005358610349734178, + "loss": 3.3275, + "step": 3409 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005356270479128706, + "loss": 3.1435, + "step": 3410 + }, + { + "epoch": 0.99, + "learning_rate": 0.000535393053009929, + "loss": 3.2059, + "step": 3411 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005351590503161007, + "loss": 3.2206, + "step": 3412 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005349250398828958, + "loss": 3.2107, + "step": 3413 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005346910217618259, + "loss": 3.2472, + "step": 3414 + }, + { + "epoch": 0.99, + "learning_rate": 0.000534456996004404, + "loss": 3.1822, + "step": 3415 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005342229626621447, + "loss": 3.3078, + "step": 3416 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005339889217865649, + "loss": 3.1884, + "step": 3417 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005337548734291826, + "loss": 3.3543, + "step": 3418 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005335208176415179, + "loss": 3.1568, + "step": 3419 + }, + { + "epoch": 0.99, + "learning_rate": 0.000533286754475092, + "loss": 3.2582, + "step": 3420 + }, + { + "epoch": 0.99, + "learning_rate": 0.000533052683981428, + "loss": 3.3259, + "step": 3421 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005328186062120508, + "loss": 3.1561, + "step": 3422 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005325845212184866, + "loss": 3.2334, + "step": 3423 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005323504290522631, + "loss": 3.2394, + "step": 3424 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005321163297649099, + "loss": 3.3106, + "step": 3425 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005318822234079584, + "loss": 3.2746, + "step": 3426 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005316481100329408, + "loss": 3.2677, + "step": 3427 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005314139896913912, + "loss": 3.3079, + "step": 3428 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005311798624348457, + "loss": 3.2361, + "step": 3429 + }, + { + "epoch": 0.99, + "learning_rate": 0.000530945728314841, + "loss": 3.3234, + "step": 3430 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005307115873829161, + "loss": 3.1643, + "step": 3431 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005304774396906111, + "loss": 3.2006, + "step": 3432 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005302432852894677, + "loss": 3.3604, + "step": 3433 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005300091242310292, + "loss": 3.1586, + "step": 3434 + }, + { + "epoch": 0.99, + "learning_rate": 0.0005297749565668402, + "loss": 3.1498, + "step": 3435 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005295407823484467, + "loss": 3.2096, + "step": 3436 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005293066016273963, + "loss": 3.2743, + "step": 3437 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005290724144552379, + "loss": 3.1144, + "step": 3438 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005288382208835221, + "loss": 3.2211, + "step": 3439 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005286040209638005, + "loss": 3.2673, + "step": 3440 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005283698147476263, + "loss": 3.2618, + "step": 3441 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005281356022865542, + "loss": 3.2454, + "step": 3442 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005279013836321399, + "loss": 3.2549, + "step": 3443 + }, + { + "epoch": 1.0, + "learning_rate": 0.000527667158835941, + "loss": 3.3002, + "step": 3444 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005274329279495162, + "loss": 3.2493, + "step": 3445 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005271986910244253, + "loss": 3.1588, + "step": 3446 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005269644481122295, + "loss": 3.1964, + "step": 3447 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005267301992644922, + "loss": 3.1834, + "step": 3448 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005264959445327765, + "loss": 3.2016, + "step": 3449 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005262616839686481, + "loss": 3.1399, + "step": 3450 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005260274176236734, + "loss": 3.2123, + "step": 3451 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005257931455494203, + "loss": 3.1565, + "step": 3452 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005255588677974579, + "loss": 3.1769, + "step": 3453 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005253245844193564, + "loss": 3.1369, + "step": 3454 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005250902954666876, + "loss": 3.1813, + "step": 3455 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005248560009910239, + "loss": 3.203, + "step": 3456 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005246217010439398, + "loss": 3.1443, + "step": 3457 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005243873956770101, + "loss": 3.1865, + "step": 3458 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005241530849418115, + "loss": 3.0818, + "step": 3459 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005239187688899213, + "loss": 3.1461, + "step": 3460 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005236844475729185, + "loss": 3.2421, + "step": 3461 + }, + { + "epoch": 1.0, + "learning_rate": 0.000523450121042383, + "loss": 3.1737, + "step": 3462 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005232157893498957, + "loss": 3.0103, + "step": 3463 + }, + { + "epoch": 1.0, + "learning_rate": 0.000522981452547039, + "loss": 3.0988, + "step": 3464 + }, + { + "epoch": 1.0, + "learning_rate": 0.000522747110685396, + "loss": 3.2677, + "step": 3465 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005225127638165514, + "loss": 3.1246, + "step": 3466 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005222784119920907, + "loss": 3.2292, + "step": 3467 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005220440552636003, + "loss": 3.3352, + "step": 3468 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005218096936826681, + "loss": 3.1104, + "step": 3469 + }, + { + "epoch": 1.0, + "learning_rate": 0.0005215753273008827, + "loss": 3.2108, + "step": 3470 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005213409561698343, + "loss": 3.1207, + "step": 3471 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005211065803411135, + "loss": 3.3316, + "step": 3472 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005208721998663122, + "loss": 3.1799, + "step": 3473 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005206378147970235, + "loss": 3.1977, + "step": 3474 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005204034251848413, + "loss": 3.1366, + "step": 3475 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005201690310813605, + "loss": 3.2945, + "step": 3476 + }, + { + "epoch": 1.01, + "learning_rate": 0.000519934632538177, + "loss": 3.1545, + "step": 3477 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005197002296068878, + "loss": 3.3732, + "step": 3478 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005194658223390909, + "loss": 3.1622, + "step": 3479 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005192314107863848, + "loss": 3.1235, + "step": 3480 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005189969950003697, + "loss": 3.2994, + "step": 3481 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005187625750326458, + "loss": 3.2308, + "step": 3482 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005185281509348153, + "loss": 3.2322, + "step": 3483 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005182937227584805, + "loss": 3.113, + "step": 3484 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005180592905552447, + "loss": 3.1934, + "step": 3485 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005178248543767122, + "loss": 3.1936, + "step": 3486 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005175904142744885, + "loss": 3.1288, + "step": 3487 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005173559703001796, + "loss": 3.1299, + "step": 3488 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005171215225053921, + "loss": 3.1182, + "step": 3489 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005168870709417342, + "loss": 3.1636, + "step": 3490 + }, + { + "epoch": 1.01, + "learning_rate": 0.000516652615660814, + "loss": 3.2376, + "step": 3491 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005164181567142415, + "loss": 3.1495, + "step": 3492 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005161836941536263, + "loss": 3.232, + "step": 3493 + }, + { + "epoch": 1.01, + "learning_rate": 0.00051594922803058, + "loss": 3.1684, + "step": 3494 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005157147583967139, + "loss": 3.2577, + "step": 3495 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005154802853036408, + "loss": 3.1508, + "step": 3496 + }, + { + "epoch": 1.01, + "learning_rate": 0.000515245808802974, + "loss": 3.2368, + "step": 3497 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005150113289463275, + "loss": 3.2307, + "step": 3498 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005147768457853162, + "loss": 3.1892, + "step": 3499 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005145423593715557, + "loss": 3.2918, + "step": 3500 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005143078697566622, + "loss": 3.2671, + "step": 3501 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005140733769922525, + "loss": 3.3621, + "step": 3502 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005138388811299446, + "loss": 3.2079, + "step": 3503 + }, + { + "epoch": 1.01, + "learning_rate": 0.0005136043822213566, + "loss": 3.1294, + "step": 3504 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005133698803181075, + "loss": 3.272, + "step": 3505 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005131353754718171, + "loss": 3.0887, + "step": 3506 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005129008677341057, + "loss": 3.1982, + "step": 3507 + }, + { + "epoch": 1.02, + "learning_rate": 0.000512666357156594, + "loss": 3.2666, + "step": 3508 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005124318437909038, + "loss": 3.2653, + "step": 3509 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005121973276886575, + "loss": 3.3024, + "step": 3510 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005119628089014774, + "loss": 3.1919, + "step": 3511 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005117282874809873, + "loss": 3.1976, + "step": 3512 + }, + { + "epoch": 1.02, + "learning_rate": 0.000511493763478811, + "loss": 3.2923, + "step": 3513 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005112592369465731, + "loss": 3.1581, + "step": 3514 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005110247079358986, + "loss": 3.2226, + "step": 3515 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005107901764984132, + "loss": 3.2401, + "step": 3516 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005105556426857429, + "loss": 3.1795, + "step": 3517 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005103211065495146, + "loss": 3.1341, + "step": 3518 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005100865681413556, + "loss": 3.1452, + "step": 3519 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005098520275128934, + "loss": 3.2214, + "step": 3520 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005096174847157562, + "loss": 3.2096, + "step": 3521 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005093829398015728, + "loss": 3.1336, + "step": 3522 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005091483928219723, + "loss": 3.1596, + "step": 3523 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005089138438285841, + "loss": 3.1607, + "step": 3524 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005086792928730383, + "loss": 3.1573, + "step": 3525 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005084447400069655, + "loss": 3.2674, + "step": 3526 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005082101852819967, + "loss": 3.1944, + "step": 3527 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005079756287497629, + "loss": 3.073, + "step": 3528 + }, + { + "epoch": 1.02, + "learning_rate": 0.000507741070461896, + "loss": 3.2572, + "step": 3529 + }, + { + "epoch": 1.02, + "learning_rate": 0.000507506510470028, + "loss": 3.1718, + "step": 3530 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005072719488257914, + "loss": 3.406, + "step": 3531 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005070373855808189, + "loss": 3.2454, + "step": 3532 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005068028207867439, + "loss": 3.1961, + "step": 3533 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005065682544951995, + "loss": 3.0997, + "step": 3534 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005063336867578201, + "loss": 3.1942, + "step": 3535 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005060991176262393, + "loss": 3.1659, + "step": 3536 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005058645471520922, + "loss": 3.2644, + "step": 3537 + }, + { + "epoch": 1.02, + "learning_rate": 0.000505629975387013, + "loss": 3.1847, + "step": 3538 + }, + { + "epoch": 1.02, + "learning_rate": 0.0005053954023826368, + "loss": 3.1335, + "step": 3539 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005051608281905994, + "loss": 3.2163, + "step": 3540 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005049262528625359, + "loss": 3.1759, + "step": 3541 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005046916764500824, + "loss": 3.1575, + "step": 3542 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005044570990048747, + "loss": 3.1792, + "step": 3543 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005042225205785493, + "loss": 3.2795, + "step": 3544 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005039879412227428, + "loss": 3.1508, + "step": 3545 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005037533609890916, + "loss": 3.2232, + "step": 3546 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005035187799292329, + "loss": 3.1986, + "step": 3547 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005032841980948036, + "loss": 3.2635, + "step": 3548 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005030496155374412, + "loss": 3.1836, + "step": 3549 + }, + { + "epoch": 1.03, + "learning_rate": 0.000502815032308783, + "loss": 3.1746, + "step": 3550 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005025804484604664, + "loss": 3.2842, + "step": 3551 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005023458640441294, + "loss": 3.29, + "step": 3552 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005021112791114096, + "loss": 3.2743, + "step": 3553 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005018766937139452, + "loss": 3.2754, + "step": 3554 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005016421079033739, + "loss": 3.3299, + "step": 3555 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005014075217313341, + "loss": 3.1686, + "step": 3556 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005011729352494641, + "loss": 3.1705, + "step": 3557 + }, + { + "epoch": 1.03, + "learning_rate": 0.000500938348509402, + "loss": 3.1121, + "step": 3558 + }, + { + "epoch": 1.03, + "learning_rate": 0.000500703761562786, + "loss": 3.1733, + "step": 3559 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005004691744612547, + "loss": 3.2115, + "step": 3560 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005002345872564467, + "loss": 3.1998, + "step": 3561 + }, + { + "epoch": 1.03, + "learning_rate": 0.0005, + "loss": 3.0932, + "step": 3562 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004997654127435536, + "loss": 3.2241, + "step": 3563 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004995308255387453, + "loss": 3.2415, + "step": 3564 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004992962384372141, + "loss": 3.2524, + "step": 3565 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004990616514905981, + "loss": 3.2116, + "step": 3566 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004988270647505361, + "loss": 3.096, + "step": 3567 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004985924782686658, + "loss": 3.2048, + "step": 3568 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004983578920966261, + "loss": 3.1418, + "step": 3569 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004981233062860548, + "loss": 3.1074, + "step": 3570 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004978887208885904, + "loss": 3.2035, + "step": 3571 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004976541359558707, + "loss": 3.3873, + "step": 3572 + }, + { + "epoch": 1.03, + "learning_rate": 0.0004974195515395337, + "loss": 3.3064, + "step": 3573 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004971849676912172, + "loss": 3.1625, + "step": 3574 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004969503844625588, + "loss": 3.2012, + "step": 3575 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004967158019051965, + "loss": 3.1308, + "step": 3576 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004964812200707672, + "loss": 3.2408, + "step": 3577 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004962466390109085, + "loss": 3.258, + "step": 3578 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004960120587772573, + "loss": 3.2491, + "step": 3579 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004957774794214508, + "loss": 3.137, + "step": 3580 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004955429009951255, + "loss": 3.2677, + "step": 3581 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004953083235499177, + "loss": 3.2497, + "step": 3582 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004950737471374643, + "loss": 3.1953, + "step": 3583 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004948391718094006, + "loss": 3.2716, + "step": 3584 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004946045976173632, + "loss": 3.1777, + "step": 3585 + }, + { + "epoch": 1.04, + "learning_rate": 0.000494370024612987, + "loss": 3.144, + "step": 3586 + }, + { + "epoch": 1.04, + "learning_rate": 0.000494135452847908, + "loss": 3.2341, + "step": 3587 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004939008823737606, + "loss": 3.2268, + "step": 3588 + }, + { + "epoch": 1.04, + "learning_rate": 0.00049366631324218, + "loss": 3.3093, + "step": 3589 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004934317455048005, + "loss": 3.191, + "step": 3590 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004931971792132562, + "loss": 3.2268, + "step": 3591 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004929626144191812, + "loss": 3.2901, + "step": 3592 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004927280511742086, + "loss": 3.3023, + "step": 3593 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004924934895299721, + "loss": 3.3352, + "step": 3594 + }, + { + "epoch": 1.04, + "learning_rate": 0.000492258929538104, + "loss": 3.2554, + "step": 3595 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004920243712502371, + "loss": 3.1866, + "step": 3596 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004917898147180033, + "loss": 3.2505, + "step": 3597 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004915552599930346, + "loss": 3.1956, + "step": 3598 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004913207071269618, + "loss": 3.1773, + "step": 3599 + }, + { + "epoch": 1.04, + "learning_rate": 0.000491086156171416, + "loss": 3.1524, + "step": 3600 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004908516071780281, + "loss": 3.1307, + "step": 3601 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004906170601984272, + "loss": 3.1272, + "step": 3602 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004903825152842438, + "loss": 3.2893, + "step": 3603 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004901479724871066, + "loss": 3.2318, + "step": 3604 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004899134318586445, + "loss": 3.2093, + "step": 3605 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004896788934504853, + "loss": 3.0065, + "step": 3606 + }, + { + "epoch": 1.04, + "learning_rate": 0.0004894443573142572, + "loss": 3.2493, + "step": 3607 + }, + { + "epoch": 1.04, + "learning_rate": 0.000489209823501587, + "loss": 3.2217, + "step": 3608 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004889752920641014, + "loss": 3.2456, + "step": 3609 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004887407630534271, + "loss": 3.1262, + "step": 3610 + }, + { + "epoch": 1.05, + "learning_rate": 0.000488506236521189, + "loss": 3.2429, + "step": 3611 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048827171251901273, + "loss": 3.1723, + "step": 3612 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004880371910985225, + "loss": 3.2008, + "step": 3613 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004878026723113427, + "loss": 3.1396, + "step": 3614 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048756815620909613, + "loss": 3.2738, + "step": 3615 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048733364284340617, + "loss": 3.116, + "step": 3616 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004870991322658944, + "loss": 3.1744, + "step": 3617 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048686462452818296, + "loss": 3.2628, + "step": 3618 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048663011968189265, + "loss": 3.2704, + "step": 3619 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004863956177786435, + "loss": 3.1901, + "step": 3620 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004861611188700555, + "loss": 3.3276, + "step": 3621 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048592662300774736, + "loss": 3.2386, + "step": 3622 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004856921302433379, + "loss": 3.1664, + "step": 3623 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004854576406284443, + "loss": 3.305, + "step": 3624 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004852231542146839, + "loss": 3.2886, + "step": 3625 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004849886710536725, + "loss": 3.0892, + "step": 3626 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004847541911970261, + "loss": 3.2835, + "step": 3627 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004845197146963594, + "loss": 3.1733, + "step": 3628 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048428524160328625, + "loss": 3.3403, + "step": 3629 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048405077196942017, + "loss": 3.2257, + "step": 3630 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004838163058463736, + "loss": 3.1959, + "step": 3631 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048358184328575873, + "loss": 3.1408, + "step": 3632 + }, + { + "epoch": 1.05, + "learning_rate": 0.000483347384339186, + "loss": 3.1444, + "step": 3633 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004831129290582659, + "loss": 3.2625, + "step": 3634 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004828784774946078, + "loss": 3.2941, + "step": 3635 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004826440296998206, + "loss": 3.2095, + "step": 3636 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004824095857255117, + "loss": 3.1675, + "step": 3637 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048217514562328784, + "loss": 3.2552, + "step": 3638 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004819407094447555, + "loss": 3.1501, + "step": 3639 + }, + { + "epoch": 1.05, + "learning_rate": 0.0004817062772415196, + "loss": 3.1004, + "step": 3640 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048147184906518483, + "loss": 3.1336, + "step": 3641 + }, + { + "epoch": 1.05, + "learning_rate": 0.00048123742496735414, + "loss": 3.1509, + "step": 3642 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004810030049996304, + "loss": 3.1597, + "step": 3643 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004807685892136151, + "loss": 3.2088, + "step": 3644 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004805341776609092, + "loss": 3.1144, + "step": 3645 + }, + { + "epoch": 1.06, + "learning_rate": 0.00048029977039311237, + "loss": 3.1333, + "step": 3646 + }, + { + "epoch": 1.06, + "learning_rate": 0.00048006536746182313, + "loss": 3.2032, + "step": 3647 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047983096891863965, + "loss": 3.2688, + "step": 3648 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047959657481515874, + "loss": 3.2184, + "step": 3649 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047936218520297665, + "loss": 3.2813, + "step": 3650 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004791278001336878, + "loss": 3.2475, + "step": 3651 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004788934196588866, + "loss": 3.1451, + "step": 3652 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047865904383016567, + "loss": 3.1412, + "step": 3653 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004784246726991173, + "loss": 3.155, + "step": 3654 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004781903063173321, + "loss": 3.257, + "step": 3655 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047795594473639975, + "loss": 3.2712, + "step": 3656 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004777215880079095, + "loss": 3.2999, + "step": 3657 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004774872361834486, + "loss": 3.2229, + "step": 3658 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004772528893146041, + "loss": 3.3174, + "step": 3659 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047701854745296106, + "loss": 3.2488, + "step": 3660 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047678421065010436, + "loss": 3.179, + "step": 3661 + }, + { + "epoch": 1.06, + "learning_rate": 0.000476549878957617, + "loss": 3.1016, + "step": 3662 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047631555242708156, + "loss": 3.238, + "step": 3663 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004760812311100788, + "loss": 3.0342, + "step": 3664 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004758469150581886, + "loss": 3.0674, + "step": 3665 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004756126043229901, + "loss": 3.1496, + "step": 3666 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004753782989560603, + "loss": 3.1733, + "step": 3667 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047514399900897627, + "loss": 3.3232, + "step": 3668 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004749097045333126, + "loss": 3.3586, + "step": 3669 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004746754155806437, + "loss": 3.2001, + "step": 3670 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047444113220254213, + "loss": 3.1849, + "step": 3671 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004742068544505798, + "loss": 3.2893, + "step": 3672 + }, + { + "epoch": 1.06, + "learning_rate": 0.0004739725823763267, + "loss": 3.0684, + "step": 3673 + }, + { + "epoch": 1.06, + "learning_rate": 0.000473738316031352, + "loss": 3.11, + "step": 3674 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047350405546722367, + "loss": 3.2859, + "step": 3675 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047326980073550797, + "loss": 3.223, + "step": 3676 + }, + { + "epoch": 1.06, + "learning_rate": 0.00047303555188777047, + "loss": 3.3546, + "step": 3677 + }, + { + "epoch": 1.07, + "learning_rate": 0.00047280130897557485, + "loss": 3.1524, + "step": 3678 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004725670720504839, + "loss": 3.1957, + "step": 3679 + }, + { + "epoch": 1.07, + "learning_rate": 0.00047233284116405893, + "loss": 3.1913, + "step": 3680 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004720986163678602, + "loss": 3.0721, + "step": 3681 + }, + { + "epoch": 1.07, + "learning_rate": 0.000471864397713446, + "loss": 3.1537, + "step": 3682 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004716301852523738, + "loss": 3.2208, + "step": 3683 + }, + { + "epoch": 1.07, + "learning_rate": 0.00047139597903619976, + "loss": 3.2736, + "step": 3684 + }, + { + "epoch": 1.07, + "learning_rate": 0.000471161779116478, + "loss": 3.2384, + "step": 3685 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004709275855447621, + "loss": 3.3967, + "step": 3686 + }, + { + "epoch": 1.07, + "learning_rate": 0.00047069339837260373, + "loss": 3.2931, + "step": 3687 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004704592176515534, + "loss": 3.2945, + "step": 3688 + }, + { + "epoch": 1.07, + "learning_rate": 0.00047022504343315985, + "loss": 3.1712, + "step": 3689 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046999087576897095, + "loss": 3.2604, + "step": 3690 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046975671471053245, + "loss": 3.2514, + "step": 3691 + }, + { + "epoch": 1.07, + "learning_rate": 0.000469522560309389, + "loss": 3.1834, + "step": 3692 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046928841261708416, + "loss": 3.1217, + "step": 3693 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004690542716851591, + "loss": 3.1033, + "step": 3694 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004688201375651545, + "loss": 3.0763, + "step": 3695 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004685860103086087, + "loss": 3.0679, + "step": 3696 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046835188996705935, + "loss": 3.2249, + "step": 3697 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004681177765920416, + "loss": 3.0226, + "step": 3698 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004678836702350901, + "loss": 3.2229, + "step": 3699 + }, + { + "epoch": 1.07, + "learning_rate": 0.000467649570947737, + "loss": 3.2604, + "step": 3700 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004674154787815135, + "loss": 3.2413, + "step": 3701 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004671813937879494, + "loss": 3.2905, + "step": 3702 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046694731601857204, + "loss": 3.1761, + "step": 3703 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046671324552490813, + "loss": 3.2753, + "step": 3704 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004664791823584821, + "loss": 3.2358, + "step": 3705 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004662451265708174, + "loss": 3.0456, + "step": 3706 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046601107821343514, + "loss": 3.3786, + "step": 3707 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046577703733785536, + "loss": 3.1959, + "step": 3708 + }, + { + "epoch": 1.07, + "learning_rate": 0.00046554300399559613, + "loss": 3.192, + "step": 3709 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004653089782381742, + "loss": 3.324, + "step": 3710 + }, + { + "epoch": 1.07, + "learning_rate": 0.0004650749601171043, + "loss": 3.1865, + "step": 3711 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046484094968389934, + "loss": 3.109, + "step": 3712 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046460694699007116, + "loss": 3.2007, + "step": 3713 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004643729520871293, + "loss": 3.2807, + "step": 3714 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046413896502658214, + "loss": 3.0879, + "step": 3715 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004639049858599357, + "loss": 3.2074, + "step": 3716 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004636710146386947, + "loss": 3.278, + "step": 3717 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046343705141436195, + "loss": 3.2555, + "step": 3718 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004632030962384388, + "loss": 3.1672, + "step": 3719 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004629691491624243, + "loss": 3.2779, + "step": 3720 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046273521023781596, + "loss": 3.172, + "step": 3721 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004625012795161097, + "loss": 3.1836, + "step": 3722 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004622673570487992, + "loss": 3.3293, + "step": 3723 + }, + { + "epoch": 1.08, + "learning_rate": 0.000462033442887377, + "loss": 3.2018, + "step": 3724 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004617995370833329, + "loss": 3.3641, + "step": 3725 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046156563968815573, + "loss": 3.2679, + "step": 3726 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046133175075333183, + "loss": 3.191, + "step": 3727 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046109787033034636, + "loss": 3.2499, + "step": 3728 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046086399847068184, + "loss": 3.2342, + "step": 3729 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046063013522581916, + "loss": 3.1732, + "step": 3730 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004603962806472376, + "loss": 3.2129, + "step": 3731 + }, + { + "epoch": 1.08, + "learning_rate": 0.00046016243478641425, + "loss": 3.1685, + "step": 3732 + }, + { + "epoch": 1.08, + "learning_rate": 0.00045992859769482453, + "loss": 3.0929, + "step": 3733 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004596947694239415, + "loss": 3.1812, + "step": 3734 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004594609500252368, + "loss": 3.1994, + "step": 3735 + }, + { + "epoch": 1.08, + "learning_rate": 0.00045922713955017967, + "loss": 3.087, + "step": 3736 + }, + { + "epoch": 1.08, + "learning_rate": 0.00045899333805023786, + "loss": 3.1594, + "step": 3737 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004587595455768765, + "loss": 3.1735, + "step": 3738 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004585257621815593, + "loss": 3.1604, + "step": 3739 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004582919879157479, + "loss": 3.2697, + "step": 3740 + }, + { + "epoch": 1.08, + "learning_rate": 0.00045805822283090137, + "loss": 3.2114, + "step": 3741 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004578244669784777, + "loss": 3.0601, + "step": 3742 + }, + { + "epoch": 1.08, + "learning_rate": 0.00045759072040993187, + "loss": 3.1277, + "step": 3743 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004573569831767176, + "loss": 3.2905, + "step": 3744 + }, + { + "epoch": 1.08, + "learning_rate": 0.00045712325533028603, + "loss": 3.2718, + "step": 3745 + }, + { + "epoch": 1.08, + "learning_rate": 0.0004568895369220868, + "loss": 3.278, + "step": 3746 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004566558280035667, + "loss": 3.1588, + "step": 3747 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045642212862617086, + "loss": 3.1912, + "step": 3748 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045618843884134277, + "loss": 3.1865, + "step": 3749 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004559547587005227, + "loss": 3.1859, + "step": 3750 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045572108825514996, + "loss": 3.1617, + "step": 3751 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045548742755666073, + "loss": 3.2566, + "step": 3752 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045525377665648985, + "loss": 3.2713, + "step": 3753 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004550201356060695, + "loss": 3.1732, + "step": 3754 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045478650445683014, + "loss": 3.1581, + "step": 3755 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045455288326019935, + "loss": 3.1202, + "step": 3756 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045431927206760305, + "loss": 3.1627, + "step": 3757 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004540856709304651, + "loss": 3.1361, + "step": 3758 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004538520799002065, + "loss": 3.0894, + "step": 3759 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045361849902824676, + "loss": 3.2309, + "step": 3760 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004533849283660025, + "loss": 3.0817, + "step": 3761 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045315136796488876, + "loss": 3.0932, + "step": 3762 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045291781787631756, + "loss": 3.2852, + "step": 3763 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045268427815169955, + "loss": 3.2032, + "step": 3764 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004524507488424421, + "loss": 3.1599, + "step": 3765 + }, + { + "epoch": 1.09, + "learning_rate": 0.000452217229999951, + "loss": 3.2816, + "step": 3766 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004519837216756296, + "loss": 3.2345, + "step": 3767 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004517502239208786, + "loss": 3.3493, + "step": 3768 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045151673678709694, + "loss": 3.1288, + "step": 3769 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004512832603256807, + "loss": 3.2525, + "step": 3770 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045104979458802404, + "loss": 3.2619, + "step": 3771 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045081633962551823, + "loss": 3.2968, + "step": 3772 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004505828954895529, + "loss": 3.0708, + "step": 3773 + }, + { + "epoch": 1.09, + "learning_rate": 0.00045034946223151447, + "loss": 3.2054, + "step": 3774 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004501160399027874, + "loss": 3.1687, + "step": 3775 + }, + { + "epoch": 1.09, + "learning_rate": 0.0004498826285547542, + "loss": 3.3402, + "step": 3776 + }, + { + "epoch": 1.09, + "learning_rate": 0.00044964922823879385, + "loss": 3.2077, + "step": 3777 + }, + { + "epoch": 1.09, + "learning_rate": 0.000449415839006284, + "loss": 3.2226, + "step": 3778 + }, + { + "epoch": 1.09, + "learning_rate": 0.00044918246090859906, + "loss": 3.1346, + "step": 3779 + }, + { + "epoch": 1.09, + "learning_rate": 0.00044894909399711166, + "loss": 3.1409, + "step": 3780 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044871573832319124, + "loss": 3.2565, + "step": 3781 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004484823939382056, + "loss": 3.1807, + "step": 3782 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004482490608935193, + "loss": 3.209, + "step": 3783 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004480157392404947, + "loss": 3.2073, + "step": 3784 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004477824290304919, + "loss": 3.1481, + "step": 3785 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044754913031486794, + "loss": 3.1632, + "step": 3786 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004473158431449779, + "loss": 3.0496, + "step": 3787 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004470825675721738, + "loss": 3.2307, + "step": 3788 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044684930364780577, + "loss": 3.2482, + "step": 3789 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004466160514232206, + "loss": 3.1386, + "step": 3790 + }, + { + "epoch": 1.1, + "learning_rate": 0.000446382810949763, + "loss": 3.3337, + "step": 3791 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004461495822787751, + "loss": 3.2076, + "step": 3792 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044591636546159597, + "loss": 3.0852, + "step": 3793 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044568316054956284, + "loss": 3.0665, + "step": 3794 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044544996759400936, + "loss": 3.2644, + "step": 3795 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044521678664626745, + "loss": 3.284, + "step": 3796 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004449836177576658, + "loss": 3.2861, + "step": 3797 + }, + { + "epoch": 1.1, + "learning_rate": 0.000444750460979531, + "loss": 3.2307, + "step": 3798 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044451731636318615, + "loss": 3.2224, + "step": 3799 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044428418395995245, + "loss": 3.1316, + "step": 3800 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044405106382114796, + "loss": 3.2926, + "step": 3801 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004438179559980884, + "loss": 3.2539, + "step": 3802 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004435848605420865, + "loss": 3.1522, + "step": 3803 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004433517775044521, + "loss": 3.2026, + "step": 3804 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044311870693649263, + "loss": 3.2937, + "step": 3805 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044288564888951277, + "loss": 3.2174, + "step": 3806 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044265260341481456, + "loss": 3.2023, + "step": 3807 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044241957056369665, + "loss": 3.0363, + "step": 3808 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044218655038745557, + "loss": 3.2262, + "step": 3809 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004419535429373848, + "loss": 3.1189, + "step": 3810 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044172054826477526, + "loss": 3.1864, + "step": 3811 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044148756642091447, + "loss": 3.304, + "step": 3812 + }, + { + "epoch": 1.1, + "learning_rate": 0.0004412545974570876, + "loss": 3.2396, + "step": 3813 + }, + { + "epoch": 1.1, + "learning_rate": 0.00044102164142457707, + "loss": 3.3437, + "step": 3814 + }, + { + "epoch": 1.1, + "learning_rate": 0.000440788698374662, + "loss": 3.2134, + "step": 3815 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004405557683586192, + "loss": 3.2442, + "step": 3816 + }, + { + "epoch": 1.11, + "learning_rate": 0.000440322851427722, + "loss": 3.2183, + "step": 3817 + }, + { + "epoch": 1.11, + "learning_rate": 0.00044008994763324147, + "loss": 3.405, + "step": 3818 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043985705702644524, + "loss": 3.1572, + "step": 3819 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043962417965859866, + "loss": 3.142, + "step": 3820 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043939131558096346, + "loss": 3.1039, + "step": 3821 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004391584648447988, + "loss": 3.2459, + "step": 3822 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043892562750136115, + "loss": 3.1952, + "step": 3823 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043869280360190347, + "loss": 3.0562, + "step": 3824 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004384599931976764, + "loss": 3.1578, + "step": 3825 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004382271963399268, + "loss": 3.323, + "step": 3826 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004379944130798994, + "loss": 3.174, + "step": 3827 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004377616434688353, + "loss": 3.1091, + "step": 3828 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004375288875579732, + "loss": 3.0464, + "step": 3829 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004372961453985481, + "loss": 3.3275, + "step": 3830 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004370634170417924, + "loss": 3.2356, + "step": 3831 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043683070253893553, + "loss": 3.1687, + "step": 3832 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043659800194120334, + "loss": 3.1444, + "step": 3833 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043636531529981943, + "loss": 3.1932, + "step": 3834 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004361326426660035, + "loss": 3.4213, + "step": 3835 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004358999840909728, + "loss": 3.0843, + "step": 3836 + }, + { + "epoch": 1.11, + "learning_rate": 0.000435667339625941, + "loss": 3.2171, + "step": 3837 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004354347093221194, + "loss": 3.2208, + "step": 3838 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043520209323071507, + "loss": 3.1032, + "step": 3839 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043496949140293283, + "loss": 3.249, + "step": 3840 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043473690388997434, + "loss": 3.2737, + "step": 3841 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043450433074303734, + "loss": 3.2754, + "step": 3842 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004342717720133174, + "loss": 3.2744, + "step": 3843 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004340392277520061, + "loss": 3.3137, + "step": 3844 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004338066980102926, + "loss": 3.2754, + "step": 3845 + }, + { + "epoch": 1.11, + "learning_rate": 0.000433574182839362, + "loss": 3.2811, + "step": 3846 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043334168229039704, + "loss": 3.2642, + "step": 3847 + }, + { + "epoch": 1.11, + "learning_rate": 0.0004331091964145766, + "loss": 3.0624, + "step": 3848 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043287672526307644, + "loss": 3.2067, + "step": 3849 + }, + { + "epoch": 1.11, + "learning_rate": 0.00043264426888706966, + "loss": 3.3398, + "step": 3850 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004324118273377251, + "loss": 3.236, + "step": 3851 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004321794006662093, + "loss": 3.1633, + "step": 3852 + }, + { + "epoch": 1.12, + "learning_rate": 0.00043194698892368497, + "loss": 3.1391, + "step": 3853 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004317145921613119, + "loss": 3.0568, + "step": 3854 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004314822104302459, + "loss": 3.2879, + "step": 3855 + }, + { + "epoch": 1.12, + "learning_rate": 0.00043124984378164046, + "loss": 3.1896, + "step": 3856 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004310174922666449, + "loss": 3.2834, + "step": 3857 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004307851559364055, + "loss": 3.2652, + "step": 3858 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004305528348420656, + "loss": 3.1892, + "step": 3859 + }, + { + "epoch": 1.12, + "learning_rate": 0.00043032052903476433, + "loss": 3.229, + "step": 3860 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004300882385656383, + "loss": 3.2305, + "step": 3861 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042985596348582014, + "loss": 3.203, + "step": 3862 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004296237038464397, + "loss": 3.2652, + "step": 3863 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042939145969862264, + "loss": 3.1965, + "step": 3864 + }, + { + "epoch": 1.12, + "learning_rate": 0.000429159231093492, + "loss": 3.163, + "step": 3865 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042892701808216696, + "loss": 3.1824, + "step": 3866 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042869482071576306, + "loss": 3.0142, + "step": 3867 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042846263904539305, + "loss": 3.2287, + "step": 3868 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004282304731221656, + "loss": 3.2401, + "step": 3869 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004279983229971863, + "loss": 3.1882, + "step": 3870 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042776618872155715, + "loss": 3.3685, + "step": 3871 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004275340703463767, + "loss": 3.2122, + "step": 3872 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004273019679227397, + "loss": 3.232, + "step": 3873 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042706988150173784, + "loss": 3.1538, + "step": 3874 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042683781113445925, + "loss": 3.0751, + "step": 3875 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042660575687198787, + "loss": 3.2469, + "step": 3876 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042637371876540507, + "loss": 3.2119, + "step": 3877 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042614169686578773, + "loss": 3.2187, + "step": 3878 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042590969122420995, + "loss": 3.1348, + "step": 3879 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004256777018917418, + "loss": 3.1702, + "step": 3880 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042544572891945, + "loss": 3.0849, + "step": 3881 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004252137723583972, + "loss": 3.167, + "step": 3882 + }, + { + "epoch": 1.12, + "learning_rate": 0.0004249818322596431, + "loss": 3.1909, + "step": 3883 + }, + { + "epoch": 1.12, + "learning_rate": 0.00042474990867424347, + "loss": 3.3824, + "step": 3884 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042451800165325005, + "loss": 3.1535, + "step": 3885 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042428611124771184, + "loss": 3.2029, + "step": 3886 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004240542375086731, + "loss": 3.207, + "step": 3887 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004238223804871754, + "loss": 3.1492, + "step": 3888 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042359054023425593, + "loss": 3.1949, + "step": 3889 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004233587168009488, + "loss": 3.0165, + "step": 3890 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004231269102382837, + "loss": 3.0848, + "step": 3891 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042289512059728727, + "loss": 3.1501, + "step": 3892 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004226633479289819, + "loss": 3.0703, + "step": 3893 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042243159228438687, + "loss": 3.2244, + "step": 3894 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004221998537145169, + "loss": 3.0597, + "step": 3895 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042196813227038355, + "loss": 3.1418, + "step": 3896 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004217364280029946, + "loss": 3.0592, + "step": 3897 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042150474096335356, + "loss": 3.3565, + "step": 3898 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004212730712024609, + "loss": 3.2946, + "step": 3899 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004210414187713124, + "loss": 3.2278, + "step": 3900 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042080978372090086, + "loss": 3.3059, + "step": 3901 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004205781661022146, + "loss": 3.2639, + "step": 3902 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042034656596623876, + "loss": 3.2645, + "step": 3903 + }, + { + "epoch": 1.13, + "learning_rate": 0.00042011498336395393, + "loss": 3.2686, + "step": 3904 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041988341834633714, + "loss": 3.2458, + "step": 3905 + }, + { + "epoch": 1.13, + "learning_rate": 0.000419651870964362, + "loss": 3.1302, + "step": 3906 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004194203412689972, + "loss": 3.1789, + "step": 3907 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004191888293112087, + "loss": 3.2982, + "step": 3908 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041895733514195766, + "loss": 3.268, + "step": 3909 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041872585881220186, + "loss": 3.1106, + "step": 3910 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041849440037289477, + "loss": 3.1786, + "step": 3911 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041826295987498654, + "loss": 3.2415, + "step": 3912 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041803153736942253, + "loss": 3.2482, + "step": 3913 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004178001329071445, + "loss": 3.1382, + "step": 3914 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004175687465390908, + "loss": 3.2586, + "step": 3915 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041733737831619466, + "loss": 3.0783, + "step": 3916 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004171060282893865, + "loss": 3.0757, + "step": 3917 + }, + { + "epoch": 1.13, + "learning_rate": 0.0004168746965095918, + "loss": 3.1406, + "step": 3918 + }, + { + "epoch": 1.13, + "learning_rate": 0.00041664338302773276, + "loss": 3.2691, + "step": 3919 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004164120878947267, + "loss": 3.1951, + "step": 3920 + }, + { + "epoch": 1.14, + "learning_rate": 0.000416180811161488, + "loss": 3.2827, + "step": 3921 + }, + { + "epoch": 1.14, + "learning_rate": 0.000415949552878926, + "loss": 3.2308, + "step": 3922 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004157183130979463, + "loss": 3.3029, + "step": 3923 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004154870918694509, + "loss": 3.2606, + "step": 3924 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004152558892443368, + "loss": 3.2146, + "step": 3925 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041502470527349786, + "loss": 3.2293, + "step": 3926 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041479354000782307, + "loss": 3.161, + "step": 3927 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004145623934981979, + "loss": 3.2265, + "step": 3928 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041433126579550316, + "loss": 3.2566, + "step": 3929 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004141001569506161, + "loss": 3.2367, + "step": 3930 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041386906701440914, + "loss": 3.1939, + "step": 3931 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041363799603775093, + "loss": 3.2132, + "step": 3932 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041340694407150625, + "loss": 3.0105, + "step": 3933 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041317591116653486, + "loss": 3.1917, + "step": 3934 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041294489737369325, + "loss": 3.179, + "step": 3935 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004127139027438329, + "loss": 3.1875, + "step": 3936 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004124829273278019, + "loss": 3.2307, + "step": 3937 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004122519711764431, + "loss": 3.1339, + "step": 3938 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004120210343405963, + "loss": 3.131, + "step": 3939 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004117901168710959, + "loss": 3.1833, + "step": 3940 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041155921881877254, + "loss": 3.298, + "step": 3941 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041132834023445305, + "loss": 3.2729, + "step": 3942 + }, + { + "epoch": 1.14, + "learning_rate": 0.000411097481168959, + "loss": 3.1716, + "step": 3943 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004108666416731085, + "loss": 3.1331, + "step": 3944 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004106358217977148, + "loss": 3.2906, + "step": 3945 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041040502159358746, + "loss": 3.1609, + "step": 3946 + }, + { + "epoch": 1.14, + "learning_rate": 0.00041017424111153077, + "loss": 3.2043, + "step": 3947 + }, + { + "epoch": 1.14, + "learning_rate": 0.00040994348040234574, + "loss": 3.0853, + "step": 3948 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004097127395168283, + "loss": 3.3268, + "step": 3949 + }, + { + "epoch": 1.14, + "learning_rate": 0.00040948201850577007, + "loss": 3.097, + "step": 3950 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004092513174199587, + "loss": 3.21, + "step": 3951 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004090206363101769, + "loss": 3.1494, + "step": 3952 + }, + { + "epoch": 1.14, + "learning_rate": 0.0004087899752272037, + "loss": 3.1524, + "step": 3953 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004085593342218129, + "loss": 3.1724, + "step": 3954 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040832871334477465, + "loss": 3.0849, + "step": 3955 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040809811264685405, + "loss": 3.1859, + "step": 3956 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004078675321788121, + "loss": 3.3641, + "step": 3957 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004076369719914055, + "loss": 3.2084, + "step": 3958 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004074064321353857, + "loss": 3.2282, + "step": 3959 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040717591266150086, + "loss": 3.3031, + "step": 3960 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004069454136204934, + "loss": 3.2404, + "step": 3961 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040671493506310233, + "loss": 3.1711, + "step": 3962 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004064844770400614, + "loss": 3.2755, + "step": 3963 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004062540396021005, + "loss": 3.1002, + "step": 3964 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004060236227999441, + "loss": 3.2025, + "step": 3965 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040579322668431297, + "loss": 3.1782, + "step": 3966 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040556285130592304, + "loss": 3.3421, + "step": 3967 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040533249671548526, + "loss": 3.1477, + "step": 3968 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004051021629637068, + "loss": 3.1892, + "step": 3969 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040487185010128945, + "loss": 3.3576, + "step": 3970 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004046415581789312, + "loss": 3.1145, + "step": 3971 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004044112872473246, + "loss": 3.2326, + "step": 3972 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004041810373571582, + "loss": 3.2882, + "step": 3973 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040395080855911553, + "loss": 3.1865, + "step": 3974 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004037206009038758, + "loss": 3.2908, + "step": 3975 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040349041444211344, + "loss": 3.1152, + "step": 3976 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004032602492244979, + "loss": 3.1498, + "step": 3977 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040303010530169447, + "loss": 3.4095, + "step": 3978 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004027999827243635, + "loss": 3.0859, + "step": 3979 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004025698815431607, + "loss": 3.0825, + "step": 3980 + }, + { + "epoch": 1.15, + "learning_rate": 0.00040233980180873685, + "loss": 3.2347, + "step": 3981 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004021097435717385, + "loss": 3.2511, + "step": 3982 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004018797068828068, + "loss": 3.2395, + "step": 3983 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004016496917925787, + "loss": 3.2503, + "step": 3984 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004014196983516862, + "loss": 3.2179, + "step": 3985 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004011897266107567, + "loss": 3.1806, + "step": 3986 + }, + { + "epoch": 1.15, + "learning_rate": 0.0004009597766204124, + "loss": 3.1287, + "step": 3987 + }, + { + "epoch": 1.15, + "learning_rate": 0.000400729848431271, + "loss": 3.284, + "step": 3988 + }, + { + "epoch": 1.16, + "learning_rate": 0.0004004999420939456, + "loss": 3.312, + "step": 3989 + }, + { + "epoch": 1.16, + "learning_rate": 0.0004002700576590441, + "loss": 3.3213, + "step": 3990 + }, + { + "epoch": 1.16, + "learning_rate": 0.0004000401951771699, + "loss": 3.3023, + "step": 3991 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039981035469892107, + "loss": 3.2983, + "step": 3992 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003995805362748916, + "loss": 3.236, + "step": 3993 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003993507399556699, + "loss": 3.3492, + "step": 3994 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003991209657918402, + "loss": 3.223, + "step": 3995 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039889121383398113, + "loss": 3.1532, + "step": 3996 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039866148413266676, + "loss": 3.1948, + "step": 3997 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039843177673846666, + "loss": 3.2222, + "step": 3998 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003982020917019446, + "loss": 3.2088, + "step": 3999 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039797242907366025, + "loss": 3.1355, + "step": 4000 + }, + { + "epoch": 1.16, + "learning_rate": 0.000397742788904168, + "loss": 3.0896, + "step": 4001 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039751317124401745, + "loss": 3.2837, + "step": 4002 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003972835761437528, + "loss": 3.2005, + "step": 4003 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039705400365391396, + "loss": 3.1852, + "step": 4004 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003968244538250353, + "loss": 3.1617, + "step": 4005 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003965949267076464, + "loss": 3.2276, + "step": 4006 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039636542235227206, + "loss": 3.2348, + "step": 4007 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039613594080943154, + "loss": 3.2677, + "step": 4008 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003959064821296398, + "loss": 3.2916, + "step": 4009 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003956770463634059, + "loss": 3.1391, + "step": 4010 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003954476335612349, + "loss": 3.1435, + "step": 4011 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039521824377362566, + "loss": 3.1791, + "step": 4012 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003949888770510731, + "loss": 3.2232, + "step": 4013 + }, + { + "epoch": 1.16, + "learning_rate": 0.000394759533444066, + "loss": 3.1358, + "step": 4014 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003945302130030888, + "loss": 3.1231, + "step": 4015 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039430091577862073, + "loss": 3.2749, + "step": 4016 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003940716418211354, + "loss": 3.2106, + "step": 4017 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039384239118110204, + "loss": 3.2774, + "step": 4018 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039361316390898416, + "loss": 3.2353, + "step": 4019 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003933839600552407, + "loss": 3.2145, + "step": 4020 + }, + { + "epoch": 1.16, + "learning_rate": 0.00039315477967032456, + "loss": 3.2081, + "step": 4021 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003929256228046845, + "loss": 3.2771, + "step": 4022 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039269648950876343, + "loss": 3.1912, + "step": 4023 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039246737983299906, + "loss": 3.2578, + "step": 4024 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003922382938278245, + "loss": 3.3033, + "step": 4025 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039200923154366686, + "loss": 3.1755, + "step": 4026 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003917801930309486, + "loss": 3.1325, + "step": 4027 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003915511783400867, + "loss": 3.1023, + "step": 4028 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039132218752149317, + "loss": 3.2588, + "step": 4029 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039109322062557425, + "loss": 3.1979, + "step": 4030 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039086427770273135, + "loss": 3.1973, + "step": 4031 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039063535880336063, + "loss": 3.298, + "step": 4032 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039040646397785244, + "loss": 3.1923, + "step": 4033 + }, + { + "epoch": 1.17, + "learning_rate": 0.00039017759327659264, + "loss": 3.178, + "step": 4034 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038994874674996085, + "loss": 3.3328, + "step": 4035 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038971992444833213, + "loss": 3.0656, + "step": 4036 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003894911264220758, + "loss": 3.1726, + "step": 4037 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038926235272155636, + "loss": 3.1708, + "step": 4038 + }, + { + "epoch": 1.17, + "learning_rate": 0.000389033603397132, + "loss": 3.17, + "step": 4039 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003888048784991565, + "loss": 3.2044, + "step": 4040 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003885761780779777, + "loss": 3.2153, + "step": 4041 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038834750218393814, + "loss": 3.1924, + "step": 4042 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038811885086737543, + "loss": 3.1763, + "step": 4043 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038789022417862076, + "loss": 3.1785, + "step": 4044 + }, + { + "epoch": 1.17, + "learning_rate": 0.000387661622168001, + "loss": 3.1306, + "step": 4045 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038743304488583687, + "loss": 3.1417, + "step": 4046 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038720449238244416, + "loss": 3.1771, + "step": 4047 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038697596470813257, + "loss": 3.2698, + "step": 4048 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038674746191320696, + "loss": 3.1484, + "step": 4049 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038651898404796646, + "loss": 3.1724, + "step": 4050 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003862905311627044, + "loss": 3.1, + "step": 4051 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038606210330770923, + "loss": 3.0924, + "step": 4052 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003858337005332634, + "loss": 3.2093, + "step": 4053 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003856053228896442, + "loss": 3.2267, + "step": 4054 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003853769704271228, + "loss": 3.2123, + "step": 4055 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038514864319596576, + "loss": 3.2561, + "step": 4056 + }, + { + "epoch": 1.17, + "learning_rate": 0.00038492034124643304, + "loss": 3.1676, + "step": 4057 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038469206462877993, + "loss": 3.1073, + "step": 4058 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003844638133932557, + "loss": 3.2115, + "step": 4059 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003842355875901038, + "loss": 3.2581, + "step": 4060 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003840073872695627, + "loss": 3.2943, + "step": 4061 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003837792124818646, + "loss": 3.2224, + "step": 4062 + }, + { + "epoch": 1.18, + "learning_rate": 0.000383551063277237, + "loss": 3.2126, + "step": 4063 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003833229397059005, + "loss": 3.1695, + "step": 4064 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003830948418180713, + "loss": 3.2221, + "step": 4065 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038286676966395897, + "loss": 3.3321, + "step": 4066 + }, + { + "epoch": 1.18, + "learning_rate": 0.000382638723293768, + "loss": 3.3215, + "step": 4067 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038241070275769727, + "loss": 3.2907, + "step": 4068 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038218270810593924, + "loss": 3.1451, + "step": 4069 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038195473938868157, + "loss": 3.1538, + "step": 4070 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038172679665610554, + "loss": 3.1464, + "step": 4071 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003814988799583874, + "loss": 3.1446, + "step": 4072 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038127098934569674, + "loss": 3.1271, + "step": 4073 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003810431248681983, + "loss": 3.1067, + "step": 4074 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038081528657605044, + "loss": 3.2711, + "step": 4075 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003805874745194063, + "loss": 3.3308, + "step": 4076 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038035968874841256, + "loss": 3.1709, + "step": 4077 + }, + { + "epoch": 1.18, + "learning_rate": 0.00038013192931321093, + "loss": 3.2316, + "step": 4078 + }, + { + "epoch": 1.18, + "learning_rate": 0.00037990419626393655, + "loss": 3.1083, + "step": 4079 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003796764896507192, + "loss": 3.1901, + "step": 4080 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003794488095236829, + "loss": 3.1927, + "step": 4081 + }, + { + "epoch": 1.18, + "learning_rate": 0.00037922115593294533, + "loss": 3.114, + "step": 4082 + }, + { + "epoch": 1.18, + "learning_rate": 0.000378993528928619, + "loss": 3.2058, + "step": 4083 + }, + { + "epoch": 1.18, + "learning_rate": 0.00037876592856081, + "loss": 3.1745, + "step": 4084 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003785383548796191, + "loss": 3.2863, + "step": 4085 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003783108079351406, + "loss": 3.2427, + "step": 4086 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003780832877774635, + "loss": 3.179, + "step": 4087 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003778557944566704, + "loss": 3.1784, + "step": 4088 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003776283280228381, + "loss": 3.1608, + "step": 4089 + }, + { + "epoch": 1.18, + "learning_rate": 0.00037740088852603797, + "loss": 3.1194, + "step": 4090 + }, + { + "epoch": 1.18, + "learning_rate": 0.00037717347601633466, + "loss": 3.2013, + "step": 4091 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037694609054378755, + "loss": 3.1917, + "step": 4092 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037671873215844965, + "loss": 3.1769, + "step": 4093 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037649140091036845, + "loss": 3.2586, + "step": 4094 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037626409684958474, + "loss": 3.2098, + "step": 4095 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003760368200261342, + "loss": 3.1913, + "step": 4096 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037580957049004583, + "loss": 3.1873, + "step": 4097 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003755823482913428, + "loss": 3.17, + "step": 4098 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037535515348004265, + "loss": 3.2382, + "step": 4099 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003751279861061562, + "loss": 3.2544, + "step": 4100 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003749008462196889, + "loss": 3.2093, + "step": 4101 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037467373387063967, + "loss": 3.1262, + "step": 4102 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037444664910900186, + "loss": 3.2361, + "step": 4103 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037421959198476195, + "loss": 3.0531, + "step": 4104 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003739925625479013, + "loss": 3.1786, + "step": 4105 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037376556084839466, + "loss": 3.1437, + "step": 4106 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003735385869362102, + "loss": 3.345, + "step": 4107 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003733116408613112, + "loss": 3.1622, + "step": 4108 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037308472267365343, + "loss": 3.1459, + "step": 4109 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003728578324231877, + "loss": 3.2328, + "step": 4110 + }, + { + "epoch": 1.19, + "learning_rate": 0.000372630970159858, + "loss": 3.1024, + "step": 4111 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037240413593360244, + "loss": 3.3138, + "step": 4112 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003721773297943525, + "loss": 3.162, + "step": 4113 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037195055179203417, + "loss": 3.289, + "step": 4114 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037172380197656693, + "loss": 3.2632, + "step": 4115 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003714970803978636, + "loss": 3.1751, + "step": 4116 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003712703871058317, + "loss": 3.2248, + "step": 4117 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003710437221503716, + "loss": 3.1523, + "step": 4118 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003708170855813782, + "loss": 3.1638, + "step": 4119 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003705904774487396, + "loss": 3.1285, + "step": 4120 + }, + { + "epoch": 1.19, + "learning_rate": 0.00037036389780233813, + "loss": 3.1076, + "step": 4121 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003701373466920492, + "loss": 3.2363, + "step": 4122 + }, + { + "epoch": 1.19, + "learning_rate": 0.00036991082416774256, + "loss": 3.0701, + "step": 4123 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003696843302792815, + "loss": 3.0883, + "step": 4124 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003694578650765226, + "loss": 3.2974, + "step": 4125 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003692314286093167, + "loss": 3.1886, + "step": 4126 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003690050209275079, + "loss": 3.106, + "step": 4127 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003687786420809345, + "loss": 3.2187, + "step": 4128 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036855229211942757, + "loss": 3.1785, + "step": 4129 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036832597109281285, + "loss": 3.0497, + "step": 4130 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003680996790509087, + "loss": 3.0127, + "step": 4131 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003678734160435279, + "loss": 3.0846, + "step": 4132 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036764718212047666, + "loss": 3.1767, + "step": 4133 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003674209773315542, + "loss": 3.102, + "step": 4134 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036719480172655417, + "loss": 3.1547, + "step": 4135 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003669686553552632, + "loss": 3.1923, + "step": 4136 + }, + { + "epoch": 1.2, + "learning_rate": 0.000366742538267462, + "loss": 3.3023, + "step": 4137 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036651645051292417, + "loss": 3.1525, + "step": 4138 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036629039214141755, + "loss": 3.2204, + "step": 4139 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036606436320270287, + "loss": 3.1223, + "step": 4140 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003658383637465349, + "loss": 3.3409, + "step": 4141 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036561239382266173, + "loss": 3.1399, + "step": 4142 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036538645348082466, + "loss": 3.2694, + "step": 4143 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036516054277075906, + "loss": 3.268, + "step": 4144 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036493466174219313, + "loss": 3.1023, + "step": 4145 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003647088104448494, + "loss": 3.2836, + "step": 4146 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003644829889284428, + "loss": 3.3069, + "step": 4147 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003642571972426826, + "loss": 3.2055, + "step": 4148 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003640314354372708, + "loss": 3.231, + "step": 4149 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036380570356190343, + "loss": 3.177, + "step": 4150 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003635800016662696, + "loss": 3.145, + "step": 4151 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003633543298000517, + "loss": 3.1336, + "step": 4152 + }, + { + "epoch": 1.2, + "learning_rate": 0.000363128688012926, + "loss": 3.2226, + "step": 4153 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036290307635456143, + "loss": 3.2366, + "step": 4154 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036267749487462126, + "loss": 3.1169, + "step": 4155 + }, + { + "epoch": 1.2, + "learning_rate": 0.00036245194362276095, + "loss": 3.1966, + "step": 4156 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003622264226486304, + "loss": 3.1723, + "step": 4157 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003620009320018719, + "loss": 3.0829, + "step": 4158 + }, + { + "epoch": 1.2, + "learning_rate": 0.000361775471732122, + "loss": 3.2862, + "step": 4159 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003615500418890099, + "loss": 3.1118, + "step": 4160 + }, + { + "epoch": 1.21, + "learning_rate": 0.00036132464252215804, + "loss": 3.1997, + "step": 4161 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003610992736811827, + "loss": 3.2473, + "step": 4162 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003608739354156928, + "loss": 3.0605, + "step": 4163 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003606486277752914, + "loss": 3.2096, + "step": 4164 + }, + { + "epoch": 1.21, + "learning_rate": 0.00036042335080957373, + "loss": 3.1528, + "step": 4165 + }, + { + "epoch": 1.21, + "learning_rate": 0.00036019810456812916, + "loss": 3.2216, + "step": 4166 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035997288910053984, + "loss": 3.1796, + "step": 4167 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003597477044563814, + "loss": 3.2269, + "step": 4168 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035952255068522233, + "loss": 3.3176, + "step": 4169 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035929742783662476, + "loss": 3.2202, + "step": 4170 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035907233596014365, + "loss": 3.241, + "step": 4171 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003588472751053272, + "loss": 3.2567, + "step": 4172 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035862224532171727, + "loss": 3.1592, + "step": 4173 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035839724665884796, + "loss": 3.2479, + "step": 4174 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035817227916624744, + "loss": 3.1329, + "step": 4175 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003579473428934364, + "loss": 3.1178, + "step": 4176 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003577224378899291, + "loss": 3.1493, + "step": 4177 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035749756420523236, + "loss": 3.0414, + "step": 4178 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003572727218888467, + "loss": 3.1915, + "step": 4179 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003570479109902656, + "loss": 3.1355, + "step": 4180 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035682313155897507, + "loss": 3.1475, + "step": 4181 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035659838364445503, + "loss": 3.3453, + "step": 4182 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003563736672961777, + "loss": 3.0462, + "step": 4183 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035614898256360896, + "loss": 3.2848, + "step": 4184 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003559243294962073, + "loss": 3.2593, + "step": 4185 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035569970814342477, + "loss": 3.2589, + "step": 4186 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003554751185547056, + "loss": 3.3488, + "step": 4187 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035525056077948794, + "loss": 3.1833, + "step": 4188 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003550260348672023, + "loss": 3.2401, + "step": 4189 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035480154086727227, + "loss": 3.1203, + "step": 4190 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035457707882911497, + "loss": 3.1669, + "step": 4191 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003543526488021394, + "loss": 3.1721, + "step": 4192 + }, + { + "epoch": 1.21, + "learning_rate": 0.00035412825083574865, + "loss": 3.0992, + "step": 4193 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003539038849793381, + "loss": 3.172, + "step": 4194 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003536795512822964, + "loss": 3.1138, + "step": 4195 + }, + { + "epoch": 1.22, + "learning_rate": 0.00035345524979400457, + "loss": 3.151, + "step": 4196 + }, + { + "epoch": 1.22, + "learning_rate": 0.00035323098056383727, + "loss": 3.1909, + "step": 4197 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003530067436411617, + "loss": 3.1957, + "step": 4198 + }, + { + "epoch": 1.22, + "learning_rate": 0.00035278253907533764, + "loss": 3.1767, + "step": 4199 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003525583669157184, + "loss": 3.298, + "step": 4200 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003523342272116495, + "loss": 3.1666, + "step": 4201 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003521101200124699, + "loss": 3.0668, + "step": 4202 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003518860453675109, + "loss": 3.1048, + "step": 4203 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003516620033260972, + "loss": 3.0785, + "step": 4204 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003514379939375457, + "loss": 3.2453, + "step": 4205 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003512140172511665, + "loss": 3.1934, + "step": 4206 + }, + { + "epoch": 1.22, + "learning_rate": 0.00035099007331626265, + "loss": 3.1732, + "step": 4207 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003507661621821292, + "loss": 3.1776, + "step": 4208 + }, + { + "epoch": 1.22, + "learning_rate": 0.00035054228389805504, + "loss": 3.1875, + "step": 4209 + }, + { + "epoch": 1.22, + "learning_rate": 0.000350318438513321, + "loss": 3.0945, + "step": 4210 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003500946260772013, + "loss": 3.1285, + "step": 4211 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003498708466389622, + "loss": 3.247, + "step": 4212 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003496471002478635, + "loss": 3.1806, + "step": 4213 + }, + { + "epoch": 1.22, + "learning_rate": 0.000349423386953157, + "loss": 3.1901, + "step": 4214 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034919970680408766, + "loss": 3.1186, + "step": 4215 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034897605984989314, + "loss": 3.1894, + "step": 4216 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003487524461398033, + "loss": 3.1216, + "step": 4217 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034852886572304134, + "loss": 3.1955, + "step": 4218 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003483053186488227, + "loss": 3.2126, + "step": 4219 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034808180496635586, + "loss": 3.1729, + "step": 4220 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003478583247248414, + "loss": 3.2972, + "step": 4221 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003476348779734732, + "loss": 3.2511, + "step": 4222 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034741146476143696, + "loss": 3.248, + "step": 4223 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034718808513791187, + "loss": 3.1516, + "step": 4224 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034696473915206915, + "loss": 3.2058, + "step": 4225 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034674142685307266, + "loss": 3.2724, + "step": 4226 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034651814829007914, + "loss": 3.1246, + "step": 4227 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003462949035122376, + "loss": 3.195, + "step": 4228 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034607169256868994, + "loss": 3.3511, + "step": 4229 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003458485155085701, + "loss": 3.1965, + "step": 4230 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034562537238100506, + "loss": 3.2688, + "step": 4231 + }, + { + "epoch": 1.23, + "learning_rate": 0.000345402263235114, + "loss": 3.1221, + "step": 4232 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034517918812000924, + "loss": 3.2228, + "step": 4233 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034495614708479464, + "loss": 3.184, + "step": 4234 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034473314017856706, + "loss": 3.1453, + "step": 4235 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034451016745041597, + "loss": 3.1863, + "step": 4236 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003442872289494231, + "loss": 3.2089, + "step": 4237 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034406432472466306, + "loss": 3.1962, + "step": 4238 + }, + { + "epoch": 1.23, + "learning_rate": 0.000343841454825202, + "loss": 3.1547, + "step": 4239 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003436186193000996, + "loss": 3.3114, + "step": 4240 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034339581819840717, + "loss": 3.2018, + "step": 4241 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034317305156916913, + "loss": 3.0251, + "step": 4242 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003429503194614215, + "loss": 3.1709, + "step": 4243 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034272762192419327, + "loss": 3.1243, + "step": 4244 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003425049590065057, + "loss": 3.2077, + "step": 4245 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034228233075737224, + "loss": 3.1441, + "step": 4246 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003420597372257993, + "loss": 3.2513, + "step": 4247 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003418371784607848, + "loss": 3.0965, + "step": 4248 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034161465451131956, + "loss": 3.2459, + "step": 4249 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003413921654263866, + "loss": 3.1875, + "step": 4250 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003411697112549615, + "loss": 3.1533, + "step": 4251 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003409472920460117, + "loss": 3.1681, + "step": 4252 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034072490784849705, + "loss": 3.2463, + "step": 4253 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034050255871137004, + "loss": 3.137, + "step": 4254 + }, + { + "epoch": 1.23, + "learning_rate": 0.000340280244683575, + "loss": 3.2839, + "step": 4255 + }, + { + "epoch": 1.23, + "learning_rate": 0.00034005796581404907, + "loss": 3.1489, + "step": 4256 + }, + { + "epoch": 1.23, + "learning_rate": 0.000339835722151721, + "loss": 3.1893, + "step": 4257 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003396135137455123, + "loss": 3.2583, + "step": 4258 + }, + { + "epoch": 1.23, + "learning_rate": 0.00033939134064433634, + "loss": 3.1624, + "step": 4259 + }, + { + "epoch": 1.23, + "learning_rate": 0.00033916920289709935, + "loss": 3.0282, + "step": 4260 + }, + { + "epoch": 1.23, + "learning_rate": 0.00033894710055269885, + "loss": 3.1511, + "step": 4261 + }, + { + "epoch": 1.23, + "learning_rate": 0.0003387250336600254, + "loss": 3.2936, + "step": 4262 + }, + { + "epoch": 1.23, + "learning_rate": 0.00033850300226796124, + "loss": 3.2264, + "step": 4263 + }, + { + "epoch": 1.23, + "learning_rate": 0.00033828100642538096, + "loss": 3.0628, + "step": 4264 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033805904618115155, + "loss": 3.2084, + "step": 4265 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003378371215841316, + "loss": 3.2241, + "step": 4266 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003376152326831725, + "loss": 3.154, + "step": 4267 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003373933795271172, + "loss": 3.1831, + "step": 4268 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033717156216480157, + "loss": 3.1617, + "step": 4269 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003369497806450526, + "loss": 3.2192, + "step": 4270 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033672803501669, + "loss": 3.2653, + "step": 4271 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033650632532852576, + "loss": 3.0386, + "step": 4272 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033628465162936316, + "loss": 3.2432, + "step": 4273 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003360630139679986, + "loss": 3.1577, + "step": 4274 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033584141239321953, + "loss": 3.1083, + "step": 4275 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033561984695380624, + "loss": 3.1469, + "step": 4276 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003353983176985306, + "loss": 3.1334, + "step": 4277 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003351768246761571, + "loss": 3.2648, + "step": 4278 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033495536793544113, + "loss": 3.2822, + "step": 4279 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003347339475251313, + "loss": 3.234, + "step": 4280 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033451256349396777, + "loss": 3.1696, + "step": 4281 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033429121589068214, + "loss": 3.1158, + "step": 4282 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033406990476399895, + "loss": 3.3167, + "step": 4283 + }, + { + "epoch": 1.24, + "learning_rate": 0.000333848630162634, + "loss": 3.1526, + "step": 4284 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033362739213529567, + "loss": 3.1677, + "step": 4285 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033340619073068343, + "loss": 3.1768, + "step": 4286 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003331850259974897, + "loss": 3.2288, + "step": 4287 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003329638979843977, + "loss": 3.1919, + "step": 4288 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003327428067400836, + "loss": 3.2671, + "step": 4289 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033252175231321515, + "loss": 3.2077, + "step": 4290 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003323007347524515, + "loss": 3.2547, + "step": 4291 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003320797541064443, + "loss": 3.2235, + "step": 4292 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003318588104238368, + "loss": 3.2003, + "step": 4293 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003316379037532644, + "loss": 3.1254, + "step": 4294 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033141703414335365, + "loss": 3.0023, + "step": 4295 + }, + { + "epoch": 1.24, + "learning_rate": 0.000331196201642724, + "loss": 3.0829, + "step": 4296 + }, + { + "epoch": 1.24, + "learning_rate": 0.00033097540629998563, + "loss": 3.2302, + "step": 4297 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003307546481637413, + "loss": 3.1724, + "step": 4298 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003305339272825855, + "loss": 3.1656, + "step": 4299 + }, + { + "epoch": 1.25, + "learning_rate": 0.00033031324370510394, + "loss": 3.1136, + "step": 4300 + }, + { + "epoch": 1.25, + "learning_rate": 0.00033009259747987486, + "loss": 3.1695, + "step": 4301 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003298719886554677, + "loss": 3.2487, + "step": 4302 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003296514172804442, + "loss": 3.1768, + "step": 4303 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003294308834033574, + "loss": 3.2315, + "step": 4304 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003292103870727524, + "loss": 3.2935, + "step": 4305 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003289899283371657, + "loss": 3.16, + "step": 4306 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032876950724512575, + "loss": 3.1158, + "step": 4307 + }, + { + "epoch": 1.25, + "learning_rate": 0.000328549123845153, + "loss": 3.1635, + "step": 4308 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003283287781857588, + "loss": 3.1948, + "step": 4309 + }, + { + "epoch": 1.25, + "learning_rate": 0.000328108470315447, + "loss": 3.2165, + "step": 4310 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003278882002827127, + "loss": 3.1096, + "step": 4311 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003276679681360429, + "loss": 3.2466, + "step": 4312 + }, + { + "epoch": 1.25, + "learning_rate": 0.000327447773923916, + "loss": 3.1804, + "step": 4313 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032722761769480224, + "loss": 3.1581, + "step": 4314 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032700749949716345, + "loss": 3.2719, + "step": 4315 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032678741937945324, + "loss": 3.0779, + "step": 4316 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032656737739011655, + "loss": 3.2199, + "step": 4317 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003263473735775899, + "loss": 3.1606, + "step": 4318 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003261274079903018, + "loss": 3.1951, + "step": 4319 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032590748067667203, + "loss": 3.2043, + "step": 4320 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003256875916851122, + "loss": 3.0775, + "step": 4321 + }, + { + "epoch": 1.25, + "learning_rate": 0.000325467741064025, + "loss": 3.1817, + "step": 4322 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032524792886180526, + "loss": 3.2576, + "step": 4323 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003250281551268389, + "loss": 3.1821, + "step": 4324 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003248084199075038, + "loss": 3.1587, + "step": 4325 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032458872325216893, + "loss": 3.2003, + "step": 4326 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032436906520919484, + "loss": 3.2474, + "step": 4327 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032414944582693394, + "loss": 3.1876, + "step": 4328 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032392986515372967, + "loss": 3.1252, + "step": 4329 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032371032323791755, + "loss": 3.0773, + "step": 4330 + }, + { + "epoch": 1.25, + "learning_rate": 0.00032349082012782373, + "loss": 3.0792, + "step": 4331 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003232713558717665, + "loss": 3.2049, + "step": 4332 + }, + { + "epoch": 1.25, + "learning_rate": 0.0003230519305180553, + "loss": 3.1554, + "step": 4333 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032283254411499124, + "loss": 3.3109, + "step": 4334 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032261319671086653, + "loss": 3.1282, + "step": 4335 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003223938883539649, + "loss": 3.2183, + "step": 4336 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003221746190925618, + "loss": 3.1729, + "step": 4337 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003219553889749235, + "loss": 3.1048, + "step": 4338 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032173619804930844, + "loss": 3.0901, + "step": 4339 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032151704636396535, + "loss": 3.0831, + "step": 4340 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032129793396713536, + "loss": 3.1518, + "step": 4341 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003210788609070504, + "loss": 3.0302, + "step": 4342 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032085982723193394, + "loss": 3.1618, + "step": 4343 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032064083299000067, + "loss": 3.1068, + "step": 4344 + }, + { + "epoch": 1.26, + "learning_rate": 0.00032042187822945643, + "loss": 3.3139, + "step": 4345 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003202029629984991, + "loss": 3.1033, + "step": 4346 + }, + { + "epoch": 1.26, + "learning_rate": 0.00031998408734531673, + "loss": 3.1995, + "step": 4347 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003197652513180898, + "loss": 3.2368, + "step": 4348 + }, + { + "epoch": 1.26, + "learning_rate": 0.000319546454964989, + "loss": 3.2304, + "step": 4349 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003193276983341773, + "loss": 3.081, + "step": 4350 + }, + { + "epoch": 1.26, + "learning_rate": 0.00031910898147380806, + "loss": 3.1436, + "step": 4351 + }, + { + "epoch": 1.26, + "learning_rate": 0.00031889030443202674, + "loss": 3.1729, + "step": 4352 + }, + { + "epoch": 1.26, + "learning_rate": 0.000318671667256969, + "loss": 3.2123, + "step": 4353 + }, + { + "epoch": 1.26, + "learning_rate": 0.00031845306999676273, + "loss": 3.2762, + "step": 4354 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003182345126995265, + "loss": 3.1385, + "step": 4355 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003180159954133699, + "loss": 3.1416, + "step": 4356 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003177975181863945, + "loss": 3.2634, + "step": 4357 + }, + { + "epoch": 1.26, + "learning_rate": 0.00031757908106669195, + "loss": 3.3148, + "step": 4358 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003173606841023461, + "loss": 3.2412, + "step": 4359 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003171423273414312, + "loss": 3.0294, + "step": 4360 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003169240108320135, + "loss": 3.0657, + "step": 4361 + }, + { + "epoch": 1.26, + "learning_rate": 0.00031670573462214927, + "loss": 3.2326, + "step": 4362 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003164874987598868, + "loss": 3.2638, + "step": 4363 + }, + { + "epoch": 1.26, + "learning_rate": 0.00031626930329326533, + "loss": 3.1991, + "step": 4364 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003160511482703147, + "loss": 3.0151, + "step": 4365 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003158330337390565, + "loss": 3.2689, + "step": 4366 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003156149597475029, + "loss": 3.2691, + "step": 4367 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031539692634365783, + "loss": 3.1791, + "step": 4368 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031517893357551534, + "loss": 3.1898, + "step": 4369 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031496098149106147, + "loss": 3.1655, + "step": 4370 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003147430701382724, + "loss": 3.2061, + "step": 4371 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003145251995651162, + "loss": 3.1195, + "step": 4372 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031430736981955145, + "loss": 3.2595, + "step": 4373 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031408958094952756, + "loss": 3.0659, + "step": 4374 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003138718330029857, + "loss": 3.0981, + "step": 4375 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003136541260278574, + "loss": 3.0637, + "step": 4376 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003134364600720654, + "loss": 3.1966, + "step": 4377 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003132188351835232, + "loss": 3.0912, + "step": 4378 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003130012514101357, + "loss": 3.2594, + "step": 4379 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003127837087997981, + "loss": 3.0932, + "step": 4380 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003125662074003972, + "loss": 3.3003, + "step": 4381 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031234874725981045, + "loss": 3.1608, + "step": 4382 + }, + { + "epoch": 1.27, + "learning_rate": 0.000312131328425906, + "loss": 3.1476, + "step": 4383 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031191395094654317, + "loss": 3.1319, + "step": 4384 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003116966148695721, + "loss": 3.112, + "step": 4385 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003114793202428342, + "loss": 3.2105, + "step": 4386 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031126206711416083, + "loss": 3.1714, + "step": 4387 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003110448555313752, + "loss": 3.1959, + "step": 4388 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031082768554229064, + "loss": 3.1005, + "step": 4389 + }, + { + "epoch": 1.27, + "learning_rate": 0.000310610557194712, + "loss": 3.1843, + "step": 4390 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003103934705364344, + "loss": 3.056, + "step": 4391 + }, + { + "epoch": 1.27, + "learning_rate": 0.00031017642561524385, + "loss": 3.1208, + "step": 4392 + }, + { + "epoch": 1.27, + "learning_rate": 0.00030995942247891747, + "loss": 3.0345, + "step": 4393 + }, + { + "epoch": 1.27, + "learning_rate": 0.00030974246117522285, + "loss": 3.1842, + "step": 4394 + }, + { + "epoch": 1.27, + "learning_rate": 0.00030952554175191894, + "loss": 3.1379, + "step": 4395 + }, + { + "epoch": 1.27, + "learning_rate": 0.00030930866425675453, + "loss": 3.2579, + "step": 4396 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003090918287374701, + "loss": 3.0501, + "step": 4397 + }, + { + "epoch": 1.27, + "learning_rate": 0.00030887503524179637, + "loss": 3.2008, + "step": 4398 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003086582838174551, + "loss": 3.1131, + "step": 4399 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003084415745121586, + "loss": 3.2033, + "step": 4400 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003082249073736097, + "loss": 3.0802, + "step": 4401 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003080082824495024, + "loss": 3.2029, + "step": 4402 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003077916997875211, + "loss": 3.2674, + "step": 4403 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003075751594353413, + "loss": 3.1842, + "step": 4404 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030735866144062843, + "loss": 3.1468, + "step": 4405 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030714220585103933, + "loss": 3.118, + "step": 4406 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030692579271422124, + "loss": 3.1837, + "step": 4407 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030670942207781205, + "loss": 3.0979, + "step": 4408 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030649309398944033, + "loss": 3.1452, + "step": 4409 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003062768084967249, + "loss": 3.2495, + "step": 4410 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003060605656472759, + "loss": 3.1266, + "step": 4411 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003058443654886935, + "loss": 3.1904, + "step": 4412 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030562820806856906, + "loss": 3.1553, + "step": 4413 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030541209343448375, + "loss": 3.0798, + "step": 4414 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030519602163400994, + "loss": 3.1815, + "step": 4415 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030497999271471033, + "loss": 3.0954, + "step": 4416 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003047640067241385, + "loss": 3.101, + "step": 4417 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030454806370983794, + "loss": 3.1149, + "step": 4418 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030433216371934314, + "loss": 3.2756, + "step": 4419 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030411630680017923, + "loss": 3.2381, + "step": 4420 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003039004929998613, + "loss": 3.1258, + "step": 4421 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003036847223658958, + "loss": 3.2076, + "step": 4422 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030346899494577864, + "loss": 3.2223, + "step": 4423 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030325331078699707, + "loss": 3.1679, + "step": 4424 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003030376699370284, + "loss": 3.1179, + "step": 4425 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030282207244334083, + "loss": 3.1891, + "step": 4426 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030260651835339224, + "loss": 3.1348, + "step": 4427 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030239100771463156, + "loss": 3.1525, + "step": 4428 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030217554057449826, + "loss": 3.1918, + "step": 4429 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003019601169804216, + "loss": 3.0102, + "step": 4430 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030174473697982206, + "loss": 3.1344, + "step": 4431 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003015294006201097, + "loss": 3.1769, + "step": 4432 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030131410794868564, + "loss": 3.1818, + "step": 4433 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003010988590129409, + "loss": 3.0946, + "step": 4434 + }, + { + "epoch": 1.28, + "learning_rate": 0.0003008836538602575, + "loss": 3.1929, + "step": 4435 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030066849253800705, + "loss": 3.2949, + "step": 4436 + }, + { + "epoch": 1.28, + "learning_rate": 0.00030045337509355187, + "loss": 3.136, + "step": 4437 + }, + { + "epoch": 1.29, + "learning_rate": 0.00030023830157424505, + "loss": 3.1546, + "step": 4438 + }, + { + "epoch": 1.29, + "learning_rate": 0.00030002327202742897, + "loss": 3.1719, + "step": 4439 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002998082865004374, + "loss": 3.1127, + "step": 4440 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029959334504059377, + "loss": 3.0611, + "step": 4441 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002993784476952123, + "loss": 3.1246, + "step": 4442 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029916359451159677, + "loss": 3.0966, + "step": 4443 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002989487855370421, + "loss": 3.1859, + "step": 4444 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029873402081883276, + "loss": 3.2014, + "step": 4445 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002985193004042438, + "loss": 3.1886, + "step": 4446 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029830462434054085, + "loss": 3.2714, + "step": 4447 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002980899926749788, + "loss": 3.1641, + "step": 4448 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029787540545480397, + "loss": 3.2276, + "step": 4449 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029766086272725194, + "loss": 3.1944, + "step": 4450 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002974463645395492, + "loss": 3.1607, + "step": 4451 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002972319109389118, + "loss": 3.22, + "step": 4452 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002970175019725465, + "loss": 3.1153, + "step": 4453 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029680313768764985, + "loss": 3.079, + "step": 4454 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002965888181314088, + "loss": 3.2596, + "step": 4455 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002963745433510005, + "loss": 3.2678, + "step": 4456 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029616031339359185, + "loss": 3.1018, + "step": 4457 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002959461283063404, + "loss": 3.1338, + "step": 4458 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029573198813639343, + "loss": 3.1867, + "step": 4459 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002955178929308888, + "loss": 3.2485, + "step": 4460 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002953038427369537, + "loss": 3.0609, + "step": 4461 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029508983760170637, + "loss": 3.1474, + "step": 4462 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029487587757225407, + "loss": 3.0304, + "step": 4463 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002946619626956952, + "loss": 3.0586, + "step": 4464 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002944480930191176, + "loss": 3.2352, + "step": 4465 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002942342685895989, + "loss": 3.1061, + "step": 4466 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029402048945420756, + "loss": 3.0077, + "step": 4467 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002938067556600015, + "loss": 3.0736, + "step": 4468 + }, + { + "epoch": 1.29, + "learning_rate": 0.000293593067254029, + "loss": 3.1296, + "step": 4469 + }, + { + "epoch": 1.29, + "learning_rate": 0.00029337942428332784, + "loss": 3.1422, + "step": 4470 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002931658267949264, + "loss": 3.3099, + "step": 4471 + }, + { + "epoch": 1.3, + "learning_rate": 0.00029295227483584253, + "loss": 3.0691, + "step": 4472 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002927387684530846, + "loss": 3.2537, + "step": 4473 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002925253076936505, + "loss": 3.2675, + "step": 4474 + }, + { + "epoch": 1.3, + "learning_rate": 0.00029231189260452807, + "loss": 3.202, + "step": 4475 + }, + { + "epoch": 1.3, + "learning_rate": 0.00029209852323269536, + "loss": 3.1449, + "step": 4476 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002918851996251203, + "loss": 3.1791, + "step": 4477 + }, + { + "epoch": 1.3, + "learning_rate": 0.00029167192182876075, + "loss": 3.1496, + "step": 4478 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002914586898905642, + "loss": 3.1588, + "step": 4479 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002912455038574686, + "loss": 3.1174, + "step": 4480 + }, + { + "epoch": 1.3, + "learning_rate": 0.000291032363776401, + "loss": 3.2206, + "step": 4481 + }, + { + "epoch": 1.3, + "learning_rate": 0.00029081926969427917, + "loss": 3.1654, + "step": 4482 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002906062216580105, + "loss": 3.1472, + "step": 4483 + }, + { + "epoch": 1.3, + "learning_rate": 0.00029039321971449173, + "loss": 3.168, + "step": 4484 + }, + { + "epoch": 1.3, + "learning_rate": 0.00029018026391061023, + "loss": 3.3089, + "step": 4485 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028996735429324255, + "loss": 3.1458, + "step": 4486 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028975449090925566, + "loss": 3.1569, + "step": 4487 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002895416738055057, + "loss": 3.1133, + "step": 4488 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002893289030288393, + "loss": 3.1959, + "step": 4489 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028911617862609223, + "loss": 3.2427, + "step": 4490 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028890350064409066, + "loss": 3.1281, + "step": 4491 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002886908691296504, + "loss": 3.2941, + "step": 4492 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002884782841295764, + "loss": 3.1643, + "step": 4493 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028826574569066455, + "loss": 3.1858, + "step": 4494 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002880532538596992, + "loss": 3.2509, + "step": 4495 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028784080868345547, + "loss": 3.191, + "step": 4496 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028762841020869747, + "loss": 3.0864, + "step": 4497 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002874160584821798, + "loss": 3.1358, + "step": 4498 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028720375355064575, + "loss": 3.2339, + "step": 4499 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002869914954608293, + "loss": 3.144, + "step": 4500 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002867792842594538, + "loss": 3.2085, + "step": 4501 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028656711999323176, + "loss": 3.2408, + "step": 4502 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002863550027088662, + "loss": 2.9579, + "step": 4503 + }, + { + "epoch": 1.3, + "learning_rate": 0.000286142932453049, + "loss": 3.2115, + "step": 4504 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028593090927246244, + "loss": 3.1262, + "step": 4505 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028571893321377776, + "loss": 3.2156, + "step": 4506 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028550700432365645, + "loss": 3.2541, + "step": 4507 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028529512264874893, + "loss": 3.2622, + "step": 4508 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028508328823569567, + "loss": 3.2184, + "step": 4509 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028487150113112706, + "loss": 3.0903, + "step": 4510 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002846597613816622, + "loss": 3.2262, + "step": 4511 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002844480690339106, + "loss": 3.2081, + "step": 4512 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002842364241344707, + "loss": 3.1509, + "step": 4513 + }, + { + "epoch": 1.31, + "learning_rate": 0.000284024826729931, + "loss": 3.0985, + "step": 4514 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028381327686686917, + "loss": 3.2135, + "step": 4515 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028360177459185265, + "loss": 3.0244, + "step": 4516 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002833903199514383, + "loss": 3.0926, + "step": 4517 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028317891299217235, + "loss": 3.0954, + "step": 4518 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028296755376059106, + "loss": 3.2463, + "step": 4519 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002827562423032195, + "loss": 3.1249, + "step": 4520 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002825449786665729, + "loss": 3.2133, + "step": 4521 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028233376289715507, + "loss": 3.1463, + "step": 4522 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028212259504146043, + "loss": 3.1391, + "step": 4523 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002819114751459718, + "loss": 3.2367, + "step": 4524 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028170040325716216, + "loss": 3.0864, + "step": 4525 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028148937942149347, + "loss": 3.1327, + "step": 4526 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002812784036854172, + "loss": 3.2885, + "step": 4527 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028106747609537485, + "loss": 3.2478, + "step": 4528 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028085659669779616, + "loss": 3.0744, + "step": 4529 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002806457655391015, + "loss": 3.163, + "step": 4530 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028043498266569944, + "loss": 3.2394, + "step": 4531 + }, + { + "epoch": 1.31, + "learning_rate": 0.000280224248123989, + "loss": 3.1724, + "step": 4532 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028001356196035766, + "loss": 3.2768, + "step": 4533 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002798029242211828, + "loss": 3.1896, + "step": 4534 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002795923349528311, + "loss": 3.079, + "step": 4535 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002793817942016585, + "loss": 3.1793, + "step": 4536 + }, + { + "epoch": 1.31, + "learning_rate": 0.00027917130201401025, + "loss": 3.1879, + "step": 4537 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002789608584362205, + "loss": 3.2226, + "step": 4538 + }, + { + "epoch": 1.31, + "learning_rate": 0.00027875046351461365, + "loss": 3.0989, + "step": 4539 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002785401172955022, + "loss": 3.2153, + "step": 4540 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027832981982518925, + "loss": 3.2679, + "step": 4541 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027811957114996577, + "loss": 3.2148, + "step": 4542 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002779093713161131, + "loss": 3.3309, + "step": 4543 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002776992203699014, + "loss": 3.2397, + "step": 4544 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002774891183575902, + "loss": 3.2486, + "step": 4545 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002772790653254278, + "loss": 3.1203, + "step": 4546 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002770690613196525, + "loss": 3.2269, + "step": 4547 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002768591063864911, + "loss": 3.1521, + "step": 4548 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027664920057215975, + "loss": 3.2765, + "step": 4549 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027643934392286427, + "loss": 3.1786, + "step": 4550 + }, + { + "epoch": 1.32, + "learning_rate": 0.000276229536484799, + "loss": 3.1451, + "step": 4551 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002760197783041477, + "loss": 3.2161, + "step": 4552 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027581006942708363, + "loss": 3.2235, + "step": 4553 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027560040989976894, + "loss": 3.1718, + "step": 4554 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027539079976835444, + "loss": 3.1093, + "step": 4555 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027518123907898105, + "loss": 3.0281, + "step": 4556 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002749717278777779, + "loss": 3.0718, + "step": 4557 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027476226621086353, + "loss": 3.2629, + "step": 4558 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027455285412434597, + "loss": 3.1003, + "step": 4559 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002743434916643216, + "loss": 3.1979, + "step": 4560 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002741341788768764, + "loss": 3.0718, + "step": 4561 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002739249158080855, + "loss": 3.2098, + "step": 4562 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002737157025040129, + "loss": 3.1909, + "step": 4563 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002735065390107112, + "loss": 3.2881, + "step": 4564 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027329742537422297, + "loss": 3.258, + "step": 4565 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002730883616405791, + "loss": 3.1274, + "step": 4566 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027287934785579936, + "loss": 3.2338, + "step": 4567 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002726703840658934, + "loss": 3.2258, + "step": 4568 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027246147031685886, + "loss": 3.2384, + "step": 4569 + }, + { + "epoch": 1.32, + "learning_rate": 0.000272252606654683, + "loss": 3.0686, + "step": 4570 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002720437931253419, + "loss": 3.3101, + "step": 4571 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002718350297748008, + "loss": 3.0962, + "step": 4572 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027162631664901323, + "loss": 3.0311, + "step": 4573 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002714176537939225, + "loss": 3.1796, + "step": 4574 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002712090412554604, + "loss": 3.1838, + "step": 4575 + }, + { + "epoch": 1.33, + "learning_rate": 0.00027100047907954733, + "loss": 3.3009, + "step": 4576 + }, + { + "epoch": 1.33, + "learning_rate": 0.00027079196731209353, + "loss": 3.1507, + "step": 4577 + }, + { + "epoch": 1.33, + "learning_rate": 0.00027058350599899706, + "loss": 3.1066, + "step": 4578 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002703750951861458, + "loss": 3.1002, + "step": 4579 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002701667349194159, + "loss": 3.0537, + "step": 4580 + }, + { + "epoch": 1.33, + "learning_rate": 0.000269958425244673, + "loss": 3.2584, + "step": 4581 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002697501662077707, + "loss": 3.0661, + "step": 4582 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026954195785455244, + "loss": 3.1509, + "step": 4583 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026933380023084976, + "loss": 3.0836, + "step": 4584 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026912569338248315, + "loss": 3.1636, + "step": 4585 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002689176373552622, + "loss": 3.2025, + "step": 4586 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002687096321949853, + "loss": 3.2062, + "step": 4587 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026850167794743965, + "loss": 3.0706, + "step": 4588 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026829377465840075, + "loss": 3.14, + "step": 4589 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002680859223736336, + "loss": 3.1051, + "step": 4590 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026787812113889144, + "loss": 3.2444, + "step": 4591 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002676703709999165, + "loss": 3.0053, + "step": 4592 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002674626720024399, + "loss": 3.1174, + "step": 4593 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026725502419218085, + "loss": 3.1158, + "step": 4594 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002670474276148481, + "loss": 3.1198, + "step": 4595 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002668398823161389, + "loss": 3.1602, + "step": 4596 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026663238834173897, + "loss": 3.0769, + "step": 4597 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002664249457373228, + "loss": 3.1638, + "step": 4598 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026621755454855385, + "loss": 3.1523, + "step": 4599 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026601021482108373, + "loss": 3.2395, + "step": 4600 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002658029266005534, + "loss": 3.1136, + "step": 4601 + }, + { + "epoch": 1.33, + "learning_rate": 0.000265595689932592, + "loss": 3.0708, + "step": 4602 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026538850486281705, + "loss": 3.2708, + "step": 4603 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026518137143683563, + "loss": 3.1723, + "step": 4604 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002649742897002426, + "loss": 3.272, + "step": 4605 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026476725969862226, + "loss": 3.1536, + "step": 4606 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002645602814775464, + "loss": 3.1652, + "step": 4607 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002643533550825766, + "loss": 3.2234, + "step": 4608 + }, + { + "epoch": 1.33, + "learning_rate": 0.00026414648055926197, + "loss": 3.0745, + "step": 4609 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002639396579531412, + "loss": 3.1498, + "step": 4610 + }, + { + "epoch": 1.34, + "learning_rate": 0.00026373288730974085, + "loss": 3.2241, + "step": 4611 + }, + { + "epoch": 1.34, + "learning_rate": 0.00026352616867457597, + "loss": 3.1241, + "step": 4612 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002633195020931507, + "loss": 3.1765, + "step": 4613 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002631128876109575, + "loss": 3.1491, + "step": 4614 + }, + { + "epoch": 1.34, + "learning_rate": 0.00026290632527347746, + "loss": 3.1721, + "step": 4615 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002626998151261798, + "loss": 3.3024, + "step": 4616 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002624933572145227, + "loss": 3.1746, + "step": 4617 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002622869515839524, + "loss": 3.0736, + "step": 4618 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002620805982799042, + "loss": 3.1496, + "step": 4619 + }, + { + "epoch": 1.34, + "learning_rate": 0.00026187429734780143, + "loss": 3.1626, + "step": 4620 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002616680488330558, + "loss": 3.1653, + "step": 4621 + }, + { + "epoch": 1.34, + "learning_rate": 0.00026146185278106803, + "loss": 3.1045, + "step": 4622 + }, + { + "epoch": 1.34, + "learning_rate": 0.00026125570923722673, + "loss": 3.1786, + "step": 4623 + }, + { + "epoch": 1.34, + "learning_rate": 0.00026104961824690963, + "loss": 3.3294, + "step": 4624 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002608435798554819, + "loss": 3.1639, + "step": 4625 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002606375941082981, + "loss": 3.2586, + "step": 4626 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002604316610507005, + "loss": 3.1593, + "step": 4627 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002602257807280203, + "loss": 3.1559, + "step": 4628 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002600199531855768, + "loss": 3.1758, + "step": 4629 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002598141784686775, + "loss": 3.3047, + "step": 4630 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025960845662261855, + "loss": 3.2599, + "step": 4631 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025940278769268467, + "loss": 3.1531, + "step": 4632 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002591971717241487, + "loss": 3.2921, + "step": 4633 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025899160876227147, + "loss": 3.0657, + "step": 4634 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025878609885230275, + "loss": 3.1355, + "step": 4635 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002585806420394802, + "loss": 3.149, + "step": 4636 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002583752383690301, + "loss": 3.1341, + "step": 4637 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025816988788616665, + "loss": 3.116, + "step": 4638 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025796459063609267, + "loss": 3.1596, + "step": 4639 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025775934666399936, + "loss": 3.2647, + "step": 4640 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025755415601506573, + "loss": 3.2792, + "step": 4641 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002573490187344596, + "loss": 3.3075, + "step": 4642 + }, + { + "epoch": 1.34, + "learning_rate": 0.00025714393486733636, + "loss": 3.2197, + "step": 4643 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002569389044588404, + "loss": 3.1239, + "step": 4644 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002567339275541037, + "loss": 3.0941, + "step": 4645 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002565290041982471, + "loss": 3.1523, + "step": 4646 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025632413443637885, + "loss": 3.1606, + "step": 4647 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025611931831359615, + "loss": 3.1903, + "step": 4648 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002559145558749842, + "loss": 3.0856, + "step": 4649 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025570984716561594, + "loss": 3.0458, + "step": 4650 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002555051922305532, + "loss": 3.185, + "step": 4651 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025530059111484514, + "loss": 3.1185, + "step": 4652 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025509604386353, + "loss": 3.1567, + "step": 4653 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002548915505216333, + "loss": 3.2298, + "step": 4654 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025468711113416944, + "loss": 3.0566, + "step": 4655 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025448272574614015, + "loss": 3.2068, + "step": 4656 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002542783944025361, + "loss": 3.1994, + "step": 4657 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002540741171483356, + "loss": 3.1709, + "step": 4658 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025386989402850486, + "loss": 3.1098, + "step": 4659 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025366572508799884, + "loss": 3.0496, + "step": 4660 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025346161037175977, + "loss": 3.1481, + "step": 4661 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025325754992471883, + "loss": 3.2612, + "step": 4662 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002530535437917942, + "loss": 3.2384, + "step": 4663 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025284959201789313, + "loss": 3.1182, + "step": 4664 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025264569464791023, + "loss": 3.2053, + "step": 4665 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025244185172672827, + "loss": 3.1141, + "step": 4666 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002522380632992185, + "loss": 3.3139, + "step": 4667 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002520343294102394, + "loss": 3.0521, + "step": 4668 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002518306501046381, + "loss": 3.1451, + "step": 4669 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025162702542724926, + "loss": 3.1, + "step": 4670 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002514234554228959, + "loss": 3.2367, + "step": 4671 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002512199401363886, + "loss": 3.0206, + "step": 4672 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025101647961252635, + "loss": 3.147, + "step": 4673 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002508130738960955, + "loss": 3.2224, + "step": 4674 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002506097230318709, + "loss": 3.2643, + "step": 4675 + }, + { + "epoch": 1.35, + "learning_rate": 0.00025040642706461533, + "loss": 3.1765, + "step": 4676 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002502031860390788, + "loss": 3.1892, + "step": 4677 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002500000000000001, + "loss": 3.3372, + "step": 4678 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024979686899210515, + "loss": 3.2664, + "step": 4679 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002495937930601084, + "loss": 3.1426, + "step": 4680 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024939077224871164, + "loss": 3.1751, + "step": 4681 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024918780660260507, + "loss": 3.1972, + "step": 4682 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024898489616646607, + "loss": 3.1065, + "step": 4683 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024878204098496045, + "loss": 3.1524, + "step": 4684 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024857924110274185, + "loss": 3.1291, + "step": 4685 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024837649656445115, + "loss": 3.3093, + "step": 4686 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002481738074147178, + "loss": 3.2211, + "step": 4687 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002479711736981584, + "loss": 3.03, + "step": 4688 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024776859545937787, + "loss": 3.2136, + "step": 4689 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024756607274296845, + "loss": 3.0667, + "step": 4690 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002473636055935105, + "loss": 3.1628, + "step": 4691 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024716119405557215, + "loss": 3.2409, + "step": 4692 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002469588381737093, + "loss": 3.1614, + "step": 4693 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002467565379924653, + "loss": 3.1096, + "step": 4694 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002465542935563712, + "loss": 3.1729, + "step": 4695 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002463521049099465, + "loss": 3.2557, + "step": 4696 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002461499720976974, + "loss": 3.1858, + "step": 4697 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002459478951641188, + "loss": 3.2453, + "step": 4698 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002457458741536924, + "loss": 3.1961, + "step": 4699 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002455439091108883, + "loss": 3.1637, + "step": 4700 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024534200008016397, + "loss": 3.1196, + "step": 4701 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024514014710596464, + "loss": 3.2624, + "step": 4702 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024493835023272313, + "loss": 3.0744, + "step": 4703 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002447366095048597, + "loss": 3.2355, + "step": 4704 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002445349249667828, + "loss": 3.1455, + "step": 4705 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024433329666288773, + "loss": 3.2215, + "step": 4706 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002441317246375584, + "loss": 3.1803, + "step": 4707 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024393020893516537, + "loss": 3.2079, + "step": 4708 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024372874960006742, + "loss": 3.0395, + "step": 4709 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002435273466766107, + "loss": 3.2556, + "step": 4710 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024332600020912916, + "loss": 3.0952, + "step": 4711 + }, + { + "epoch": 1.36, + "learning_rate": 0.000243124710241944, + "loss": 3.0976, + "step": 4712 + }, + { + "epoch": 1.36, + "learning_rate": 0.00024292347681936395, + "loss": 3.1068, + "step": 4713 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024272229998568574, + "loss": 3.0871, + "step": 4714 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024252117978519304, + "loss": 3.1564, + "step": 4715 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024232011626215783, + "loss": 3.0593, + "step": 4716 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002421191094608387, + "loss": 3.1595, + "step": 4717 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002419181594254824, + "loss": 3.1244, + "step": 4718 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024171726620032309, + "loss": 3.2051, + "step": 4719 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024151642982958243, + "loss": 3.1475, + "step": 4720 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024131565035746932, + "loss": 3.1166, + "step": 4721 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024111492782818013, + "loss": 3.2291, + "step": 4722 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002409142622858992, + "loss": 3.1165, + "step": 4723 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024071365377479766, + "loss": 3.1898, + "step": 4724 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024051310233903466, + "loss": 3.1328, + "step": 4725 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024031260802275623, + "loss": 3.177, + "step": 4726 + }, + { + "epoch": 1.37, + "learning_rate": 0.00024011217087009635, + "loss": 3.0635, + "step": 4727 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023991179092517612, + "loss": 3.2857, + "step": 4728 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023971146823210438, + "loss": 3.2692, + "step": 4729 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023951120283497668, + "loss": 3.1513, + "step": 4730 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002393109947778768, + "loss": 3.1462, + "step": 4731 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023911084410487533, + "loss": 3.0955, + "step": 4732 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002389107508600301, + "loss": 3.2386, + "step": 4733 + }, + { + "epoch": 1.37, + "learning_rate": 0.000238710715087387, + "loss": 3.0783, + "step": 4734 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023851073683097852, + "loss": 3.2868, + "step": 4735 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023831081613482496, + "loss": 3.2019, + "step": 4736 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023811095304293383, + "loss": 3.0669, + "step": 4737 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002379111475993001, + "loss": 3.0716, + "step": 4738 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002377113998479056, + "loss": 3.2232, + "step": 4739 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023751170983271996, + "loss": 3.1723, + "step": 4740 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023731207759769985, + "loss": 3.2145, + "step": 4741 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023711250318678907, + "loss": 3.1971, + "step": 4742 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023691298664391903, + "loss": 3.2075, + "step": 4743 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023671352801300823, + "loss": 3.0842, + "step": 4744 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002365141273379627, + "loss": 3.0985, + "step": 4745 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023631478466267498, + "loss": 3.1266, + "step": 4746 + }, + { + "epoch": 1.37, + "learning_rate": 0.00023611550003102582, + "loss": 3.2164, + "step": 4747 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023591627348688228, + "loss": 3.2108, + "step": 4748 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023571710507409944, + "loss": 3.1917, + "step": 4749 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002355179948365189, + "loss": 3.177, + "step": 4750 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023531894281796973, + "loss": 3.2635, + "step": 4751 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002351199490622683, + "loss": 3.066, + "step": 4752 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023492101361321805, + "loss": 3.2129, + "step": 4753 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023472213651460976, + "loss": 3.209, + "step": 4754 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002345233178102209, + "loss": 3.3059, + "step": 4755 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023432455754381675, + "loss": 3.1397, + "step": 4756 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002341258557591489, + "loss": 3.1549, + "step": 4757 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023392721249995696, + "loss": 3.1978, + "step": 4758 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023372862780996707, + "loss": 3.2501, + "step": 4759 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002335301017328924, + "loss": 3.1446, + "step": 4760 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023333163431243364, + "loss": 3.137, + "step": 4761 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002331332255922784, + "loss": 3.1342, + "step": 4762 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002329348756161015, + "loss": 3.1787, + "step": 4763 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002327365844275643, + "loss": 3.2193, + "step": 4764 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023253835207031603, + "loss": 3.2207, + "step": 4765 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023234017858799212, + "loss": 3.1829, + "step": 4766 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023214206402421585, + "loss": 3.0381, + "step": 4767 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002319440084225969, + "loss": 3.0953, + "step": 4768 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023174601182673204, + "loss": 3.0973, + "step": 4769 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002315480742802054, + "loss": 3.2272, + "step": 4770 + }, + { + "epoch": 1.38, + "learning_rate": 0.000231350195826588, + "loss": 3.1671, + "step": 4771 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023115237650943792, + "loss": 3.1089, + "step": 4772 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002309546163722997, + "loss": 3.2299, + "step": 4773 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002307569154587056, + "loss": 3.3094, + "step": 4774 + }, + { + "epoch": 1.38, + "learning_rate": 0.00023055927381217413, + "loss": 3.1787, + "step": 4775 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002303616914762115, + "loss": 3.2224, + "step": 4776 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002301641684943102, + "loss": 3.2235, + "step": 4777 + }, + { + "epoch": 1.38, + "learning_rate": 0.00022996670490994987, + "loss": 3.1893, + "step": 4778 + }, + { + "epoch": 1.38, + "learning_rate": 0.00022976930076659718, + "loss": 3.1835, + "step": 4779 + }, + { + "epoch": 1.38, + "learning_rate": 0.00022957195610770576, + "loss": 3.0309, + "step": 4780 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002293746709767162, + "loss": 3.0467, + "step": 4781 + }, + { + "epoch": 1.38, + "learning_rate": 0.00022917744541705543, + "loss": 3.2067, + "step": 4782 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022898027947213813, + "loss": 3.1588, + "step": 4783 + }, + { + "epoch": 1.39, + "learning_rate": 0.000228783173185365, + "loss": 3.1309, + "step": 4784 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022858612660012434, + "loss": 3.2307, + "step": 4785 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002283891397597908, + "loss": 3.055, + "step": 4786 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002281922127077259, + "loss": 3.2264, + "step": 4787 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022799534548727834, + "loss": 3.1983, + "step": 4788 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022779853814178347, + "loss": 3.1529, + "step": 4789 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022760179071456356, + "loss": 3.0728, + "step": 4790 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022740510324892733, + "loss": 3.2164, + "step": 4791 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002272084757881709, + "loss": 3.1414, + "step": 4792 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002270119083755764, + "loss": 3.1329, + "step": 4793 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022681540105441356, + "loss": 3.082, + "step": 4794 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022661895386793824, + "loss": 3.0959, + "step": 4795 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022642256685939338, + "loss": 3.199, + "step": 4796 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022622624007200887, + "loss": 3.1239, + "step": 4797 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022602997354900073, + "loss": 3.2793, + "step": 4798 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002258337673335724, + "loss": 3.1101, + "step": 4799 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022563762146891344, + "loss": 3.139, + "step": 4800 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002254415359982006, + "loss": 3.1757, + "step": 4801 + }, + { + "epoch": 1.39, + "learning_rate": 0.000225245510964597, + "loss": 3.1062, + "step": 4802 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002250495464112528, + "loss": 3.2204, + "step": 4803 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022485364238130436, + "loss": 3.2032, + "step": 4804 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022465779891787512, + "loss": 3.2247, + "step": 4805 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022446201606407534, + "loss": 3.2433, + "step": 4806 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022426629386300124, + "loss": 3.0789, + "step": 4807 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022407063235773645, + "loss": 3.2201, + "step": 4808 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022387503159135064, + "loss": 3.087, + "step": 4809 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002236794916069007, + "loss": 3.1565, + "step": 4810 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022348401244742943, + "loss": 3.1342, + "step": 4811 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022328859415596703, + "loss": 3.2827, + "step": 4812 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022309323677552944, + "loss": 3.1224, + "step": 4813 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022289794034911997, + "loss": 3.2083, + "step": 4814 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022270270491972833, + "loss": 3.0468, + "step": 4815 + }, + { + "epoch": 1.39, + "learning_rate": 0.00022250753053033024, + "loss": 3.082, + "step": 4816 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022231241722388883, + "loss": 3.1906, + "step": 4817 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022211736504335305, + "loss": 3.3461, + "step": 4818 + }, + { + "epoch": 1.4, + "learning_rate": 0.000221922374031659, + "loss": 3.2171, + "step": 4819 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022172744423172865, + "loss": 3.2047, + "step": 4820 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022153257568647124, + "loss": 3.15, + "step": 4821 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022133776843878184, + "loss": 3.1991, + "step": 4822 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002211430225315425, + "loss": 3.1327, + "step": 4823 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002209483380076218, + "loss": 3.1133, + "step": 4824 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022075371490987424, + "loss": 3.0977, + "step": 4825 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022055915328114158, + "loss": 3.1069, + "step": 4826 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002203646531642512, + "loss": 3.1309, + "step": 4827 + }, + { + "epoch": 1.4, + "learning_rate": 0.00022017021460201785, + "loss": 3.0878, + "step": 4828 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021997583763724187, + "loss": 3.1742, + "step": 4829 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021978152231271077, + "loss": 3.0155, + "step": 4830 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021958726867119782, + "loss": 3.2554, + "step": 4831 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021939307675546322, + "loss": 3.2394, + "step": 4832 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021919894660825363, + "loss": 3.0463, + "step": 4833 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021900487827230148, + "loss": 3.134, + "step": 4834 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021881087179032643, + "loss": 3.1376, + "step": 4835 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021861692720503368, + "loss": 3.1092, + "step": 4836 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021842304455911565, + "loss": 3.1688, + "step": 4837 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021822922389525036, + "loss": 3.1174, + "step": 4838 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002180354652561028, + "loss": 3.147, + "step": 4839 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021784176868432377, + "loss": 3.1493, + "step": 4840 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002176481342225508, + "loss": 3.0886, + "step": 4841 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002174545619134079, + "loss": 3.1193, + "step": 4842 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002172610517995046, + "loss": 3.0278, + "step": 4843 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021706760392343778, + "loss": 3.2115, + "step": 4844 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021687421832778964, + "loss": 3.1312, + "step": 4845 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021668089505512956, + "loss": 3.0539, + "step": 4846 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021668089505512956, + "loss": 3.236, + "step": 4847 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002164876341480123, + "loss": 3.1148, + "step": 4848 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002162944356489797, + "loss": 3.144, + "step": 4849 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021610129960055946, + "loss": 2.9799, + "step": 4850 + }, + { + "epoch": 1.4, + "learning_rate": 0.00021590822604526576, + "loss": 3.0892, + "step": 4851 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021571521502559876, + "loss": 3.0612, + "step": 4852 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021552226658404466, + "loss": 3.1319, + "step": 4853 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002153293807630766, + "loss": 3.1463, + "step": 4854 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002151365576051532, + "loss": 3.0752, + "step": 4855 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021494379715272, + "loss": 3.2536, + "step": 4856 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002147510994482078, + "loss": 3.1966, + "step": 4857 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021455846453403456, + "loss": 3.1548, + "step": 4858 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021436589245260373, + "loss": 3.2177, + "step": 4859 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021417338324630558, + "loss": 3.1875, + "step": 4860 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021398093695751587, + "loss": 3.1237, + "step": 4861 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021378855362859663, + "loss": 3.1576, + "step": 4862 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021359623330189653, + "loss": 3.186, + "step": 4863 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002134039760197497, + "loss": 3.1699, + "step": 4864 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021321178182447708, + "loss": 3.1358, + "step": 4865 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021301965075838498, + "loss": 3.1838, + "step": 4866 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002128275828637664, + "loss": 3.0968, + "step": 4867 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021263557818290024, + "loss": 3.1526, + "step": 4868 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021244363675805168, + "loss": 3.0926, + "step": 4869 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002122517586314715, + "loss": 3.1612, + "step": 4870 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002120599438453968, + "loss": 3.0952, + "step": 4871 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002118681924420509, + "loss": 3.124, + "step": 4872 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002116765044636429, + "loss": 3.2084, + "step": 4873 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002114848799523683, + "loss": 3.3064, + "step": 4874 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021129331895040803, + "loss": 3.1184, + "step": 4875 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021110182149992963, + "loss": 3.047, + "step": 4876 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002109103876430864, + "loss": 3.1922, + "step": 4877 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021071901742201782, + "loss": 3.1065, + "step": 4878 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002105277108788491, + "loss": 3.1321, + "step": 4879 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021033646805569133, + "loss": 3.1027, + "step": 4880 + }, + { + "epoch": 1.41, + "learning_rate": 0.00021014528899464214, + "loss": 3.148, + "step": 4881 + }, + { + "epoch": 1.41, + "learning_rate": 0.00020995417373778435, + "loss": 3.1258, + "step": 4882 + }, + { + "epoch": 1.41, + "learning_rate": 0.00020976312232718765, + "loss": 3.115, + "step": 4883 + }, + { + "epoch": 1.41, + "learning_rate": 0.00020957213480490673, + "loss": 3.2527, + "step": 4884 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002093812112129828, + "loss": 3.0708, + "step": 4885 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020919035159344302, + "loss": 3.1619, + "step": 4886 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002089995559883004, + "loss": 3.0303, + "step": 4887 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020880882443955352, + "loss": 3.0762, + "step": 4888 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020861815698918707, + "loss": 3.2248, + "step": 4889 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020842755367917194, + "loss": 3.1526, + "step": 4890 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020823701455146431, + "loss": 3.1506, + "step": 4891 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002080465396480069, + "loss": 3.2438, + "step": 4892 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020785612901072763, + "loss": 3.2399, + "step": 4893 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020766578268154072, + "loss": 3.1575, + "step": 4894 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002074755007023461, + "loss": 3.1926, + "step": 4895 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020728528311502974, + "loss": 3.0671, + "step": 4896 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020709512996146314, + "loss": 3.05, + "step": 4897 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020690504128350335, + "loss": 3.2301, + "step": 4898 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020671501712299413, + "loss": 3.1826, + "step": 4899 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020652505752176398, + "loss": 3.0602, + "step": 4900 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020633516252162803, + "loss": 3.1879, + "step": 4901 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020614533216438673, + "loss": 3.1159, + "step": 4902 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020595556649182677, + "loss": 3.2745, + "step": 4903 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002057658655457198, + "loss": 3.1159, + "step": 4904 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020557622936782412, + "loss": 3.2713, + "step": 4905 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020538665799988304, + "loss": 3.1568, + "step": 4906 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020519715148362584, + "loss": 3.0868, + "step": 4907 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020500770986076796, + "loss": 3.1231, + "step": 4908 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020481833317300975, + "loss": 3.0659, + "step": 4909 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020462902146203794, + "loss": 3.2051, + "step": 4910 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020443977476952469, + "loss": 3.2312, + "step": 4911 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020425059313712807, + "loss": 3.0831, + "step": 4912 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020406147660649132, + "loss": 3.1583, + "step": 4913 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020387242521924392, + "loss": 3.03, + "step": 4914 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020368343901700054, + "loss": 3.1212, + "step": 4915 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020349451804136192, + "loss": 3.0455, + "step": 4916 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020330566233391417, + "loss": 3.1794, + "step": 4917 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002031168719362289, + "loss": 2.9961, + "step": 4918 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020292814688986372, + "loss": 3.1809, + "step": 4919 + }, + { + "epoch": 1.42, + "learning_rate": 0.00020273948723636165, + "loss": 3.2329, + "step": 4920 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002025508930172516, + "loss": 3.2684, + "step": 4921 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020236236427404735, + "loss": 3.1181, + "step": 4922 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020217390104824924, + "loss": 3.2072, + "step": 4923 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020198550338134218, + "loss": 3.2338, + "step": 4924 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002017971713147977, + "loss": 3.1974, + "step": 4925 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020160890489007193, + "loss": 3.1437, + "step": 4926 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020142070414860702, + "loss": 3.1719, + "step": 4927 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020123256913183064, + "loss": 3.1549, + "step": 4928 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020104449988115597, + "loss": 3.0832, + "step": 4929 + }, + { + "epoch": 1.43, + "learning_rate": 0.000200856496437982, + "loss": 3.2204, + "step": 4930 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020066855884369244, + "loss": 3.2044, + "step": 4931 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020048068713965743, + "loss": 3.3526, + "step": 4932 + }, + { + "epoch": 1.43, + "learning_rate": 0.00020029288136723177, + "loss": 3.0929, + "step": 4933 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002001051415677566, + "loss": 3.2433, + "step": 4934 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019991746778255786, + "loss": 3.1197, + "step": 4935 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019972986005294707, + "loss": 3.0751, + "step": 4936 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001995423184202214, + "loss": 3.1883, + "step": 4937 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019935484292566346, + "loss": 3.0802, + "step": 4938 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019916743361054145, + "loss": 3.0523, + "step": 4939 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019898009051610844, + "loss": 3.1174, + "step": 4940 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019879281368360359, + "loss": 3.15, + "step": 4941 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019860560315425086, + "loss": 3.2163, + "step": 4942 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001984184589692602, + "loss": 3.1737, + "step": 4943 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019823138116982652, + "loss": 3.1692, + "step": 4944 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001980443697971302, + "loss": 3.146, + "step": 4945 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001978574248923371, + "loss": 3.1343, + "step": 4946 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019767054649659849, + "loss": 3.1502, + "step": 4947 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019748373465105107, + "loss": 3.0044, + "step": 4948 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019729698939681645, + "loss": 3.2191, + "step": 4949 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019711031077500218, + "loss": 3.2671, + "step": 4950 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019692369882670053, + "loss": 3.0781, + "step": 4951 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019673715359298972, + "loss": 3.1761, + "step": 4952 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019655067511493263, + "loss": 3.2169, + "step": 4953 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019636426343357794, + "loss": 3.1785, + "step": 4954 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001961779185899597, + "loss": 3.2021, + "step": 4955 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019599164062509666, + "loss": 3.2001, + "step": 4956 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019580542957999353, + "loss": 3.1405, + "step": 4957 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019561928549563967, + "loss": 3.2605, + "step": 4958 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019543320841301028, + "loss": 3.0594, + "step": 4959 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019524719837306514, + "loss": 3.1016, + "step": 4960 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019506125541675018, + "loss": 3.0993, + "step": 4961 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019487537958499552, + "loss": 3.1634, + "step": 4962 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001946895709187173, + "loss": 3.1763, + "step": 4963 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019450382945881674, + "loss": 3.2498, + "step": 4964 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001943181552461798, + "loss": 3.1754, + "step": 4965 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001941325483216783, + "loss": 3.099, + "step": 4966 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019394700872616856, + "loss": 3.1702, + "step": 4967 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019376153650049283, + "loss": 3.1546, + "step": 4968 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019357613168547773, + "loss": 2.973, + "step": 4969 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001933907943219358, + "loss": 3.1945, + "step": 4970 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019320552445066408, + "loss": 3.1176, + "step": 4971 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001930203221124452, + "loss": 3.2118, + "step": 4972 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019283518734804694, + "loss": 3.2346, + "step": 4973 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019265012019822165, + "loss": 3.1857, + "step": 4974 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019246512070370764, + "loss": 3.3577, + "step": 4975 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019228018890522742, + "loss": 3.162, + "step": 4976 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019209532484348947, + "loss": 3.1579, + "step": 4977 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019191052855918657, + "loss": 3.116, + "step": 4978 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019172580009299735, + "loss": 3.0441, + "step": 4979 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019154113948558466, + "loss": 3.2342, + "step": 4980 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001913565467775971, + "loss": 3.1608, + "step": 4981 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019117202200966832, + "loss": 3.2481, + "step": 4982 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019098756522241634, + "loss": 3.0637, + "step": 4983 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019080317645644507, + "loss": 3.1949, + "step": 4984 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019061885575234272, + "loss": 3.0564, + "step": 4985 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019043460315068306, + "loss": 3.1937, + "step": 4986 + }, + { + "epoch": 1.44, + "learning_rate": 0.00019025041869202448, + "loss": 3.218, + "step": 4987 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001900663024169107, + "loss": 2.9805, + "step": 4988 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018988225436587002, + "loss": 3.0926, + "step": 4989 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018969827457941614, + "loss": 3.0241, + "step": 4990 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018951436309804764, + "loss": 3.1611, + "step": 4991 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018933051996224782, + "loss": 3.2516, + "step": 4992 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018914674521248532, + "loss": 3.1694, + "step": 4993 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001889630388892131, + "loss": 3.0599, + "step": 4994 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018877940103287, + "loss": 3.0429, + "step": 4995 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018859583168387878, + "loss": 3.1682, + "step": 4996 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018841233088264797, + "loss": 2.9952, + "step": 4997 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018822889866957027, + "loss": 3.1523, + "step": 4998 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018804553508502386, + "loss": 3.1523, + "step": 4999 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001878622401693717, + "loss": 3.0525, + "step": 5000 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018767901396296127, + "loss": 3.3394, + "step": 5001 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018749585650612555, + "loss": 3.1009, + "step": 5002 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001873127678391816, + "loss": 3.0961, + "step": 5003 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018712974800243215, + "loss": 3.0347, + "step": 5004 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018694679703616402, + "loss": 3.1738, + "step": 5005 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001867639149806495, + "loss": 3.2247, + "step": 5006 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018658110187614536, + "loss": 3.1574, + "step": 5007 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001863983577628935, + "loss": 3.1298, + "step": 5008 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001862156826811202, + "loss": 3.1571, + "step": 5009 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001860330766710367, + "loss": 3.2516, + "step": 5010 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018585053977283934, + "loss": 3.0246, + "step": 5011 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018566807202670865, + "loss": 3.2718, + "step": 5012 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018548567347281077, + "loss": 3.3197, + "step": 5013 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018530334415129568, + "loss": 3.2324, + "step": 5014 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018512108410229878, + "loss": 3.178, + "step": 5015 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018493889336593995, + "loss": 3.091, + "step": 5016 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018475677198232415, + "loss": 3.0006, + "step": 5017 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018457471999154063, + "loss": 3.2251, + "step": 5018 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018439273743366325, + "loss": 3.1379, + "step": 5019 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018421082434875132, + "loss": 3.1103, + "step": 5020 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018402898077684804, + "loss": 3.1625, + "step": 5021 + }, + { + "epoch": 1.45, + "learning_rate": 0.000183847206757982, + "loss": 3.0827, + "step": 5022 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018366550233216584, + "loss": 3.1919, + "step": 5023 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018348386753939733, + "loss": 3.2462, + "step": 5024 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018330230241965883, + "loss": 3.1634, + "step": 5025 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001831208070129175, + "loss": 3.1601, + "step": 5026 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018293938135912474, + "loss": 3.1203, + "step": 5027 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001827580254982167, + "loss": 3.1295, + "step": 5028 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018257673947011456, + "loss": 3.1419, + "step": 5029 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018239552331472358, + "loss": 3.1273, + "step": 5030 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001822143770719342, + "loss": 3.2123, + "step": 5031 + }, + { + "epoch": 1.46, + "learning_rate": 0.000182033300781621, + "loss": 3.2782, + "step": 5032 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018185229448364337, + "loss": 3.0794, + "step": 5033 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001816713582178453, + "loss": 3.2066, + "step": 5034 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018149049202405553, + "loss": 3.2021, + "step": 5035 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018130969594208702, + "loss": 3.1035, + "step": 5036 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001811289700117373, + "loss": 3.2057, + "step": 5037 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001809483142727889, + "loss": 3.0486, + "step": 5038 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001807677287650083, + "loss": 3.0078, + "step": 5039 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018058721352814721, + "loss": 3.0885, + "step": 5040 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018040676860194116, + "loss": 2.9607, + "step": 5041 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001802263940261107, + "loss": 3.0293, + "step": 5042 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018004608984036069, + "loss": 3.1549, + "step": 5043 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001798658560843807, + "loss": 3.1456, + "step": 5044 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017968569279784463, + "loss": 3.0543, + "step": 5045 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001795056000204105, + "loss": 3.1443, + "step": 5046 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001793255777917217, + "loss": 3.0805, + "step": 5047 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017914562615140505, + "loss": 3.1715, + "step": 5048 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017896574513907287, + "loss": 3.1215, + "step": 5049 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017878593479432104, + "loss": 3.2094, + "step": 5050 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001786061951567303, + "loss": 3.1217, + "step": 5051 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017842652626586598, + "loss": 3.2132, + "step": 5052 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017824692816127775, + "loss": 3.1405, + "step": 5053 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017806740088249933, + "loss": 3.1858, + "step": 5054 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017788794446904906, + "loss": 3.1055, + "step": 5055 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017770855896043002, + "loss": 3.0878, + "step": 5056 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017752924439612912, + "loss": 3.104, + "step": 5057 + }, + { + "epoch": 1.46, + "learning_rate": 0.00017735000081561797, + "loss": 3.2313, + "step": 5058 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017717082825835268, + "loss": 3.1515, + "step": 5059 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017699172676377363, + "loss": 2.9477, + "step": 5060 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001768126963713052, + "loss": 3.1655, + "step": 5061 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017663373712035668, + "loss": 3.1422, + "step": 5062 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017645484905032128, + "loss": 3.131, + "step": 5063 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017627603220057657, + "loss": 3.0871, + "step": 5064 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017609728661048492, + "loss": 3.0861, + "step": 5065 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017591861231939216, + "loss": 3.2191, + "step": 5066 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017574000936662927, + "loss": 3.0206, + "step": 5067 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001755614777915111, + "loss": 3.1692, + "step": 5068 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017538301763333702, + "loss": 3.0007, + "step": 5069 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017520462893139022, + "loss": 3.0352, + "step": 5070 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017502631172493878, + "loss": 3.1687, + "step": 5071 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017484806605323457, + "loss": 3.1945, + "step": 5072 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001746698919555137, + "loss": 3.1269, + "step": 5073 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017449178947099702, + "loss": 3.2012, + "step": 5074 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017431375863888898, + "loss": 3.0409, + "step": 5075 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001741357994983787, + "loss": 3.1089, + "step": 5076 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017395791208863948, + "loss": 3.2255, + "step": 5077 + }, + { + "epoch": 1.47, + "learning_rate": 0.0001737800964488288, + "loss": 3.0954, + "step": 5078 + }, + { + "epoch": 1.47, + "learning_rate": 0.000173602352618088, + "loss": 3.0814, + "step": 5079 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017342468063554322, + "loss": 3.1429, + "step": 5080 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017324708054030431, + "loss": 3.0447, + "step": 5081 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017306955237146522, + "loss": 3.0767, + "step": 5082 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017289209616810469, + "loss": 3.1377, + "step": 5083 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017271471196928485, + "loss": 3.1865, + "step": 5084 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017253739981405253, + "loss": 3.1529, + "step": 5085 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017236015974143848, + "loss": 3.1991, + "step": 5086 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017218299179045787, + "loss": 3.1605, + "step": 5087 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017200589600010934, + "loss": 3.0345, + "step": 5088 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017182887240937644, + "loss": 3.0794, + "step": 5089 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017165192105722622, + "loss": 3.2268, + "step": 5090 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017147504198260994, + "loss": 3.1163, + "step": 5091 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017129823522446337, + "loss": 3.0638, + "step": 5092 + }, + { + "epoch": 1.47, + "learning_rate": 0.00017112150082170568, + "loss": 3.1321, + "step": 5093 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017094483881324073, + "loss": 3.2566, + "step": 5094 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017076824923795614, + "loss": 3.2599, + "step": 5095 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017059173213472385, + "loss": 3.1829, + "step": 5096 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017041528754239927, + "loss": 3.0713, + "step": 5097 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017023891549982256, + "loss": 3.1495, + "step": 5098 + }, + { + "epoch": 1.48, + "learning_rate": 0.00017006261604581724, + "loss": 3.2171, + "step": 5099 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016988638921919147, + "loss": 3.2043, + "step": 5100 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016971023505873707, + "loss": 3.1246, + "step": 5101 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001695341536032297, + "loss": 3.2132, + "step": 5102 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016935814489142935, + "loss": 3.0695, + "step": 5103 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016918220896208004, + "loss": 3.1494, + "step": 5104 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016900634585390967, + "loss": 3.2162, + "step": 5105 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016883055560562978, + "loss": 3.0525, + "step": 5106 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001686548382559364, + "loss": 3.0624, + "step": 5107 + }, + { + "epoch": 1.48, + "learning_rate": 0.000168479193843509, + "loss": 3.1044, + "step": 5108 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016830362240701164, + "loss": 3.066, + "step": 5109 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016812812398509154, + "loss": 3.0577, + "step": 5110 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001679526986163804, + "loss": 3.0099, + "step": 5111 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016777734633949393, + "loss": 3.1244, + "step": 5112 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016760206719303107, + "loss": 3.3101, + "step": 5113 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016742686121557542, + "loss": 3.0983, + "step": 5114 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016725172844569392, + "loss": 3.1898, + "step": 5115 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001670766689219379, + "loss": 3.192, + "step": 5116 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016690168268284196, + "loss": 3.2193, + "step": 5117 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016672676976692524, + "loss": 3.1499, + "step": 5118 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016655193021269005, + "loss": 3.0394, + "step": 5119 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016637716405862307, + "loss": 3.2218, + "step": 5120 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001662024713431949, + "loss": 3.0087, + "step": 5121 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016602785210485926, + "loss": 3.0303, + "step": 5122 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016585330638205453, + "loss": 3.2245, + "step": 5123 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016567883421320228, + "loss": 3.2196, + "step": 5124 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016550443563670848, + "loss": 3.2211, + "step": 5125 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016533011069096217, + "loss": 3.1984, + "step": 5126 + }, + { + "epoch": 1.48, + "learning_rate": 0.00016515585941433692, + "loss": 3.2623, + "step": 5127 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001649816818451894, + "loss": 3.1305, + "step": 5128 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001648075780218607, + "loss": 3.1474, + "step": 5129 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016463354798267537, + "loss": 3.1244, + "step": 5130 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016445959176594155, + "loss": 3.1782, + "step": 5131 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016428570940995154, + "loss": 3.2158, + "step": 5132 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016411190095298077, + "loss": 3.0847, + "step": 5133 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016393816643328924, + "loss": 3.1619, + "step": 5134 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016376450588911985, + "loss": 3.1613, + "step": 5135 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016359091935869984, + "loss": 3.2884, + "step": 5136 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016341740688023966, + "loss": 3.1123, + "step": 5137 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016324396849193374, + "loss": 3.1471, + "step": 5138 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016307060423196047, + "loss": 3.1733, + "step": 5139 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001628973141384812, + "loss": 3.1481, + "step": 5140 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016272409824964168, + "loss": 3.1135, + "step": 5141 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016255095660357076, + "loss": 3.1053, + "step": 5142 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016237788923838148, + "loss": 3.3294, + "step": 5143 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001622048961921699, + "loss": 3.1823, + "step": 5144 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001620319775030164, + "loss": 3.2064, + "step": 5145 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016185913320898438, + "loss": 3.1228, + "step": 5146 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016168636334812125, + "loss": 3.2693, + "step": 5147 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016151366795845817, + "loss": 3.1474, + "step": 5148 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016134104707800928, + "loss": 3.2661, + "step": 5149 + }, + { + "epoch": 1.49, + "learning_rate": 0.000161168500744773, + "loss": 3.0609, + "step": 5150 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016099602899673083, + "loss": 3.0265, + "step": 5151 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001608236318718483, + "loss": 3.1289, + "step": 5152 + }, + { + "epoch": 1.49, + "learning_rate": 0.000160651309408074, + "loss": 3.0658, + "step": 5153 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016047906164334065, + "loss": 3.2073, + "step": 5154 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016030688861556392, + "loss": 3.0244, + "step": 5155 + }, + { + "epoch": 1.49, + "learning_rate": 0.00016013479036264356, + "loss": 3.2324, + "step": 5156 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001599627669224628, + "loss": 3.1937, + "step": 5157 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015979081833288796, + "loss": 3.1276, + "step": 5158 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015961894463176941, + "loss": 3.1273, + "step": 5159 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001594471458569406, + "loss": 3.108, + "step": 5160 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015927542204621886, + "loss": 3.1275, + "step": 5161 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015910377323740473, + "loss": 3.1144, + "step": 5162 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001589321994682823, + "loss": 3.2121, + "step": 5163 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015876070077661952, + "loss": 3.17, + "step": 5164 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015858927720016709, + "loss": 3.1923, + "step": 5165 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015841792877665994, + "loss": 3.182, + "step": 5166 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015824665554381578, + "loss": 2.9975, + "step": 5167 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001580754575393364, + "loss": 3.1815, + "step": 5168 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015790433480090632, + "loss": 3.1293, + "step": 5169 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001577332873661943, + "loss": 3.1856, + "step": 5170 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001575623152728518, + "loss": 3.1244, + "step": 5171 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015739141855851413, + "loss": 3.1271, + "step": 5172 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015722059726080002, + "loss": 3.2803, + "step": 5173 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015704985141731115, + "loss": 3.0374, + "step": 5174 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015687918106563326, + "loss": 3.2125, + "step": 5175 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015670858624333478, + "loss": 3.234, + "step": 5176 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001565380669879682, + "loss": 3.1614, + "step": 5177 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015636762333706868, + "loss": 3.1564, + "step": 5178 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015619725532815533, + "loss": 3.1641, + "step": 5179 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015602696299873016, + "loss": 3.0884, + "step": 5180 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015585674638627883, + "loss": 3.1131, + "step": 5181 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015568660552827036, + "loss": 3.1779, + "step": 5182 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015551654046215668, + "loss": 3.0806, + "step": 5183 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015534655122537367, + "loss": 3.0105, + "step": 5184 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015517663785533982, + "loss": 3.0174, + "step": 5185 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015500680038945752, + "loss": 3.0993, + "step": 5186 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001548370388651119, + "loss": 3.3187, + "step": 5187 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015466735331967206, + "loss": 3.094, + "step": 5188 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001544977437904897, + "loss": 3.1944, + "step": 5189 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001543282103149001, + "loss": 3.2544, + "step": 5190 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001541587529302218, + "loss": 3.1072, + "step": 5191 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015398937167375676, + "loss": 3.1428, + "step": 5192 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015382006658278986, + "loss": 3.0977, + "step": 5193 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001536508376945891, + "loss": 3.0319, + "step": 5194 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001534816850464063, + "loss": 3.1062, + "step": 5195 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001533126086754758, + "loss": 3.1383, + "step": 5196 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001531436086190159, + "loss": 3.2505, + "step": 5197 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015297468491422722, + "loss": 3.1718, + "step": 5198 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015280583759829435, + "loss": 3.265, + "step": 5199 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015263706670838467, + "loss": 3.1504, + "step": 5200 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015246837228164906, + "loss": 3.1196, + "step": 5201 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015229975435522108, + "loss": 3.0123, + "step": 5202 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015213121296621767, + "loss": 3.0216, + "step": 5203 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015196274815173916, + "loss": 3.1087, + "step": 5204 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015179435994886858, + "loss": 3.2677, + "step": 5205 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015162604839467266, + "loss": 3.2803, + "step": 5206 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015145781352620053, + "loss": 3.0644, + "step": 5207 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015128965538048505, + "loss": 3.3286, + "step": 5208 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015112157399454207, + "loss": 3.2309, + "step": 5209 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015095356940537054, + "loss": 3.03, + "step": 5210 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015078564164995223, + "loss": 3.0838, + "step": 5211 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001506177907652521, + "loss": 3.1295, + "step": 5212 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015045001678821863, + "loss": 3.182, + "step": 5213 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001502823197557826, + "loss": 3.2146, + "step": 5214 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015011469970485853, + "loss": 3.2906, + "step": 5215 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014994715667234366, + "loss": 3.1762, + "step": 5216 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001497796906951186, + "loss": 3.1908, + "step": 5217 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001496123018100463, + "loss": 3.0245, + "step": 5218 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014944499005397371, + "loss": 3.162, + "step": 5219 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014927775546372994, + "loss": 3.0931, + "step": 5220 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014911059807612743, + "loss": 3.0925, + "step": 5221 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001489435179279619, + "loss": 3.0957, + "step": 5222 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014877651505601158, + "loss": 3.0923, + "step": 5223 + }, + { + "epoch": 1.51, + "learning_rate": 0.000148609589497038, + "loss": 3.1732, + "step": 5224 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014844274128778562, + "loss": 3.0558, + "step": 5225 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001482759704649821, + "loss": 3.292, + "step": 5226 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001481092770653374, + "loss": 3.1974, + "step": 5227 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001479426611255453, + "loss": 3.253, + "step": 5228 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014777612268228175, + "loss": 3.1121, + "step": 5229 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014760966177220598, + "loss": 3.0561, + "step": 5230 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014744327843196043, + "loss": 3.1365, + "step": 5231 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014727697269816987, + "loss": 3.2947, + "step": 5232 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014711074460744246, + "loss": 3.1352, + "step": 5233 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014694459419636906, + "loss": 3.3585, + "step": 5234 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014677852150152376, + "loss": 3.2597, + "step": 5235 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014661252655946285, + "loss": 3.0481, + "step": 5236 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014644660940672628, + "loss": 3.0324, + "step": 5237 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014628077007983637, + "loss": 3.287, + "step": 5238 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014611500861529825, + "loss": 3.145, + "step": 5239 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014594932504960053, + "loss": 3.1928, + "step": 5240 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014578371941921381, + "loss": 3.1074, + "step": 5241 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014561819176059227, + "loss": 3.1368, + "step": 5242 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014545274211017262, + "loss": 3.1952, + "step": 5243 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014528737050437452, + "loss": 2.9415, + "step": 5244 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014512207697960005, + "loss": 3.1715, + "step": 5245 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014495686157223477, + "loss": 3.1022, + "step": 5246 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014479172431864645, + "loss": 3.1012, + "step": 5247 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014462666525518576, + "loss": 3.1904, + "step": 5248 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014446168441818665, + "loss": 3.1091, + "step": 5249 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014429678184396506, + "loss": 3.257, + "step": 5250 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014413195756882036, + "loss": 3.1502, + "step": 5251 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014396721162903442, + "loss": 3.169, + "step": 5252 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014380254406087207, + "loss": 3.1429, + "step": 5253 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001436379549005804, + "loss": 3.1733, + "step": 5254 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001434734441843899, + "loss": 3.2021, + "step": 5255 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001433090119485132, + "loss": 3.1254, + "step": 5256 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014314465822914585, + "loss": 3.1646, + "step": 5257 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014298038306246642, + "loss": 3.0537, + "step": 5258 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014281618648463563, + "loss": 3.2739, + "step": 5259 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014265206853179735, + "loss": 3.1048, + "step": 5260 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014248802924007802, + "loss": 3.1322, + "step": 5261 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014232406864558695, + "loss": 3.2404, + "step": 5262 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014216018678441555, + "loss": 3.1883, + "step": 5263 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014199638369263856, + "loss": 3.0812, + "step": 5264 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014183265940631302, + "loss": 3.0961, + "step": 5265 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014166901396147851, + "loss": 3.1166, + "step": 5266 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014150544739415755, + "loss": 3.1536, + "step": 5267 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014134195974035524, + "loss": 3.0609, + "step": 5268 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014117855103605936, + "loss": 3.0596, + "step": 5269 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014101522131723993, + "loss": 3.2519, + "step": 5270 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014085197061985022, + "loss": 3.3013, + "step": 5271 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014068879897982534, + "loss": 3.1412, + "step": 5272 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014052570643308376, + "loss": 3.1466, + "step": 5273 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014036269301552606, + "loss": 3.2268, + "step": 5274 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014019975876303532, + "loss": 3.2264, + "step": 5275 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014003690371147754, + "loss": 3.1332, + "step": 5276 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013987412789670124, + "loss": 3.1265, + "step": 5277 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013971143135453745, + "loss": 3.1487, + "step": 5278 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013954881412079945, + "loss": 3.0509, + "step": 5279 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013938627623128364, + "loss": 3.1947, + "step": 5280 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001392238177217683, + "loss": 3.2064, + "step": 5281 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013906143862801485, + "loss": 3.1461, + "step": 5282 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013889913898576677, + "loss": 3.1508, + "step": 5283 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013873691883075018, + "loss": 2.9904, + "step": 5284 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001385747781986738, + "loss": 3.0992, + "step": 5285 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013841271712522886, + "loss": 3.2263, + "step": 5286 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013825073564608907, + "loss": 3.1719, + "step": 5287 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001380888337969104, + "loss": 3.0805, + "step": 5288 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013792701161333164, + "loss": 3.124, + "step": 5289 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001377652691309736, + "loss": 3.1584, + "step": 5290 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013760360638544013, + "loss": 3.0419, + "step": 5291 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013744202341231675, + "loss": 3.0817, + "step": 5292 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013728052024717237, + "loss": 3.176, + "step": 5293 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001371190969255574, + "loss": 3.2068, + "step": 5294 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013695775348300532, + "loss": 3.1377, + "step": 5295 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001367964899550319, + "loss": 3.1348, + "step": 5296 + }, + { + "epoch": 1.53, + "learning_rate": 0.000136635306377135, + "loss": 3.2, + "step": 5297 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001364742027847954, + "loss": 3.1097, + "step": 5298 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013631317921347562, + "loss": 3.1914, + "step": 5299 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013615223569862135, + "loss": 3.2135, + "step": 5300 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013599137227565984, + "loss": 3.1536, + "step": 5301 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013583058898000139, + "loss": 3.0547, + "step": 5302 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013566988584703816, + "loss": 3.1536, + "step": 5303 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013550926291214498, + "loss": 3.1743, + "step": 5304 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013534872021067913, + "loss": 3.1049, + "step": 5305 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013518825777797971, + "loss": 3.0741, + "step": 5306 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013502787564936874, + "loss": 3.2195, + "step": 5307 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013486757386015004, + "loss": 3.0371, + "step": 5308 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013470735244561026, + "loss": 3.129, + "step": 5309 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013454721144101782, + "loss": 3.0907, + "step": 5310 + }, + { + "epoch": 1.54, + "learning_rate": 0.000134387150881624, + "loss": 3.2764, + "step": 5311 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013422717080266178, + "loss": 3.1031, + "step": 5312 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013406727123934693, + "loss": 3.288, + "step": 5313 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001339074522268774, + "loss": 3.2172, + "step": 5314 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013374771380043305, + "loss": 3.1777, + "step": 5315 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013358805599517654, + "loss": 3.1742, + "step": 5316 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013342847884625214, + "loss": 2.9946, + "step": 5317 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013326898238878715, + "loss": 3.1068, + "step": 5318 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013310956665789025, + "loss": 3.1322, + "step": 5319 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013295023168865305, + "loss": 3.0843, + "step": 5320 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013279097751614928, + "loss": 3.0899, + "step": 5321 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001326318041754343, + "loss": 3.1408, + "step": 5322 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013247271170154645, + "loss": 3.166, + "step": 5323 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013231370012950567, + "loss": 3.1185, + "step": 5324 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001321547694943146, + "loss": 3.1159, + "step": 5325 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013199591983095748, + "loss": 3.2484, + "step": 5326 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013183715117440144, + "loss": 3.2348, + "step": 5327 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013167846355959502, + "loss": 3.1438, + "step": 5328 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013151985702146942, + "loss": 3.1333, + "step": 5329 + }, + { + "epoch": 1.54, + "learning_rate": 0.000131361331594938, + "loss": 3.0621, + "step": 5330 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013120288731489592, + "loss": 3.2499, + "step": 5331 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013104452421622092, + "loss": 3.0724, + "step": 5332 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013088624233377239, + "loss": 3.0089, + "step": 5333 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013072804170239228, + "loss": 3.1741, + "step": 5334 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001305699223569043, + "loss": 3.0532, + "step": 5335 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001304118843321146, + "loss": 3.2063, + "step": 5336 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013025392766281103, + "loss": 3.1054, + "step": 5337 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013009605238376387, + "loss": 3.1746, + "step": 5338 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012993825852972558, + "loss": 3.2553, + "step": 5339 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012978054613543017, + "loss": 3.2672, + "step": 5340 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012962291523559426, + "loss": 3.104, + "step": 5341 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012946536586491618, + "loss": 3.1966, + "step": 5342 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012930789805807658, + "loss": 3.1915, + "step": 5343 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001291505118497378, + "loss": 3.1312, + "step": 5344 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001289932072745447, + "loss": 3.1569, + "step": 5345 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012883598436712367, + "loss": 3.0799, + "step": 5346 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012867884316208344, + "loss": 3.0748, + "step": 5347 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012852178369401486, + "loss": 3.1858, + "step": 5348 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012836480599749027, + "loss": 3.1225, + "step": 5349 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012820791010706472, + "loss": 3.0755, + "step": 5350 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001280510960572745, + "loss": 3.0321, + "step": 5351 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001278943638826386, + "loss": 3.1258, + "step": 5352 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012773771361765734, + "loss": 3.0121, + "step": 5353 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012758114529681357, + "loss": 3.2094, + "step": 5354 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012742465895457166, + "loss": 3.1055, + "step": 5355 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012726825462537822, + "loss": 3.2514, + "step": 5356 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001271119323436618, + "loss": 3.0504, + "step": 5357 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012695569214383256, + "loss": 3.0434, + "step": 5358 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012679953406028317, + "loss": 3.1434, + "step": 5359 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012664345812738752, + "loss": 3.1578, + "step": 5360 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012648746437950208, + "loss": 3.0658, + "step": 5361 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001263315528509647, + "loss": 3.0547, + "step": 5362 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012617572357609562, + "loss": 3.1036, + "step": 5363 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012601997658919645, + "loss": 3.1853, + "step": 5364 + }, + { + "epoch": 1.55, + "learning_rate": 0.000125864311924551, + "loss": 3.1244, + "step": 5365 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001257087296164252, + "loss": 3.211, + "step": 5366 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012555322969906617, + "loss": 2.9349, + "step": 5367 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012539781220670366, + "loss": 3.3625, + "step": 5368 + }, + { + "epoch": 1.55, + "learning_rate": 0.00012524247717354852, + "loss": 3.1634, + "step": 5369 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012508722463379425, + "loss": 3.0458, + "step": 5370 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012493205462161533, + "loss": 3.073, + "step": 5371 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012477696717116875, + "loss": 3.1837, + "step": 5372 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001246219623165931, + "loss": 3.2856, + "step": 5373 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012446704009200894, + "loss": 3.2806, + "step": 5374 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012431220053151832, + "loss": 3.0416, + "step": 5375 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012415744366920506, + "loss": 3.0895, + "step": 5376 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001240027695391353, + "loss": 3.1812, + "step": 5377 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012384817817535638, + "loss": 3.1306, + "step": 5378 + }, + { + "epoch": 1.56, + "learning_rate": 0.000123693669611898, + "loss": 2.9844, + "step": 5379 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012353924388277092, + "loss": 3.1007, + "step": 5380 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012338490102196825, + "loss": 3.0952, + "step": 5381 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012323064106346476, + "loss": 3.2096, + "step": 5382 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012307646404121692, + "loss": 3.05, + "step": 5383 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012292236998916263, + "loss": 3.1101, + "step": 5384 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012276835894122214, + "loss": 3.0353, + "step": 5385 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012261443093129691, + "loss": 3.0664, + "step": 5386 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001224605859932702, + "loss": 3.0404, + "step": 5387 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001224605859932702, + "loss": 3.0942, + "step": 5388 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001223068241610073, + "loss": 3.2222, + "step": 5389 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012215314546835482, + "loss": 3.1991, + "step": 5390 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012199954994914125, + "loss": 3.2577, + "step": 5391 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012184603763717683, + "loss": 3.1187, + "step": 5392 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012169260856625358, + "loss": 3.0335, + "step": 5393 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001215392627701447, + "loss": 3.0217, + "step": 5394 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012138600028260577, + "loss": 3.2296, + "step": 5395 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012123282113737338, + "loss": 3.0723, + "step": 5396 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012107972536816597, + "loss": 3.1711, + "step": 5397 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012092671300868402, + "loss": 3.0709, + "step": 5398 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012077378409260903, + "loss": 3.1338, + "step": 5399 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012062093865360457, + "loss": 3.1171, + "step": 5400 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012046817672531568, + "loss": 3.1967, + "step": 5401 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012031549834136923, + "loss": 3.0787, + "step": 5402 + }, + { + "epoch": 1.56, + "learning_rate": 0.00012016290353537318, + "loss": 2.9692, + "step": 5403 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001200103923409177, + "loss": 3.1746, + "step": 5404 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011985796479157424, + "loss": 3.1496, + "step": 5405 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001197056209208956, + "loss": 3.1575, + "step": 5406 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011955336076241679, + "loss": 3.1932, + "step": 5407 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001194011843496537, + "loss": 3.0708, + "step": 5408 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011924909171610433, + "loss": 3.0858, + "step": 5409 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011909708289524801, + "loss": 3.1443, + "step": 5410 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011894515792054572, + "loss": 3.0889, + "step": 5411 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001187933168254397, + "loss": 3.1304, + "step": 5412 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011864155964335422, + "loss": 3.2459, + "step": 5413 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001184898864076946, + "loss": 3.1676, + "step": 5414 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011833829715184774, + "loss": 3.1648, + "step": 5415 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011818679190918247, + "loss": 3.0778, + "step": 5416 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011803537071304855, + "loss": 3.0615, + "step": 5417 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011788403359677769, + "loss": 3.3043, + "step": 5418 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011773278059368292, + "loss": 3.3182, + "step": 5419 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011758161173705894, + "loss": 3.0587, + "step": 5420 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011743052706018142, + "loss": 3.1818, + "step": 5421 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001172795265963082, + "loss": 3.0737, + "step": 5422 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011712861037867795, + "loss": 3.0625, + "step": 5423 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011697777844051105, + "loss": 3.1656, + "step": 5424 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001168270308150094, + "loss": 3.0614, + "step": 5425 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011667636753535632, + "loss": 3.2706, + "step": 5426 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011652578863471664, + "loss": 3.0948, + "step": 5427 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011637529414623621, + "loss": 3.1239, + "step": 5428 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011622488410304288, + "loss": 3.0686, + "step": 5429 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011607455853824539, + "loss": 3.053, + "step": 5430 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011592431748493432, + "loss": 3.1994, + "step": 5431 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011577416097618138, + "loss": 3.2508, + "step": 5432 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011562408904503952, + "loss": 3.2567, + "step": 5433 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011547410172454342, + "loss": 3.2176, + "step": 5434 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011532419904770908, + "loss": 3.2951, + "step": 5435 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011517438104753386, + "loss": 3.0762, + "step": 5436 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011502464775699617, + "loss": 3.2401, + "step": 5437 + }, + { + "epoch": 1.57, + "learning_rate": 0.00011487499920905625, + "loss": 3.0554, + "step": 5438 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001147254354366552, + "loss": 3.1294, + "step": 5439 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011457595647271602, + "loss": 3.0859, + "step": 5440 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011442656235014248, + "loss": 3.1748, + "step": 5441 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011427725310181981, + "loss": 3.0671, + "step": 5442 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001141280287606149, + "loss": 3.0693, + "step": 5443 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011397888935937561, + "loss": 3.2975, + "step": 5444 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011382983493093146, + "loss": 3.0962, + "step": 5445 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011368086550809264, + "loss": 3.0998, + "step": 5446 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011353198112365132, + "loss": 3.2048, + "step": 5447 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011338318181038037, + "loss": 3.1093, + "step": 5448 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011323446760103445, + "loss": 2.993, + "step": 5449 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011308583852834914, + "loss": 3.2219, + "step": 5450 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011293729462504115, + "loss": 3.0682, + "step": 5451 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001127888359238089, + "loss": 3.1374, + "step": 5452 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011264046245733178, + "loss": 3.1466, + "step": 5453 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011249217425827063, + "loss": 3.083, + "step": 5454 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011234397135926705, + "loss": 3.1164, + "step": 5455 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011219585379294444, + "loss": 3.1787, + "step": 5456 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011204782159190685, + "loss": 3.2134, + "step": 5457 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011189987478874014, + "loss": 3.0724, + "step": 5458 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011175201341601088, + "loss": 3.1622, + "step": 5459 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011160423750626692, + "loss": 3.2536, + "step": 5460 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011145654709203746, + "loss": 3.1974, + "step": 5461 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011130894220583293, + "loss": 3.0959, + "step": 5462 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011116142288014486, + "loss": 3.1825, + "step": 5463 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011101398914744565, + "loss": 3.0801, + "step": 5464 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001108666410401894, + "loss": 3.0428, + "step": 5465 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011071937859081077, + "loss": 3.0085, + "step": 5466 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011057220183172623, + "loss": 2.9627, + "step": 5467 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011042511079533273, + "loss": 3.2802, + "step": 5468 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011027810551400868, + "loss": 3.1886, + "step": 5469 + }, + { + "epoch": 1.58, + "learning_rate": 0.00011013118602011357, + "loss": 3.0375, + "step": 5470 + }, + { + "epoch": 1.58, + "learning_rate": 0.00010998435234598814, + "loss": 3.2455, + "step": 5471 + }, + { + "epoch": 1.58, + "learning_rate": 0.00010983760452395414, + "loss": 3.0744, + "step": 5472 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010969094258631418, + "loss": 3.283, + "step": 5473 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010954436656535249, + "loss": 3.1899, + "step": 5474 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010939787649333372, + "loss": 3.2315, + "step": 5475 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010925147240250427, + "loss": 3.1046, + "step": 5476 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010910515432509104, + "loss": 3.1273, + "step": 5477 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001089589222933024, + "loss": 3.1227, + "step": 5478 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010881277633932779, + "loss": 3.1894, + "step": 5479 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010866671649533722, + "loss": 3.1886, + "step": 5480 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010852074279348235, + "loss": 3.1275, + "step": 5481 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010837485526589535, + "loss": 3.0769, + "step": 5482 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010822905394468995, + "loss": 3.0361, + "step": 5483 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010808333886196037, + "loss": 3.137, + "step": 5484 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010793771004978236, + "loss": 3.1749, + "step": 5485 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010779216754021215, + "loss": 3.1162, + "step": 5486 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010764671136528741, + "loss": 3.1732, + "step": 5487 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010750134155702673, + "loss": 3.1083, + "step": 5488 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010735605814742933, + "loss": 3.1178, + "step": 5489 + }, + { + "epoch": 1.59, + "learning_rate": 0.000107210861168476, + "loss": 3.1654, + "step": 5490 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010706575065212792, + "loss": 3.0944, + "step": 5491 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001069207266303277, + "loss": 3.1974, + "step": 5492 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010677578913499852, + "loss": 3.0679, + "step": 5493 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010663093819804493, + "loss": 3.1568, + "step": 5494 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010648617385135196, + "loss": 3.1797, + "step": 5495 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001063414961267859, + "loss": 3.0465, + "step": 5496 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010619690505619417, + "loss": 3.1455, + "step": 5497 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010605240067140443, + "loss": 3.1708, + "step": 5498 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010590798300422599, + "loss": 3.1423, + "step": 5499 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001057636520864485, + "loss": 3.0129, + "step": 5500 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010561940794984298, + "loss": 3.0769, + "step": 5501 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010547525062616093, + "loss": 3.2102, + "step": 5502 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010533118014713522, + "loss": 3.1989, + "step": 5503 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010518719654447894, + "loss": 3.2395, + "step": 5504 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010504329984988676, + "loss": 3.0653, + "step": 5505 + }, + { + "epoch": 1.59, + "learning_rate": 0.00010489949009503385, + "loss": 3.1303, + "step": 5506 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001047557673115761, + "loss": 3.1334, + "step": 5507 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010461213153115079, + "loss": 3.02, + "step": 5508 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010446858278537535, + "loss": 3.1165, + "step": 5509 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010432512110584869, + "loss": 2.9439, + "step": 5510 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010418174652415002, + "loss": 3.1927, + "step": 5511 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010403845907183989, + "loss": 3.1363, + "step": 5512 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010389525878045919, + "loss": 3.0676, + "step": 5513 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010375214568152997, + "loss": 3.1188, + "step": 5514 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010360911980655502, + "loss": 3.055, + "step": 5515 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010346618118701768, + "loss": 3.0288, + "step": 5516 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010332332985438247, + "loss": 3.2292, + "step": 5517 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010318056584009433, + "loss": 3.0528, + "step": 5518 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010303788917557938, + "loss": 3.0817, + "step": 5519 + }, + { + "epoch": 1.6, + "learning_rate": 0.000102895299892244, + "loss": 3.1629, + "step": 5520 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010275279802147591, + "loss": 3.1764, + "step": 5521 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010261038359464304, + "loss": 3.17, + "step": 5522 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010246805664309439, + "loss": 3.0638, + "step": 5523 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010232581719815982, + "loss": 3.0679, + "step": 5524 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010218366529114948, + "loss": 3.1037, + "step": 5525 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001020416009533548, + "loss": 3.0619, + "step": 5526 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010189962421604738, + "loss": 3.1645, + "step": 5527 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010175773511048009, + "loss": 3.0997, + "step": 5528 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010161593366788596, + "loss": 3.2458, + "step": 5529 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010147421991947914, + "loss": 3.2387, + "step": 5530 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010133259389645427, + "loss": 3.2158, + "step": 5531 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010119105562998699, + "loss": 3.0364, + "step": 5532 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010104960515123307, + "loss": 3.1223, + "step": 5533 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010090824249132929, + "loss": 3.3009, + "step": 5534 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010076696768139326, + "loss": 3.0936, + "step": 5535 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010062578075252282, + "loss": 3.1831, + "step": 5536 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010048468173579695, + "loss": 3.1967, + "step": 5537 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010034367066227474, + "loss": 3.0485, + "step": 5538 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010020274756299641, + "loss": 3.2266, + "step": 5539 + }, + { + "epoch": 1.6, + "learning_rate": 0.00010006191246898255, + "loss": 3.1436, + "step": 5540 + }, + { + "epoch": 1.6, + "learning_rate": 9.992116541123464e-05, + "loss": 3.2538, + "step": 5541 + }, + { + "epoch": 1.61, + "learning_rate": 9.97805064207345e-05, + "loss": 3.0057, + "step": 5542 + }, + { + "epoch": 1.61, + "learning_rate": 9.963993552844436e-05, + "loss": 3.0449, + "step": 5543 + }, + { + "epoch": 1.61, + "learning_rate": 9.949945276530781e-05, + "loss": 3.1463, + "step": 5544 + }, + { + "epoch": 1.61, + "learning_rate": 9.935905816224817e-05, + "loss": 3.1752, + "step": 5545 + }, + { + "epoch": 1.61, + "learning_rate": 9.921875175017003e-05, + "loss": 3.0902, + "step": 5546 + }, + { + "epoch": 1.61, + "learning_rate": 9.907853355995817e-05, + "loss": 3.1715, + "step": 5547 + }, + { + "epoch": 1.61, + "learning_rate": 9.893840362247809e-05, + "loss": 3.0875, + "step": 5548 + }, + { + "epoch": 1.61, + "learning_rate": 9.879836196857595e-05, + "loss": 3.0317, + "step": 5549 + }, + { + "epoch": 1.61, + "learning_rate": 9.865840862907837e-05, + "loss": 3.1437, + "step": 5550 + }, + { + "epoch": 1.61, + "learning_rate": 9.851854363479257e-05, + "loss": 3.1242, + "step": 5551 + }, + { + "epoch": 1.61, + "learning_rate": 9.837876701650605e-05, + "loss": 3.0736, + "step": 5552 + }, + { + "epoch": 1.61, + "learning_rate": 9.823907880498744e-05, + "loss": 3.2624, + "step": 5553 + }, + { + "epoch": 1.61, + "learning_rate": 9.809947903098526e-05, + "loss": 3.1708, + "step": 5554 + }, + { + "epoch": 1.61, + "learning_rate": 9.795996772522919e-05, + "loss": 3.0965, + "step": 5555 + }, + { + "epoch": 1.61, + "learning_rate": 9.782054491842879e-05, + "loss": 2.9626, + "step": 5556 + }, + { + "epoch": 1.61, + "learning_rate": 9.768121064127461e-05, + "loss": 3.0937, + "step": 5557 + }, + { + "epoch": 1.61, + "learning_rate": 9.754196492443763e-05, + "loss": 3.1123, + "step": 5558 + }, + { + "epoch": 1.61, + "learning_rate": 9.740280779856936e-05, + "loss": 3.1006, + "step": 5559 + }, + { + "epoch": 1.61, + "learning_rate": 9.726373929430155e-05, + "loss": 3.1795, + "step": 5560 + }, + { + "epoch": 1.61, + "learning_rate": 9.712475944224658e-05, + "loss": 3.1755, + "step": 5561 + }, + { + "epoch": 1.61, + "learning_rate": 9.69858682729976e-05, + "loss": 3.047, + "step": 5562 + }, + { + "epoch": 1.61, + "learning_rate": 9.684706581712766e-05, + "loss": 3.2352, + "step": 5563 + }, + { + "epoch": 1.61, + "learning_rate": 9.670835210519091e-05, + "loss": 3.1047, + "step": 5564 + }, + { + "epoch": 1.61, + "learning_rate": 9.656972716772144e-05, + "loss": 3.0115, + "step": 5565 + }, + { + "epoch": 1.61, + "learning_rate": 9.643119103523413e-05, + "loss": 3.0072, + "step": 5566 + }, + { + "epoch": 1.61, + "learning_rate": 9.629274373822422e-05, + "loss": 3.131, + "step": 5567 + }, + { + "epoch": 1.61, + "learning_rate": 9.615438530716752e-05, + "loss": 3.1, + "step": 5568 + }, + { + "epoch": 1.61, + "learning_rate": 9.601611577251984e-05, + "loss": 3.0952, + "step": 5569 + }, + { + "epoch": 1.61, + "learning_rate": 9.587793516471805e-05, + "loss": 3.197, + "step": 5570 + }, + { + "epoch": 1.61, + "learning_rate": 9.573984351417897e-05, + "loss": 3.2412, + "step": 5571 + }, + { + "epoch": 1.61, + "learning_rate": 9.56018408512998e-05, + "loss": 3.1781, + "step": 5572 + }, + { + "epoch": 1.61, + "learning_rate": 9.546392720645868e-05, + "loss": 3.1386, + "step": 5573 + }, + { + "epoch": 1.61, + "learning_rate": 9.53261026100135e-05, + "loss": 3.1958, + "step": 5574 + }, + { + "epoch": 1.61, + "learning_rate": 9.518836709230299e-05, + "loss": 3.049, + "step": 5575 + }, + { + "epoch": 1.61, + "learning_rate": 9.505072068364623e-05, + "loss": 3.1813, + "step": 5576 + }, + { + "epoch": 1.62, + "learning_rate": 9.491316341434264e-05, + "loss": 3.0863, + "step": 5577 + }, + { + "epoch": 1.62, + "learning_rate": 9.477569531467172e-05, + "loss": 3.0816, + "step": 5578 + }, + { + "epoch": 1.62, + "learning_rate": 9.46383164148939e-05, + "loss": 3.2418, + "step": 5579 + }, + { + "epoch": 1.62, + "learning_rate": 9.450102674524952e-05, + "loss": 3.2126, + "step": 5580 + }, + { + "epoch": 1.62, + "learning_rate": 9.43638263359593e-05, + "loss": 2.9755, + "step": 5581 + }, + { + "epoch": 1.62, + "learning_rate": 9.422671521722459e-05, + "loss": 3.0607, + "step": 5582 + }, + { + "epoch": 1.62, + "learning_rate": 9.408969341922696e-05, + "loss": 3.0805, + "step": 5583 + }, + { + "epoch": 1.62, + "learning_rate": 9.395276097212841e-05, + "loss": 3.1012, + "step": 5584 + }, + { + "epoch": 1.62, + "learning_rate": 9.381591790607081e-05, + "loss": 3.2185, + "step": 5585 + }, + { + "epoch": 1.62, + "learning_rate": 9.367916425117712e-05, + "loss": 3.2632, + "step": 5586 + }, + { + "epoch": 1.62, + "learning_rate": 9.354250003754983e-05, + "loss": 3.08, + "step": 5587 + }, + { + "epoch": 1.62, + "learning_rate": 9.340592529527236e-05, + "loss": 3.011, + "step": 5588 + }, + { + "epoch": 1.62, + "learning_rate": 9.326944005440819e-05, + "loss": 3.0774, + "step": 5589 + }, + { + "epoch": 1.62, + "learning_rate": 9.31330443450008e-05, + "loss": 3.0464, + "step": 5590 + }, + { + "epoch": 1.62, + "learning_rate": 9.299673819707454e-05, + "loss": 3.1466, + "step": 5591 + }, + { + "epoch": 1.62, + "learning_rate": 9.286052164063369e-05, + "loss": 3.0501, + "step": 5592 + }, + { + "epoch": 1.62, + "learning_rate": 9.272439470566301e-05, + "loss": 3.0369, + "step": 5593 + }, + { + "epoch": 1.62, + "learning_rate": 9.258835742212718e-05, + "loss": 3.0716, + "step": 5594 + }, + { + "epoch": 1.62, + "learning_rate": 9.245240981997155e-05, + "loss": 3.1176, + "step": 5595 + }, + { + "epoch": 1.62, + "learning_rate": 9.231655192912136e-05, + "loss": 3.1047, + "step": 5596 + }, + { + "epoch": 1.62, + "learning_rate": 9.218078377948257e-05, + "loss": 3.1662, + "step": 5597 + }, + { + "epoch": 1.62, + "learning_rate": 9.204510540094096e-05, + "loss": 3.0296, + "step": 5598 + }, + { + "epoch": 1.62, + "learning_rate": 9.190951682336251e-05, + "loss": 3.1674, + "step": 5599 + }, + { + "epoch": 1.62, + "learning_rate": 9.177401807659386e-05, + "loss": 3.1993, + "step": 5600 + }, + { + "epoch": 1.62, + "learning_rate": 9.163860919046152e-05, + "loss": 3.0883, + "step": 5601 + }, + { + "epoch": 1.62, + "learning_rate": 9.150329019477254e-05, + "loss": 3.1375, + "step": 5602 + }, + { + "epoch": 1.62, + "learning_rate": 9.136806111931367e-05, + "loss": 3.0646, + "step": 5603 + }, + { + "epoch": 1.62, + "learning_rate": 9.123292199385246e-05, + "loss": 3.1663, + "step": 5604 + }, + { + "epoch": 1.62, + "learning_rate": 9.109787284813615e-05, + "loss": 3.2081, + "step": 5605 + }, + { + "epoch": 1.62, + "learning_rate": 9.096291371189258e-05, + "loss": 3.3023, + "step": 5606 + }, + { + "epoch": 1.62, + "learning_rate": 9.082804461482952e-05, + "loss": 3.2478, + "step": 5607 + }, + { + "epoch": 1.62, + "learning_rate": 9.069326558663487e-05, + "loss": 3.1789, + "step": 5608 + }, + { + "epoch": 1.62, + "learning_rate": 9.055857665697693e-05, + "loss": 3.1448, + "step": 5609 + }, + { + "epoch": 1.62, + "learning_rate": 9.042397785550405e-05, + "loss": 3.1976, + "step": 5610 + }, + { + "epoch": 1.62, + "learning_rate": 9.028946921184494e-05, + "loss": 3.0925, + "step": 5611 + }, + { + "epoch": 1.63, + "learning_rate": 9.015505075560798e-05, + "loss": 3.1302, + "step": 5612 + }, + { + "epoch": 1.63, + "learning_rate": 9.002072251638221e-05, + "loss": 3.2994, + "step": 5613 + }, + { + "epoch": 1.63, + "learning_rate": 8.98864845237365e-05, + "loss": 3.2657, + "step": 5614 + }, + { + "epoch": 1.63, + "learning_rate": 8.975233680722e-05, + "loss": 3.0787, + "step": 5615 + }, + { + "epoch": 1.63, + "learning_rate": 8.961827939636197e-05, + "loss": 3.2262, + "step": 5616 + }, + { + "epoch": 1.63, + "learning_rate": 8.948431232067156e-05, + "loss": 3.1494, + "step": 5617 + }, + { + "epoch": 1.63, + "learning_rate": 8.93504356096384e-05, + "loss": 3.2401, + "step": 5618 + }, + { + "epoch": 1.63, + "learning_rate": 8.921664929273204e-05, + "loss": 3.0623, + "step": 5619 + }, + { + "epoch": 1.63, + "learning_rate": 8.908295339940231e-05, + "loss": 3.1681, + "step": 5620 + }, + { + "epoch": 1.63, + "learning_rate": 8.894934795907872e-05, + "loss": 3.1344, + "step": 5621 + }, + { + "epoch": 1.63, + "learning_rate": 8.881583300117136e-05, + "loss": 3.1869, + "step": 5622 + }, + { + "epoch": 1.63, + "learning_rate": 8.868240855506993e-05, + "loss": 3.0666, + "step": 5623 + }, + { + "epoch": 1.63, + "learning_rate": 8.854907465014478e-05, + "loss": 3.1513, + "step": 5624 + }, + { + "epoch": 1.63, + "learning_rate": 8.841583131574576e-05, + "loss": 3.2698, + "step": 5625 + }, + { + "epoch": 1.63, + "learning_rate": 8.828267858120293e-05, + "loss": 3.1768, + "step": 5626 + }, + { + "epoch": 1.63, + "learning_rate": 8.81496164758267e-05, + "loss": 2.9771, + "step": 5627 + }, + { + "epoch": 1.63, + "learning_rate": 8.801664502890722e-05, + "loss": 3.2075, + "step": 5628 + }, + { + "epoch": 1.63, + "learning_rate": 8.7883764269715e-05, + "loss": 3.0786, + "step": 5629 + }, + { + "epoch": 1.63, + "learning_rate": 8.775097422750011e-05, + "loss": 3.1317, + "step": 5630 + }, + { + "epoch": 1.63, + "learning_rate": 8.761827493149321e-05, + "loss": 3.1709, + "step": 5631 + }, + { + "epoch": 1.63, + "learning_rate": 8.748566641090433e-05, + "loss": 3.0478, + "step": 5632 + }, + { + "epoch": 1.63, + "learning_rate": 8.735314869492428e-05, + "loss": 3.2399, + "step": 5633 + }, + { + "epoch": 1.63, + "learning_rate": 8.722072181272311e-05, + "loss": 3.0607, + "step": 5634 + }, + { + "epoch": 1.63, + "learning_rate": 8.708838579345147e-05, + "loss": 3.0165, + "step": 5635 + }, + { + "epoch": 1.63, + "learning_rate": 8.695614066623991e-05, + "loss": 3.1985, + "step": 5636 + }, + { + "epoch": 1.63, + "learning_rate": 8.682398646019857e-05, + "loss": 3.1545, + "step": 5637 + }, + { + "epoch": 1.63, + "learning_rate": 8.66919232044181e-05, + "loss": 3.0805, + "step": 5638 + }, + { + "epoch": 1.63, + "learning_rate": 8.655995092796865e-05, + "loss": 3.1113, + "step": 5639 + }, + { + "epoch": 1.63, + "learning_rate": 8.642806965990079e-05, + "loss": 3.0384, + "step": 5640 + }, + { + "epoch": 1.63, + "learning_rate": 8.629627942924473e-05, + "loss": 3.0908, + "step": 5641 + }, + { + "epoch": 1.63, + "learning_rate": 8.61645802650109e-05, + "loss": 3.1961, + "step": 5642 + }, + { + "epoch": 1.63, + "learning_rate": 8.603297219618934e-05, + "loss": 3.1144, + "step": 5643 + }, + { + "epoch": 1.63, + "learning_rate": 8.590145525175031e-05, + "loss": 3.1511, + "step": 5644 + }, + { + "epoch": 1.63, + "learning_rate": 8.577002946064416e-05, + "loss": 3.1327, + "step": 5645 + }, + { + "epoch": 1.64, + "learning_rate": 8.563869485180059e-05, + "loss": 3.0826, + "step": 5646 + }, + { + "epoch": 1.64, + "learning_rate": 8.550745145412997e-05, + "loss": 3.2003, + "step": 5647 + }, + { + "epoch": 1.64, + "learning_rate": 8.537629929652186e-05, + "loss": 3.1246, + "step": 5648 + }, + { + "epoch": 1.64, + "learning_rate": 8.524523840784643e-05, + "loss": 3.1919, + "step": 5649 + }, + { + "epoch": 1.64, + "learning_rate": 8.511426881695311e-05, + "loss": 3.2014, + "step": 5650 + }, + { + "epoch": 1.64, + "learning_rate": 8.49833905526718e-05, + "loss": 3.2336, + "step": 5651 + }, + { + "epoch": 1.64, + "learning_rate": 8.485260364381186e-05, + "loss": 3.1385, + "step": 5652 + }, + { + "epoch": 1.64, + "learning_rate": 8.472190811916274e-05, + "loss": 3.1219, + "step": 5653 + }, + { + "epoch": 1.64, + "learning_rate": 8.459130400749404e-05, + "loss": 3.0361, + "step": 5654 + }, + { + "epoch": 1.64, + "learning_rate": 8.446079133755458e-05, + "loss": 3.1297, + "step": 5655 + }, + { + "epoch": 1.64, + "learning_rate": 8.433037013807365e-05, + "loss": 3.0088, + "step": 5656 + }, + { + "epoch": 1.64, + "learning_rate": 8.420004043776003e-05, + "loss": 3.069, + "step": 5657 + }, + { + "epoch": 1.64, + "learning_rate": 8.406980226530276e-05, + "loss": 3.0421, + "step": 5658 + }, + { + "epoch": 1.64, + "learning_rate": 8.393965564937017e-05, + "loss": 3.129, + "step": 5659 + }, + { + "epoch": 1.64, + "learning_rate": 8.380960061861098e-05, + "loss": 3.079, + "step": 5660 + }, + { + "epoch": 1.64, + "learning_rate": 8.367963720165334e-05, + "loss": 3.3014, + "step": 5661 + }, + { + "epoch": 1.64, + "learning_rate": 8.354976542710558e-05, + "loss": 3.1775, + "step": 5662 + }, + { + "epoch": 1.64, + "learning_rate": 8.341998532355565e-05, + "loss": 3.191, + "step": 5663 + }, + { + "epoch": 1.64, + "learning_rate": 8.329029691957124e-05, + "loss": 3.0846, + "step": 5664 + }, + { + "epoch": 1.64, + "learning_rate": 8.31607002437002e-05, + "loss": 3.0621, + "step": 5665 + }, + { + "epoch": 1.64, + "learning_rate": 8.303119532446973e-05, + "loss": 3.1856, + "step": 5666 + }, + { + "epoch": 1.64, + "learning_rate": 8.290178219038725e-05, + "loss": 3.0607, + "step": 5667 + }, + { + "epoch": 1.64, + "learning_rate": 8.277246086993961e-05, + "loss": 3.2026, + "step": 5668 + }, + { + "epoch": 1.64, + "learning_rate": 8.264323139159385e-05, + "loss": 3.2192, + "step": 5669 + }, + { + "epoch": 1.64, + "learning_rate": 8.251409378379638e-05, + "loss": 3.1192, + "step": 5670 + }, + { + "epoch": 1.64, + "learning_rate": 8.23850480749736e-05, + "loss": 3.0606, + "step": 5671 + }, + { + "epoch": 1.64, + "learning_rate": 8.225609429353187e-05, + "loss": 3.207, + "step": 5672 + }, + { + "epoch": 1.64, + "learning_rate": 8.212723246785681e-05, + "loss": 3.1878, + "step": 5673 + }, + { + "epoch": 1.64, + "learning_rate": 8.199846262631438e-05, + "loss": 3.1628, + "step": 5674 + }, + { + "epoch": 1.64, + "learning_rate": 8.186978479724971e-05, + "loss": 3.042, + "step": 5675 + }, + { + "epoch": 1.64, + "learning_rate": 8.17411990089883e-05, + "loss": 3.0535, + "step": 5676 + }, + { + "epoch": 1.64, + "learning_rate": 8.161270528983477e-05, + "loss": 3.1311, + "step": 5677 + }, + { + "epoch": 1.64, + "learning_rate": 8.148430366807403e-05, + "loss": 3.1216, + "step": 5678 + }, + { + "epoch": 1.64, + "learning_rate": 8.135599417197021e-05, + "loss": 3.1101, + "step": 5679 + }, + { + "epoch": 1.64, + "learning_rate": 8.122777682976745e-05, + "loss": 3.1245, + "step": 5680 + }, + { + "epoch": 1.65, + "learning_rate": 8.109965166968975e-05, + "loss": 3.1276, + "step": 5681 + }, + { + "epoch": 1.65, + "learning_rate": 8.097161871994042e-05, + "loss": 3.1533, + "step": 5682 + }, + { + "epoch": 1.65, + "learning_rate": 8.084367800870285e-05, + "loss": 3.1673, + "step": 5683 + }, + { + "epoch": 1.65, + "learning_rate": 8.07158295641397e-05, + "loss": 3.1783, + "step": 5684 + }, + { + "epoch": 1.65, + "learning_rate": 8.058807341439389e-05, + "loss": 3.1357, + "step": 5685 + }, + { + "epoch": 1.65, + "learning_rate": 8.046040958758744e-05, + "loss": 3.2349, + "step": 5686 + }, + { + "epoch": 1.65, + "learning_rate": 8.033283811182246e-05, + "loss": 3.1471, + "step": 5687 + }, + { + "epoch": 1.65, + "learning_rate": 8.02053590151805e-05, + "loss": 3.1883, + "step": 5688 + }, + { + "epoch": 1.65, + "learning_rate": 8.007797232572306e-05, + "loss": 3.0984, + "step": 5689 + }, + { + "epoch": 1.65, + "learning_rate": 7.995067807149093e-05, + "loss": 3.0762, + "step": 5690 + }, + { + "epoch": 1.65, + "learning_rate": 7.982347628050468e-05, + "loss": 3.0534, + "step": 5691 + }, + { + "epoch": 1.65, + "learning_rate": 7.969636698076476e-05, + "loss": 3.1992, + "step": 5692 + }, + { + "epoch": 1.65, + "learning_rate": 7.956935020025081e-05, + "loss": 3.1692, + "step": 5693 + }, + { + "epoch": 1.65, + "learning_rate": 7.944242596692264e-05, + "loss": 3.2134, + "step": 5694 + }, + { + "epoch": 1.65, + "learning_rate": 7.93155943087192e-05, + "loss": 3.2619, + "step": 5695 + }, + { + "epoch": 1.65, + "learning_rate": 7.918885525355934e-05, + "loss": 3.098, + "step": 5696 + }, + { + "epoch": 1.65, + "learning_rate": 7.906220882934146e-05, + "loss": 3.0488, + "step": 5697 + }, + { + "epoch": 1.65, + "learning_rate": 7.893565506394374e-05, + "loss": 3.2197, + "step": 5698 + }, + { + "epoch": 1.65, + "learning_rate": 7.880919398522362e-05, + "loss": 3.0536, + "step": 5699 + }, + { + "epoch": 1.65, + "learning_rate": 7.86828256210182e-05, + "loss": 3.1299, + "step": 5700 + }, + { + "epoch": 1.65, + "learning_rate": 7.855654999914457e-05, + "loss": 2.9909, + "step": 5701 + }, + { + "epoch": 1.65, + "learning_rate": 7.843036714739887e-05, + "loss": 3.1417, + "step": 5702 + }, + { + "epoch": 1.65, + "learning_rate": 7.830427709355725e-05, + "loss": 3.1359, + "step": 5703 + }, + { + "epoch": 1.65, + "learning_rate": 7.817827986537507e-05, + "loss": 3.1367, + "step": 5704 + }, + { + "epoch": 1.65, + "learning_rate": 7.805237549058752e-05, + "loss": 3.122, + "step": 5705 + }, + { + "epoch": 1.65, + "learning_rate": 7.792656399690923e-05, + "loss": 3.246, + "step": 5706 + }, + { + "epoch": 1.65, + "learning_rate": 7.780084541203464e-05, + "loss": 3.2403, + "step": 5707 + }, + { + "epoch": 1.65, + "learning_rate": 7.767521976363735e-05, + "loss": 3.1602, + "step": 5708 + }, + { + "epoch": 1.65, + "learning_rate": 7.754968707937054e-05, + "loss": 3.2944, + "step": 5709 + }, + { + "epoch": 1.65, + "learning_rate": 7.742424738686732e-05, + "loss": 3.204, + "step": 5710 + }, + { + "epoch": 1.65, + "learning_rate": 7.729890071373984e-05, + "loss": 3.1059, + "step": 5711 + }, + { + "epoch": 1.65, + "learning_rate": 7.717364708758023e-05, + "loss": 3.0879, + "step": 5712 + }, + { + "epoch": 1.65, + "learning_rate": 7.704848653595964e-05, + "loss": 3.0976, + "step": 5713 + }, + { + "epoch": 1.65, + "learning_rate": 7.692341908642914e-05, + "loss": 3.0776, + "step": 5714 + }, + { + "epoch": 1.66, + "learning_rate": 7.67984447665192e-05, + "loss": 3.0764, + "step": 5715 + }, + { + "epoch": 1.66, + "learning_rate": 7.667356360373984e-05, + "loss": 3.1327, + "step": 5716 + }, + { + "epoch": 1.66, + "learning_rate": 7.654877562558038e-05, + "loss": 3.1778, + "step": 5717 + }, + { + "epoch": 1.66, + "learning_rate": 7.642408085950964e-05, + "loss": 3.342, + "step": 5718 + }, + { + "epoch": 1.66, + "learning_rate": 7.629947933297626e-05, + "loss": 3.0878, + "step": 5719 + }, + { + "epoch": 1.66, + "learning_rate": 7.617497107340788e-05, + "loss": 3.0365, + "step": 5720 + }, + { + "epoch": 1.66, + "learning_rate": 7.60505561082121e-05, + "loss": 3.1462, + "step": 5721 + }, + { + "epoch": 1.66, + "learning_rate": 7.592623446477543e-05, + "loss": 3.2017, + "step": 5722 + }, + { + "epoch": 1.66, + "learning_rate": 7.580200617046434e-05, + "loss": 3.0974, + "step": 5723 + }, + { + "epoch": 1.66, + "learning_rate": 7.567787125262449e-05, + "loss": 3.0495, + "step": 5724 + }, + { + "epoch": 1.66, + "learning_rate": 7.555382973858116e-05, + "loss": 3.0326, + "step": 5725 + }, + { + "epoch": 1.66, + "learning_rate": 7.542988165563892e-05, + "loss": 3.174, + "step": 5726 + }, + { + "epoch": 1.66, + "learning_rate": 7.530602703108158e-05, + "loss": 3.2053, + "step": 5727 + }, + { + "epoch": 1.66, + "learning_rate": 7.518226589217286e-05, + "loss": 3.0889, + "step": 5728 + }, + { + "epoch": 1.66, + "learning_rate": 7.505859826615551e-05, + "loss": 3.1619, + "step": 5729 + }, + { + "epoch": 1.66, + "learning_rate": 7.4935024180252e-05, + "loss": 3.1477, + "step": 5730 + }, + { + "epoch": 1.66, + "learning_rate": 7.481154366166382e-05, + "loss": 2.9796, + "step": 5731 + }, + { + "epoch": 1.66, + "learning_rate": 7.468815673757218e-05, + "loss": 3.2387, + "step": 5732 + }, + { + "epoch": 1.66, + "learning_rate": 7.456486343513764e-05, + "loss": 3.0035, + "step": 5733 + }, + { + "epoch": 1.66, + "learning_rate": 7.444166378150013e-05, + "loss": 3.064, + "step": 5734 + }, + { + "epoch": 1.66, + "learning_rate": 7.431855780377894e-05, + "loss": 3.0552, + "step": 5735 + }, + { + "epoch": 1.66, + "learning_rate": 7.419554552907259e-05, + "loss": 3.1756, + "step": 5736 + }, + { + "epoch": 1.66, + "learning_rate": 7.407262698445932e-05, + "loss": 3.1005, + "step": 5737 + }, + { + "epoch": 1.66, + "learning_rate": 7.394980219699632e-05, + "loss": 3.1331, + "step": 5738 + }, + { + "epoch": 1.66, + "learning_rate": 7.382707119372051e-05, + "loss": 3.0275, + "step": 5739 + }, + { + "epoch": 1.66, + "learning_rate": 7.370443400164794e-05, + "loss": 3.1569, + "step": 5740 + }, + { + "epoch": 1.66, + "learning_rate": 7.358189064777432e-05, + "loss": 3.1197, + "step": 5741 + }, + { + "epoch": 1.66, + "learning_rate": 7.345944115907421e-05, + "loss": 3.0455, + "step": 5742 + }, + { + "epoch": 1.66, + "learning_rate": 7.333708556250195e-05, + "loss": 3.1007, + "step": 5743 + }, + { + "epoch": 1.66, + "learning_rate": 7.321482388499096e-05, + "loss": 3.1533, + "step": 5744 + }, + { + "epoch": 1.66, + "learning_rate": 7.30926561534539e-05, + "loss": 3.1551, + "step": 5745 + }, + { + "epoch": 1.66, + "learning_rate": 7.297058239478316e-05, + "loss": 3.1592, + "step": 5746 + }, + { + "epoch": 1.66, + "learning_rate": 7.284860263585003e-05, + "loss": 3.2132, + "step": 5747 + }, + { + "epoch": 1.66, + "learning_rate": 7.272671690350529e-05, + "loss": 3.2199, + "step": 5748 + }, + { + "epoch": 1.66, + "learning_rate": 7.260492522457906e-05, + "loss": 3.2039, + "step": 5749 + }, + { + "epoch": 1.67, + "learning_rate": 7.248322762588084e-05, + "loss": 3.1203, + "step": 5750 + }, + { + "epoch": 1.67, + "learning_rate": 7.236162413419895e-05, + "loss": 3.0179, + "step": 5751 + }, + { + "epoch": 1.67, + "learning_rate": 7.224011477630166e-05, + "loss": 3.0848, + "step": 5752 + }, + { + "epoch": 1.67, + "learning_rate": 7.211869957893591e-05, + "loss": 3.0705, + "step": 5753 + }, + { + "epoch": 1.67, + "learning_rate": 7.199737856882843e-05, + "loss": 3.0928, + "step": 5754 + }, + { + "epoch": 1.67, + "learning_rate": 7.187615177268486e-05, + "loss": 3.1805, + "step": 5755 + }, + { + "epoch": 1.67, + "learning_rate": 7.175501921719007e-05, + "loss": 3.1842, + "step": 5756 + }, + { + "epoch": 1.67, + "learning_rate": 7.163398092900852e-05, + "loss": 3.2316, + "step": 5757 + }, + { + "epoch": 1.67, + "learning_rate": 7.151303693478362e-05, + "loss": 3.0629, + "step": 5758 + }, + { + "epoch": 1.67, + "learning_rate": 7.139218726113838e-05, + "loss": 3.2306, + "step": 5759 + }, + { + "epoch": 1.67, + "learning_rate": 7.127143193467444e-05, + "loss": 3.1869, + "step": 5760 + }, + { + "epoch": 1.67, + "learning_rate": 7.115077098197337e-05, + "loss": 3.2043, + "step": 5761 + }, + { + "epoch": 1.67, + "learning_rate": 7.10302044295954e-05, + "loss": 3.1551, + "step": 5762 + }, + { + "epoch": 1.67, + "learning_rate": 7.090973230408033e-05, + "loss": 3.1294, + "step": 5763 + }, + { + "epoch": 1.67, + "learning_rate": 7.078935463194707e-05, + "loss": 3.1149, + "step": 5764 + }, + { + "epoch": 1.67, + "learning_rate": 7.066907143969353e-05, + "loss": 3.2406, + "step": 5765 + }, + { + "epoch": 1.67, + "learning_rate": 7.054888275379712e-05, + "loss": 3.0397, + "step": 5766 + }, + { + "epoch": 1.67, + "learning_rate": 7.042878860071439e-05, + "loss": 3.2545, + "step": 5767 + }, + { + "epoch": 1.67, + "learning_rate": 7.030878900688114e-05, + "loss": 3.0968, + "step": 5768 + }, + { + "epoch": 1.67, + "learning_rate": 7.018888399871204e-05, + "loss": 3.0862, + "step": 5769 + }, + { + "epoch": 1.67, + "learning_rate": 7.006907360260129e-05, + "loss": 3.0908, + "step": 5770 + }, + { + "epoch": 1.67, + "learning_rate": 6.9949357844922e-05, + "loss": 3.0943, + "step": 5771 + }, + { + "epoch": 1.67, + "learning_rate": 6.982973675202675e-05, + "loss": 3.109, + "step": 5772 + }, + { + "epoch": 1.67, + "learning_rate": 6.971021035024694e-05, + "loss": 3.1406, + "step": 5773 + }, + { + "epoch": 1.67, + "learning_rate": 6.959077866589326e-05, + "loss": 3.1201, + "step": 5774 + }, + { + "epoch": 1.67, + "learning_rate": 6.947144172525566e-05, + "loss": 3.1079, + "step": 5775 + }, + { + "epoch": 1.67, + "learning_rate": 6.935219955460309e-05, + "loss": 3.0473, + "step": 5776 + }, + { + "epoch": 1.67, + "learning_rate": 6.923305218018394e-05, + "loss": 3.0841, + "step": 5777 + }, + { + "epoch": 1.67, + "learning_rate": 6.911399962822518e-05, + "loss": 3.1898, + "step": 5778 + }, + { + "epoch": 1.67, + "learning_rate": 6.899504192493344e-05, + "loss": 3.1737, + "step": 5779 + }, + { + "epoch": 1.67, + "learning_rate": 6.887617909649407e-05, + "loss": 3.0332, + "step": 5780 + }, + { + "epoch": 1.67, + "learning_rate": 6.875741116907191e-05, + "loss": 3.0799, + "step": 5781 + }, + { + "epoch": 1.67, + "learning_rate": 6.863873816881061e-05, + "loss": 3.2181, + "step": 5782 + }, + { + "epoch": 1.67, + "learning_rate": 6.852016012183299e-05, + "loss": 3.1488, + "step": 5783 + }, + { + "epoch": 1.68, + "learning_rate": 6.840167705424105e-05, + "loss": 3.129, + "step": 5784 + }, + { + "epoch": 1.68, + "learning_rate": 6.828328899211584e-05, + "loss": 3.2644, + "step": 5785 + }, + { + "epoch": 1.68, + "learning_rate": 6.81649959615176e-05, + "loss": 3.1889, + "step": 5786 + }, + { + "epoch": 1.68, + "learning_rate": 6.80467979884854e-05, + "loss": 3.1024, + "step": 5787 + }, + { + "epoch": 1.68, + "learning_rate": 6.792869509903777e-05, + "loss": 3.2234, + "step": 5788 + }, + { + "epoch": 1.68, + "learning_rate": 6.781068731917173e-05, + "loss": 3.0658, + "step": 5789 + }, + { + "epoch": 1.68, + "learning_rate": 6.769277467486407e-05, + "loss": 3.2215, + "step": 5790 + }, + { + "epoch": 1.68, + "learning_rate": 6.757495719206996e-05, + "loss": 3.1552, + "step": 5791 + }, + { + "epoch": 1.68, + "learning_rate": 6.745723489672412e-05, + "loss": 3.0855, + "step": 5792 + }, + { + "epoch": 1.68, + "learning_rate": 6.733960781474019e-05, + "loss": 3.1169, + "step": 5793 + }, + { + "epoch": 1.68, + "learning_rate": 6.722207597201064e-05, + "loss": 3.0435, + "step": 5794 + }, + { + "epoch": 1.68, + "learning_rate": 6.71046393944073e-05, + "loss": 3.286, + "step": 5795 + }, + { + "epoch": 1.68, + "learning_rate": 6.698729810778065e-05, + "loss": 3.081, + "step": 5796 + }, + { + "epoch": 1.68, + "learning_rate": 6.687005213796071e-05, + "loss": 3.1921, + "step": 5797 + }, + { + "epoch": 1.68, + "learning_rate": 6.675290151075591e-05, + "loss": 3.1434, + "step": 5798 + }, + { + "epoch": 1.68, + "learning_rate": 6.663584625195424e-05, + "loss": 3.104, + "step": 5799 + }, + { + "epoch": 1.68, + "learning_rate": 6.651888638732228e-05, + "loss": 3.2131, + "step": 5800 + }, + { + "epoch": 1.68, + "learning_rate": 6.640202194260586e-05, + "loss": 3.0813, + "step": 5801 + }, + { + "epoch": 1.68, + "learning_rate": 6.628525294352988e-05, + "loss": 3.1544, + "step": 5802 + }, + { + "epoch": 1.68, + "learning_rate": 6.616857941579779e-05, + "loss": 3.1577, + "step": 5803 + }, + { + "epoch": 1.68, + "learning_rate": 6.60520013850926e-05, + "loss": 3.0804, + "step": 5804 + }, + { + "epoch": 1.68, + "learning_rate": 6.593551887707577e-05, + "loss": 3.123, + "step": 5805 + }, + { + "epoch": 1.68, + "learning_rate": 6.581913191738826e-05, + "loss": 3.1361, + "step": 5806 + }, + { + "epoch": 1.68, + "learning_rate": 6.570284053164944e-05, + "loss": 3.2301, + "step": 5807 + }, + { + "epoch": 1.68, + "learning_rate": 6.558664474545817e-05, + "loss": 3.0878, + "step": 5808 + }, + { + "epoch": 1.68, + "learning_rate": 6.547054458439178e-05, + "loss": 3.0973, + "step": 5809 + }, + { + "epoch": 1.68, + "learning_rate": 6.535454007400688e-05, + "loss": 3.0193, + "step": 5810 + }, + { + "epoch": 1.68, + "learning_rate": 6.523863123983909e-05, + "loss": 3.0989, + "step": 5811 + }, + { + "epoch": 1.68, + "learning_rate": 6.51228181074025e-05, + "loss": 3.1453, + "step": 5812 + }, + { + "epoch": 1.68, + "learning_rate": 6.500710070219079e-05, + "loss": 3.1774, + "step": 5813 + }, + { + "epoch": 1.68, + "learning_rate": 6.48914790496759e-05, + "loss": 3.0116, + "step": 5814 + }, + { + "epoch": 1.68, + "learning_rate": 6.477595317530933e-05, + "loss": 3.2688, + "step": 5815 + }, + { + "epoch": 1.68, + "learning_rate": 6.466052310452092e-05, + "loss": 3.0009, + "step": 5816 + }, + { + "epoch": 1.68, + "learning_rate": 6.454518886271981e-05, + "loss": 3.2751, + "step": 5817 + }, + { + "epoch": 1.68, + "learning_rate": 6.442995047529388e-05, + "loss": 3.1206, + "step": 5818 + }, + { + "epoch": 1.69, + "learning_rate": 6.431480796760991e-05, + "loss": 3.1342, + "step": 5819 + }, + { + "epoch": 1.69, + "learning_rate": 6.419976136501376e-05, + "loss": 3.0335, + "step": 5820 + }, + { + "epoch": 1.69, + "learning_rate": 6.40848106928299e-05, + "loss": 3.0244, + "step": 5821 + }, + { + "epoch": 1.69, + "learning_rate": 6.39699559763619e-05, + "loss": 2.9725, + "step": 5822 + }, + { + "epoch": 1.69, + "learning_rate": 6.3855197240892e-05, + "loss": 3.1518, + "step": 5823 + }, + { + "epoch": 1.69, + "learning_rate": 6.374053451168166e-05, + "loss": 3.0842, + "step": 5824 + }, + { + "epoch": 1.69, + "learning_rate": 6.362596781397068e-05, + "loss": 3.0639, + "step": 5825 + }, + { + "epoch": 1.69, + "learning_rate": 6.351149717297833e-05, + "loss": 3.0486, + "step": 5826 + }, + { + "epoch": 1.69, + "learning_rate": 6.339712261390212e-05, + "loss": 3.0668, + "step": 5827 + }, + { + "epoch": 1.69, + "learning_rate": 6.328284416191892e-05, + "loss": 3.1715, + "step": 5828 + }, + { + "epoch": 1.69, + "learning_rate": 6.316866184218434e-05, + "loss": 3.2332, + "step": 5829 + }, + { + "epoch": 1.69, + "learning_rate": 6.305457567983247e-05, + "loss": 3.2108, + "step": 5830 + }, + { + "epoch": 1.69, + "learning_rate": 6.294058569997674e-05, + "loss": 3.1008, + "step": 5831 + }, + { + "epoch": 1.69, + "learning_rate": 6.282669192770896e-05, + "loss": 3.0863, + "step": 5832 + }, + { + "epoch": 1.69, + "learning_rate": 6.271289438810013e-05, + "loss": 3.0901, + "step": 5833 + }, + { + "epoch": 1.69, + "learning_rate": 6.259919310619977e-05, + "loss": 3.1385, + "step": 5834 + }, + { + "epoch": 1.69, + "learning_rate": 6.248558810703647e-05, + "loss": 3.1787, + "step": 5835 + }, + { + "epoch": 1.69, + "learning_rate": 6.237207941561734e-05, + "loss": 3.0745, + "step": 5836 + }, + { + "epoch": 1.69, + "learning_rate": 6.225866705692856e-05, + "loss": 3.3117, + "step": 5837 + }, + { + "epoch": 1.69, + "learning_rate": 6.214535105593505e-05, + "loss": 3.0915, + "step": 5838 + }, + { + "epoch": 1.69, + "learning_rate": 6.203213143758035e-05, + "loss": 3.0393, + "step": 5839 + }, + { + "epoch": 1.69, + "learning_rate": 6.191900822678698e-05, + "loss": 3.1505, + "step": 5840 + }, + { + "epoch": 1.69, + "learning_rate": 6.180598144845606e-05, + "loss": 3.1008, + "step": 5841 + }, + { + "epoch": 1.69, + "learning_rate": 6.169305112746776e-05, + "loss": 3.1517, + "step": 5842 + }, + { + "epoch": 1.69, + "learning_rate": 6.158021728868062e-05, + "loss": 3.0368, + "step": 5843 + }, + { + "epoch": 1.69, + "learning_rate": 6.146747995693224e-05, + "loss": 3.1443, + "step": 5844 + }, + { + "epoch": 1.69, + "learning_rate": 6.135483915703888e-05, + "loss": 3.1922, + "step": 5845 + }, + { + "epoch": 1.69, + "learning_rate": 6.124229491379574e-05, + "loss": 3.1315, + "step": 5846 + }, + { + "epoch": 1.69, + "learning_rate": 6.112984725197645e-05, + "loss": 3.0991, + "step": 5847 + }, + { + "epoch": 1.69, + "learning_rate": 6.1017496196333454e-05, + "loss": 3.1582, + "step": 5848 + }, + { + "epoch": 1.69, + "learning_rate": 6.090524177159812e-05, + "loss": 3.0246, + "step": 5849 + }, + { + "epoch": 1.69, + "learning_rate": 6.079308400248029e-05, + "loss": 3.1043, + "step": 5850 + }, + { + "epoch": 1.69, + "learning_rate": 6.068102291366884e-05, + "loss": 3.1463, + "step": 5851 + }, + { + "epoch": 1.69, + "learning_rate": 6.056905852983102e-05, + "loss": 3.1716, + "step": 5852 + }, + { + "epoch": 1.7, + "learning_rate": 6.0457190875612964e-05, + "loss": 3.0289, + "step": 5853 + }, + { + "epoch": 1.7, + "learning_rate": 6.034541997563964e-05, + "loss": 3.1747, + "step": 5854 + }, + { + "epoch": 1.7, + "learning_rate": 6.0233745854514636e-05, + "loss": 3.1044, + "step": 5855 + }, + { + "epoch": 1.7, + "learning_rate": 6.012216853682001e-05, + "loss": 3.0298, + "step": 5856 + }, + { + "epoch": 1.7, + "learning_rate": 6.0010688047116735e-05, + "loss": 3.2529, + "step": 5857 + }, + { + "epoch": 1.7, + "learning_rate": 5.989930440994451e-05, + "loss": 3.1578, + "step": 5858 + }, + { + "epoch": 1.7, + "learning_rate": 5.978801764982145e-05, + "loss": 3.126, + "step": 5859 + }, + { + "epoch": 1.7, + "learning_rate": 5.967682779124478e-05, + "loss": 3.2123, + "step": 5860 + }, + { + "epoch": 1.7, + "learning_rate": 5.956573485868988e-05, + "loss": 3.1169, + "step": 5861 + }, + { + "epoch": 1.7, + "learning_rate": 5.945473887661118e-05, + "loss": 3.1828, + "step": 5862 + }, + { + "epoch": 1.7, + "learning_rate": 5.934383986944158e-05, + "loss": 3.1016, + "step": 5863 + }, + { + "epoch": 1.7, + "learning_rate": 5.9233037861592896e-05, + "loss": 3.104, + "step": 5864 + }, + { + "epoch": 1.7, + "learning_rate": 5.9122332877455276e-05, + "loss": 3.1052, + "step": 5865 + }, + { + "epoch": 1.7, + "learning_rate": 5.901172494139739e-05, + "loss": 3.179, + "step": 5866 + }, + { + "epoch": 1.7, + "learning_rate": 5.890121407776716e-05, + "loss": 3.2006, + "step": 5867 + }, + { + "epoch": 1.7, + "learning_rate": 5.879080031089046e-05, + "loss": 3.1153, + "step": 5868 + }, + { + "epoch": 1.7, + "learning_rate": 5.8680483665072335e-05, + "loss": 3.2712, + "step": 5869 + }, + { + "epoch": 1.7, + "learning_rate": 5.857026416459593e-05, + "loss": 3.1874, + "step": 5870 + }, + { + "epoch": 1.7, + "learning_rate": 5.84601418337235e-05, + "loss": 3.1764, + "step": 5871 + }, + { + "epoch": 1.7, + "learning_rate": 5.8350116696695596e-05, + "loss": 3.0982, + "step": 5872 + }, + { + "epoch": 1.7, + "learning_rate": 5.824018877773163e-05, + "loss": 3.0777, + "step": 5873 + }, + { + "epoch": 1.7, + "learning_rate": 5.813035810102935e-05, + "loss": 3.2982, + "step": 5874 + }, + { + "epoch": 1.7, + "learning_rate": 5.8020624690765076e-05, + "loss": 3.0828, + "step": 5875 + }, + { + "epoch": 1.7, + "learning_rate": 5.7910988571094e-05, + "loss": 3.1596, + "step": 5876 + }, + { + "epoch": 1.7, + "learning_rate": 5.78014497661497e-05, + "loss": 3.2105, + "step": 5877 + }, + { + "epoch": 1.7, + "learning_rate": 5.76920083000444e-05, + "loss": 3.2571, + "step": 5878 + }, + { + "epoch": 1.7, + "learning_rate": 5.758266419686881e-05, + "loss": 3.2297, + "step": 5879 + }, + { + "epoch": 1.7, + "learning_rate": 5.747341748069229e-05, + "loss": 3.1762, + "step": 5880 + }, + { + "epoch": 1.7, + "learning_rate": 5.7364268175562786e-05, + "loss": 2.9494, + "step": 5881 + }, + { + "epoch": 1.7, + "learning_rate": 5.725521630550684e-05, + "loss": 3.1783, + "step": 5882 + }, + { + "epoch": 1.7, + "learning_rate": 5.714626189452937e-05, + "loss": 3.1142, + "step": 5883 + }, + { + "epoch": 1.7, + "learning_rate": 5.70374049666138e-05, + "loss": 3.1604, + "step": 5884 + }, + { + "epoch": 1.7, + "learning_rate": 5.6928645545722524e-05, + "loss": 3.3185, + "step": 5885 + }, + { + "epoch": 1.7, + "learning_rate": 5.6819983655795935e-05, + "loss": 3.0474, + "step": 5886 + }, + { + "epoch": 1.7, + "learning_rate": 5.671141932075335e-05, + "loss": 3.0614, + "step": 5887 + }, + { + "epoch": 1.71, + "learning_rate": 5.660295256449233e-05, + "loss": 3.2309, + "step": 5888 + }, + { + "epoch": 1.71, + "learning_rate": 5.649458341088914e-05, + "loss": 3.1124, + "step": 5889 + }, + { + "epoch": 1.71, + "learning_rate": 5.638631188379856e-05, + "loss": 3.12, + "step": 5890 + }, + { + "epoch": 1.71, + "learning_rate": 5.6278138007053944e-05, + "loss": 3.0623, + "step": 5891 + }, + { + "epoch": 1.71, + "learning_rate": 5.617006180446688e-05, + "loss": 3.1212, + "step": 5892 + }, + { + "epoch": 1.71, + "learning_rate": 5.606208329982754e-05, + "loss": 3.1292, + "step": 5893 + }, + { + "epoch": 1.71, + "learning_rate": 5.5954202516904864e-05, + "loss": 3.026, + "step": 5894 + }, + { + "epoch": 1.71, + "learning_rate": 5.584641947944591e-05, + "loss": 3.1871, + "step": 5895 + }, + { + "epoch": 1.71, + "learning_rate": 5.573873421117642e-05, + "loss": 3.0561, + "step": 5896 + }, + { + "epoch": 1.71, + "learning_rate": 5.563114673580061e-05, + "loss": 3.0537, + "step": 5897 + }, + { + "epoch": 1.71, + "learning_rate": 5.55236570770013e-05, + "loss": 3.1289, + "step": 5898 + }, + { + "epoch": 1.71, + "learning_rate": 5.54162652584394e-05, + "loss": 3.1259, + "step": 5899 + }, + { + "epoch": 1.71, + "learning_rate": 5.530897130375467e-05, + "loss": 3.1487, + "step": 5900 + }, + { + "epoch": 1.71, + "learning_rate": 5.5201775236565056e-05, + "loss": 3.0727, + "step": 5901 + }, + { + "epoch": 1.71, + "learning_rate": 5.509467708046706e-05, + "loss": 2.9689, + "step": 5902 + }, + { + "epoch": 1.71, + "learning_rate": 5.498767685903572e-05, + "loss": 3.2156, + "step": 5903 + }, + { + "epoch": 1.71, + "learning_rate": 5.4880774595824246e-05, + "loss": 3.1497, + "step": 5904 + }, + { + "epoch": 1.71, + "learning_rate": 5.477397031436465e-05, + "loss": 3.1182, + "step": 5905 + }, + { + "epoch": 1.71, + "learning_rate": 5.466726403816713e-05, + "loss": 3.2049, + "step": 5906 + }, + { + "epoch": 1.71, + "learning_rate": 5.45606557907205e-05, + "loss": 3.1578, + "step": 5907 + }, + { + "epoch": 1.71, + "learning_rate": 5.445414559549167e-05, + "loss": 3.0453, + "step": 5908 + }, + { + "epoch": 1.71, + "learning_rate": 5.4347733475926365e-05, + "loss": 3.1014, + "step": 5909 + }, + { + "epoch": 1.71, + "learning_rate": 5.4241419455448414e-05, + "loss": 3.1318, + "step": 5910 + }, + { + "epoch": 1.71, + "learning_rate": 5.41352035574601e-05, + "loss": 3.1573, + "step": 5911 + }, + { + "epoch": 1.71, + "learning_rate": 5.4029085805342324e-05, + "loss": 3.1595, + "step": 5912 + }, + { + "epoch": 1.71, + "learning_rate": 5.392306622245407e-05, + "loss": 3.114, + "step": 5913 + }, + { + "epoch": 1.71, + "learning_rate": 5.38171448321329e-05, + "loss": 3.0953, + "step": 5914 + }, + { + "epoch": 1.71, + "learning_rate": 5.3711321657694825e-05, + "loss": 3.1468, + "step": 5915 + }, + { + "epoch": 1.71, + "learning_rate": 5.36055967224342e-05, + "loss": 3.0251, + "step": 5916 + }, + { + "epoch": 1.71, + "learning_rate": 5.3499970049623483e-05, + "loss": 3.0255, + "step": 5917 + }, + { + "epoch": 1.71, + "learning_rate": 5.3394441662513945e-05, + "loss": 3.0289, + "step": 5918 + }, + { + "epoch": 1.71, + "learning_rate": 5.3289011584334855e-05, + "loss": 3.1095, + "step": 5919 + }, + { + "epoch": 1.71, + "learning_rate": 5.318367983829392e-05, + "loss": 3.0044, + "step": 5920 + }, + { + "epoch": 1.71, + "learning_rate": 5.307844644757742e-05, + "loss": 3.2112, + "step": 5921 + }, + { + "epoch": 1.72, + "learning_rate": 5.297331143534973e-05, + "loss": 3.0923, + "step": 5922 + }, + { + "epoch": 1.72, + "learning_rate": 5.286827482475365e-05, + "loss": 3.1513, + "step": 5923 + }, + { + "epoch": 1.72, + "learning_rate": 5.276333663891036e-05, + "loss": 3.1145, + "step": 5924 + }, + { + "epoch": 1.72, + "learning_rate": 5.26584969009195e-05, + "loss": 3.043, + "step": 5925 + }, + { + "epoch": 1.72, + "learning_rate": 5.255375563385867e-05, + "loss": 3.1938, + "step": 5926 + }, + { + "epoch": 1.72, + "learning_rate": 5.244911286078419e-05, + "loss": 3.0888, + "step": 5927 + }, + { + "epoch": 1.72, + "learning_rate": 5.234456860473041e-05, + "loss": 3.1117, + "step": 5928 + }, + { + "epoch": 1.72, + "learning_rate": 5.224012288871011e-05, + "loss": 3.0747, + "step": 5929 + }, + { + "epoch": 1.72, + "learning_rate": 5.213577573571443e-05, + "loss": 3.1275, + "step": 5930 + }, + { + "epoch": 1.72, + "learning_rate": 5.2031527168712624e-05, + "loss": 3.2608, + "step": 5931 + }, + { + "epoch": 1.72, + "learning_rate": 5.192737721065255e-05, + "loss": 3.0666, + "step": 5932 + }, + { + "epoch": 1.72, + "learning_rate": 5.182332588446009e-05, + "loss": 3.1642, + "step": 5933 + }, + { + "epoch": 1.72, + "learning_rate": 5.17193732130396e-05, + "loss": 3.0845, + "step": 5934 + }, + { + "epoch": 1.72, + "learning_rate": 5.1615519219273546e-05, + "loss": 3.1004, + "step": 5935 + }, + { + "epoch": 1.72, + "learning_rate": 5.151176392602291e-05, + "loss": 3.1509, + "step": 5936 + }, + { + "epoch": 1.72, + "learning_rate": 5.140810735612655e-05, + "loss": 3.0493, + "step": 5937 + }, + { + "epoch": 1.72, + "learning_rate": 5.13045495324021e-05, + "loss": 3.0786, + "step": 5938 + }, + { + "epoch": 1.72, + "learning_rate": 5.120109047764504e-05, + "loss": 3.1068, + "step": 5939 + }, + { + "epoch": 1.72, + "learning_rate": 5.109773021462921e-05, + "loss": 3.093, + "step": 5940 + }, + { + "epoch": 1.72, + "learning_rate": 5.099446876610692e-05, + "loss": 3.164, + "step": 5941 + }, + { + "epoch": 1.72, + "learning_rate": 5.089130615480841e-05, + "loss": 2.9833, + "step": 5942 + }, + { + "epoch": 1.72, + "learning_rate": 5.07882424034426e-05, + "loss": 3.155, + "step": 5943 + }, + { + "epoch": 1.72, + "learning_rate": 5.068527753469604e-05, + "loss": 3.0512, + "step": 5944 + }, + { + "epoch": 1.72, + "learning_rate": 5.058241157123411e-05, + "loss": 3.146, + "step": 5945 + }, + { + "epoch": 1.72, + "learning_rate": 5.047964453569992e-05, + "loss": 3.2512, + "step": 5946 + }, + { + "epoch": 1.72, + "learning_rate": 5.037697645071526e-05, + "loss": 3.1434, + "step": 5947 + }, + { + "epoch": 1.72, + "learning_rate": 5.027440733887972e-05, + "loss": 3.1983, + "step": 5948 + }, + { + "epoch": 1.72, + "learning_rate": 5.017193722277136e-05, + "loss": 3.0799, + "step": 5949 + }, + { + "epoch": 1.72, + "learning_rate": 5.006956612494651e-05, + "loss": 3.0886, + "step": 5950 + }, + { + "epoch": 1.72, + "learning_rate": 4.996729406793943e-05, + "loss": 3.1598, + "step": 5951 + }, + { + "epoch": 1.72, + "learning_rate": 4.986512107426283e-05, + "loss": 3.2882, + "step": 5952 + }, + { + "epoch": 1.72, + "learning_rate": 4.9763047166407395e-05, + "loss": 3.1816, + "step": 5953 + }, + { + "epoch": 1.72, + "learning_rate": 4.966107236684225e-05, + "loss": 3.137, + "step": 5954 + }, + { + "epoch": 1.72, + "learning_rate": 4.9559196698014455e-05, + "loss": 3.0786, + "step": 5955 + }, + { + "epoch": 1.72, + "learning_rate": 4.94574201823495e-05, + "loss": 3.2056, + "step": 5956 + }, + { + "epoch": 1.73, + "learning_rate": 4.9355742842250695e-05, + "loss": 3.1278, + "step": 5957 + }, + { + "epoch": 1.73, + "learning_rate": 4.925416470009991e-05, + "loss": 3.0525, + "step": 5958 + }, + { + "epoch": 1.73, + "learning_rate": 4.91526857782571e-05, + "loss": 3.178, + "step": 5959 + }, + { + "epoch": 1.73, + "learning_rate": 4.9051306099060055e-05, + "loss": 3.1103, + "step": 5960 + }, + { + "epoch": 1.73, + "learning_rate": 4.895002568482509e-05, + "loss": 3.1031, + "step": 5961 + }, + { + "epoch": 1.73, + "learning_rate": 4.884884455784644e-05, + "loss": 3.085, + "step": 5962 + }, + { + "epoch": 1.73, + "learning_rate": 4.8747762740396714e-05, + "loss": 3.1366, + "step": 5963 + }, + { + "epoch": 1.73, + "learning_rate": 4.864678025472635e-05, + "loss": 3.0422, + "step": 5964 + }, + { + "epoch": 1.73, + "learning_rate": 4.854589712306423e-05, + "loss": 3.0585, + "step": 5965 + }, + { + "epoch": 1.73, + "learning_rate": 4.844511336761709e-05, + "loss": 3.0954, + "step": 5966 + }, + { + "epoch": 1.73, + "learning_rate": 4.834442901057001e-05, + "loss": 3.1752, + "step": 5967 + }, + { + "epoch": 1.73, + "learning_rate": 4.824384407408622e-05, + "loss": 3.1914, + "step": 5968 + }, + { + "epoch": 1.73, + "learning_rate": 4.814335858030672e-05, + "loss": 3.0803, + "step": 5969 + }, + { + "epoch": 1.73, + "learning_rate": 4.80429725513511e-05, + "loss": 3.0941, + "step": 5970 + }, + { + "epoch": 1.73, + "learning_rate": 4.794268600931651e-05, + "loss": 3.1277, + "step": 5971 + }, + { + "epoch": 1.73, + "learning_rate": 4.7842498976278794e-05, + "loss": 3.1871, + "step": 5972 + }, + { + "epoch": 1.73, + "learning_rate": 4.7742411474291425e-05, + "loss": 3.1268, + "step": 5973 + }, + { + "epoch": 1.73, + "learning_rate": 4.7642423525386225e-05, + "loss": 3.1671, + "step": 5974 + }, + { + "epoch": 1.73, + "learning_rate": 4.754253515157286e-05, + "loss": 3.0443, + "step": 5975 + }, + { + "epoch": 1.73, + "learning_rate": 4.7442746374839365e-05, + "loss": 3.0991, + "step": 5976 + }, + { + "epoch": 1.73, + "learning_rate": 4.734305721715182e-05, + "loss": 3.1885, + "step": 5977 + }, + { + "epoch": 1.73, + "learning_rate": 4.724346770045401e-05, + "loss": 3.1831, + "step": 5978 + }, + { + "epoch": 1.73, + "learning_rate": 4.7143977846668294e-05, + "loss": 3.0665, + "step": 5979 + }, + { + "epoch": 1.73, + "learning_rate": 4.704458767769471e-05, + "loss": 3.2454, + "step": 5980 + }, + { + "epoch": 1.73, + "learning_rate": 4.6945297215411576e-05, + "loss": 3.177, + "step": 5981 + }, + { + "epoch": 1.73, + "learning_rate": 4.684610648167503e-05, + "loss": 3.0679, + "step": 5982 + }, + { + "epoch": 1.73, + "learning_rate": 4.674701549831961e-05, + "loss": 3.0118, + "step": 5983 + }, + { + "epoch": 1.73, + "learning_rate": 4.664802428715753e-05, + "loss": 3.0512, + "step": 5984 + }, + { + "epoch": 1.73, + "learning_rate": 4.654913286997925e-05, + "loss": 3.2122, + "step": 5985 + }, + { + "epoch": 1.73, + "learning_rate": 4.64503412685533e-05, + "loss": 3.0368, + "step": 5986 + }, + { + "epoch": 1.73, + "learning_rate": 4.6351649504626e-05, + "loss": 3.086, + "step": 5987 + }, + { + "epoch": 1.73, + "learning_rate": 4.6253057599922044e-05, + "loss": 3.1825, + "step": 5988 + }, + { + "epoch": 1.73, + "learning_rate": 4.6154565576143724e-05, + "loss": 3.136, + "step": 5989 + }, + { + "epoch": 1.73, + "learning_rate": 4.6056173454971815e-05, + "loss": 3.2387, + "step": 5990 + }, + { + "epoch": 1.74, + "learning_rate": 4.595788125806466e-05, + "loss": 3.0936, + "step": 5991 + }, + { + "epoch": 1.74, + "learning_rate": 4.585968900705889e-05, + "loss": 3.1251, + "step": 5992 + }, + { + "epoch": 1.74, + "learning_rate": 4.5761596723569e-05, + "loss": 3.0895, + "step": 5993 + }, + { + "epoch": 1.74, + "learning_rate": 4.566360442918754e-05, + "loss": 2.9366, + "step": 5994 + }, + { + "epoch": 1.74, + "learning_rate": 4.556571214548516e-05, + "loss": 3.1494, + "step": 5995 + }, + { + "epoch": 1.74, + "learning_rate": 4.546791989401011e-05, + "loss": 3.1151, + "step": 5996 + }, + { + "epoch": 1.74, + "learning_rate": 4.537022769628923e-05, + "loss": 3.2048, + "step": 5997 + }, + { + "epoch": 1.74, + "learning_rate": 4.527263557382666e-05, + "loss": 3.1502, + "step": 5998 + }, + { + "epoch": 1.74, + "learning_rate": 4.517514354810509e-05, + "loss": 3.0745, + "step": 5999 + }, + { + "epoch": 1.74, + "learning_rate": 4.5077751640584696e-05, + "loss": 3.1254, + "step": 6000 + }, + { + "epoch": 1.74, + "learning_rate": 4.4980459872704e-05, + "loss": 3.1738, + "step": 6001 + }, + { + "epoch": 1.74, + "learning_rate": 4.4883268265879404e-05, + "loss": 3.0157, + "step": 6002 + }, + { + "epoch": 1.74, + "learning_rate": 4.4786176841504935e-05, + "loss": 3.2618, + "step": 6003 + }, + { + "epoch": 1.74, + "learning_rate": 4.468918562095309e-05, + "loss": 3.0559, + "step": 6004 + }, + { + "epoch": 1.74, + "learning_rate": 4.459229462557379e-05, + "loss": 3.0632, + "step": 6005 + }, + { + "epoch": 1.74, + "learning_rate": 4.449550387669537e-05, + "loss": 3.1193, + "step": 6006 + }, + { + "epoch": 1.74, + "learning_rate": 4.4398813395623626e-05, + "loss": 3.1638, + "step": 6007 + }, + { + "epoch": 1.74, + "learning_rate": 4.43022232036428e-05, + "loss": 3.1171, + "step": 6008 + }, + { + "epoch": 1.74, + "learning_rate": 4.4205733322014496e-05, + "loss": 3.0651, + "step": 6009 + }, + { + "epoch": 1.74, + "learning_rate": 4.410934377197867e-05, + "loss": 3.1376, + "step": 6010 + }, + { + "epoch": 1.74, + "learning_rate": 4.401305457475313e-05, + "loss": 3.1768, + "step": 6011 + }, + { + "epoch": 1.74, + "learning_rate": 4.391686575153331e-05, + "loss": 3.0957, + "step": 6012 + }, + { + "epoch": 1.74, + "learning_rate": 4.382077732349299e-05, + "loss": 3.0247, + "step": 6013 + }, + { + "epoch": 1.74, + "learning_rate": 4.372478931178336e-05, + "loss": 3.0169, + "step": 6014 + }, + { + "epoch": 1.74, + "learning_rate": 4.362890173753392e-05, + "loss": 3.1424, + "step": 6015 + }, + { + "epoch": 1.74, + "learning_rate": 4.353311462185183e-05, + "loss": 3.1256, + "step": 6016 + }, + { + "epoch": 1.74, + "learning_rate": 4.343742798582229e-05, + "loss": 3.0539, + "step": 6017 + }, + { + "epoch": 1.74, + "learning_rate": 4.3341841850508104e-05, + "loss": 3.0544, + "step": 6018 + }, + { + "epoch": 1.74, + "learning_rate": 4.324635623695028e-05, + "loss": 3.1493, + "step": 6019 + }, + { + "epoch": 1.74, + "learning_rate": 4.315097116616767e-05, + "loss": 3.2077, + "step": 6020 + }, + { + "epoch": 1.74, + "learning_rate": 4.305568665915666e-05, + "loss": 3.0578, + "step": 6021 + }, + { + "epoch": 1.74, + "learning_rate": 4.296050273689195e-05, + "loss": 3.156, + "step": 6022 + }, + { + "epoch": 1.74, + "learning_rate": 4.286541942032562e-05, + "loss": 3.1038, + "step": 6023 + }, + { + "epoch": 1.74, + "learning_rate": 4.277043673038816e-05, + "loss": 3.0752, + "step": 6024 + }, + { + "epoch": 1.74, + "learning_rate": 4.267555468798734e-05, + "loss": 3.1069, + "step": 6025 + }, + { + "epoch": 1.75, + "learning_rate": 4.2580773314009225e-05, + "loss": 3.2482, + "step": 6026 + }, + { + "epoch": 1.75, + "learning_rate": 4.248609262931735e-05, + "loss": 3.0604, + "step": 6027 + }, + { + "epoch": 1.75, + "learning_rate": 4.239151265475344e-05, + "loss": 3.0564, + "step": 6028 + }, + { + "epoch": 1.75, + "learning_rate": 4.2297033411136886e-05, + "loss": 2.9924, + "step": 6029 + }, + { + "epoch": 1.75, + "learning_rate": 4.220265491926489e-05, + "loss": 3.1359, + "step": 6030 + }, + { + "epoch": 1.75, + "learning_rate": 4.21083771999125e-05, + "loss": 2.9635, + "step": 6031 + }, + { + "epoch": 1.75, + "learning_rate": 4.2014200273832404e-05, + "loss": 3.1017, + "step": 6032 + }, + { + "epoch": 1.75, + "learning_rate": 4.1920124161755555e-05, + "loss": 3.0713, + "step": 6033 + }, + { + "epoch": 1.75, + "learning_rate": 4.182614888439018e-05, + "loss": 3.1888, + "step": 6034 + }, + { + "epoch": 1.75, + "learning_rate": 4.1732274462422725e-05, + "loss": 3.2342, + "step": 6035 + }, + { + "epoch": 1.75, + "learning_rate": 4.1638500916517165e-05, + "loss": 3.1207, + "step": 6036 + }, + { + "epoch": 1.75, + "learning_rate": 4.1544828267315425e-05, + "loss": 3.1116, + "step": 6037 + }, + { + "epoch": 1.75, + "learning_rate": 4.145125653543719e-05, + "loss": 3.1723, + "step": 6038 + }, + { + "epoch": 1.75, + "learning_rate": 4.135778574147997e-05, + "loss": 3.1291, + "step": 6039 + }, + { + "epoch": 1.75, + "learning_rate": 4.1264415906018924e-05, + "loss": 3.0732, + "step": 6040 + }, + { + "epoch": 1.75, + "learning_rate": 4.117114704960695e-05, + "loss": 3.204, + "step": 6041 + }, + { + "epoch": 1.75, + "learning_rate": 4.107797919277506e-05, + "loss": 3.134, + "step": 6042 + }, + { + "epoch": 1.75, + "learning_rate": 4.098491235603163e-05, + "loss": 3.1801, + "step": 6043 + }, + { + "epoch": 1.75, + "learning_rate": 4.0891946559863055e-05, + "loss": 3.055, + "step": 6044 + }, + { + "epoch": 1.75, + "learning_rate": 4.079908182473335e-05, + "loss": 3.1006, + "step": 6045 + }, + { + "epoch": 1.75, + "learning_rate": 4.0706318171084334e-05, + "loss": 3.1204, + "step": 6046 + }, + { + "epoch": 1.75, + "learning_rate": 4.061365561933567e-05, + "loss": 3.1425, + "step": 6047 + }, + { + "epoch": 1.75, + "learning_rate": 4.05210941898847e-05, + "loss": 3.1938, + "step": 6048 + }, + { + "epoch": 1.75, + "learning_rate": 4.042863390310642e-05, + "loss": 3.1896, + "step": 6049 + }, + { + "epoch": 1.75, + "learning_rate": 4.0336274779353535e-05, + "loss": 3.0589, + "step": 6050 + }, + { + "epoch": 1.75, + "learning_rate": 4.024401683895668e-05, + "loss": 3.1176, + "step": 6051 + }, + { + "epoch": 1.75, + "learning_rate": 4.0151860102224044e-05, + "loss": 3.1769, + "step": 6052 + }, + { + "epoch": 1.75, + "learning_rate": 4.005980458944169e-05, + "loss": 3.0197, + "step": 6053 + }, + { + "epoch": 1.75, + "learning_rate": 3.996785032087324e-05, + "loss": 3.0603, + "step": 6054 + }, + { + "epoch": 1.75, + "learning_rate": 3.987599731676022e-05, + "loss": 3.2522, + "step": 6055 + }, + { + "epoch": 1.75, + "learning_rate": 3.978424559732158e-05, + "loss": 3.0098, + "step": 6056 + }, + { + "epoch": 1.75, + "learning_rate": 3.969259518275431e-05, + "loss": 3.1194, + "step": 6057 + }, + { + "epoch": 1.75, + "learning_rate": 3.960104609323284e-05, + "loss": 3.0131, + "step": 6058 + }, + { + "epoch": 1.75, + "learning_rate": 3.950959834890927e-05, + "loss": 3.0291, + "step": 6059 + }, + { + "epoch": 1.76, + "learning_rate": 3.941825196991378e-05, + "loss": 3.1139, + "step": 6060 + }, + { + "epoch": 1.76, + "learning_rate": 3.932700697635372e-05, + "loss": 3.0906, + "step": 6061 + }, + { + "epoch": 1.76, + "learning_rate": 3.923586338831453e-05, + "loss": 3.1001, + "step": 6062 + }, + { + "epoch": 1.76, + "learning_rate": 3.914482122585905e-05, + "loss": 3.2197, + "step": 6063 + }, + { + "epoch": 1.76, + "learning_rate": 3.905388050902808e-05, + "loss": 3.0445, + "step": 6064 + }, + { + "epoch": 1.76, + "learning_rate": 3.896304125783973e-05, + "loss": 3.2518, + "step": 6065 + }, + { + "epoch": 1.76, + "learning_rate": 3.8872303492290154e-05, + "loss": 3.1282, + "step": 6066 + }, + { + "epoch": 1.76, + "learning_rate": 3.878166723235288e-05, + "loss": 3.0362, + "step": 6067 + }, + { + "epoch": 1.76, + "learning_rate": 3.8691132497979066e-05, + "loss": 3.1454, + "step": 6068 + }, + { + "epoch": 1.76, + "learning_rate": 3.8600699309097876e-05, + "loss": 3.0578, + "step": 6069 + }, + { + "epoch": 1.76, + "learning_rate": 3.851036768561572e-05, + "loss": 3.095, + "step": 6070 + }, + { + "epoch": 1.76, + "learning_rate": 3.842013764741686e-05, + "loss": 3.0728, + "step": 6071 + }, + { + "epoch": 1.76, + "learning_rate": 3.8330009214363194e-05, + "loss": 3.1818, + "step": 6072 + }, + { + "epoch": 1.76, + "learning_rate": 3.823998240629434e-05, + "loss": 3.026, + "step": 6073 + }, + { + "epoch": 1.76, + "learning_rate": 3.815005724302717e-05, + "loss": 3.0081, + "step": 6074 + }, + { + "epoch": 1.76, + "learning_rate": 3.806023374435663e-05, + "loss": 3.0505, + "step": 6075 + }, + { + "epoch": 1.76, + "learning_rate": 3.797051193005507e-05, + "loss": 3.0276, + "step": 6076 + }, + { + "epoch": 1.76, + "learning_rate": 3.788089181987236e-05, + "loss": 3.1396, + "step": 6077 + }, + { + "epoch": 1.76, + "learning_rate": 3.779137343353623e-05, + "loss": 3.2437, + "step": 6078 + }, + { + "epoch": 1.76, + "learning_rate": 3.770195679075178e-05, + "loss": 3.1643, + "step": 6079 + }, + { + "epoch": 1.76, + "learning_rate": 3.761264191120189e-05, + "loss": 3.1272, + "step": 6080 + }, + { + "epoch": 1.76, + "learning_rate": 3.7523428814546954e-05, + "loss": 3.0861, + "step": 6081 + }, + { + "epoch": 1.76, + "learning_rate": 3.743431752042509e-05, + "loss": 3.0726, + "step": 6082 + }, + { + "epoch": 1.76, + "learning_rate": 3.734530804845177e-05, + "loss": 3.1536, + "step": 6083 + }, + { + "epoch": 1.76, + "learning_rate": 3.725640041822026e-05, + "loss": 3.1487, + "step": 6084 + }, + { + "epoch": 1.76, + "learning_rate": 3.716759464930125e-05, + "loss": 3.0443, + "step": 6085 + }, + { + "epoch": 1.76, + "learning_rate": 3.70788907612431e-05, + "loss": 3.0403, + "step": 6086 + }, + { + "epoch": 1.76, + "learning_rate": 3.699028877357186e-05, + "loss": 3.1099, + "step": 6087 + }, + { + "epoch": 1.76, + "learning_rate": 3.690178870579075e-05, + "loss": 3.1775, + "step": 6088 + }, + { + "epoch": 1.76, + "learning_rate": 3.681339057738103e-05, + "loss": 3.1721, + "step": 6089 + }, + { + "epoch": 1.76, + "learning_rate": 3.672509440780125e-05, + "loss": 2.9877, + "step": 6090 + }, + { + "epoch": 1.76, + "learning_rate": 3.6636900216487745e-05, + "loss": 3.2046, + "step": 6091 + }, + { + "epoch": 1.76, + "learning_rate": 3.654880802285393e-05, + "loss": 3.1411, + "step": 6092 + }, + { + "epoch": 1.76, + "learning_rate": 3.646081784629141e-05, + "loss": 2.9192, + "step": 6093 + }, + { + "epoch": 1.76, + "learning_rate": 3.637292970616879e-05, + "loss": 3.0551, + "step": 6094 + }, + { + "epoch": 1.77, + "learning_rate": 3.6285143621832386e-05, + "loss": 3.1556, + "step": 6095 + }, + { + "epoch": 1.77, + "learning_rate": 3.619745961260623e-05, + "loss": 3.145, + "step": 6096 + }, + { + "epoch": 1.77, + "learning_rate": 3.610987769779156e-05, + "loss": 3.1005, + "step": 6097 + }, + { + "epoch": 1.77, + "learning_rate": 3.60223978966675e-05, + "loss": 2.9397, + "step": 6098 + }, + { + "epoch": 1.77, + "learning_rate": 3.593502022849043e-05, + "loss": 2.9234, + "step": 6099 + }, + { + "epoch": 1.77, + "learning_rate": 3.584774471249447e-05, + "loss": 3.0883, + "step": 6100 + }, + { + "epoch": 1.77, + "learning_rate": 3.5760571367890904e-05, + "loss": 3.0439, + "step": 6101 + }, + { + "epoch": 1.77, + "learning_rate": 3.567350021386895e-05, + "loss": 3.0982, + "step": 6102 + }, + { + "epoch": 1.77, + "learning_rate": 3.558653126959499e-05, + "loss": 3.194, + "step": 6103 + }, + { + "epoch": 1.77, + "learning_rate": 3.549966455421305e-05, + "loss": 3.1463, + "step": 6104 + }, + { + "epoch": 1.77, + "learning_rate": 3.5412900086844615e-05, + "loss": 3.0536, + "step": 6105 + }, + { + "epoch": 1.77, + "learning_rate": 3.532623788658873e-05, + "loss": 3.0922, + "step": 6106 + }, + { + "epoch": 1.77, + "learning_rate": 3.5239677972522035e-05, + "loss": 3.0262, + "step": 6107 + }, + { + "epoch": 1.77, + "learning_rate": 3.515322036369822e-05, + "loss": 3.1892, + "step": 6108 + }, + { + "epoch": 1.77, + "learning_rate": 3.506686507914902e-05, + "loss": 3.082, + "step": 6109 + }, + { + "epoch": 1.77, + "learning_rate": 3.4980612137883175e-05, + "loss": 3.1276, + "step": 6110 + }, + { + "epoch": 1.77, + "learning_rate": 3.4894461558887256e-05, + "loss": 2.9493, + "step": 6111 + }, + { + "epoch": 1.77, + "learning_rate": 3.4808413361125e-05, + "loss": 3.0368, + "step": 6112 + }, + { + "epoch": 1.77, + "learning_rate": 3.4722467563537696e-05, + "loss": 3.164, + "step": 6113 + }, + { + "epoch": 1.77, + "learning_rate": 3.4636624185044266e-05, + "loss": 3.0932, + "step": 6114 + }, + { + "epoch": 1.77, + "learning_rate": 3.455088324454092e-05, + "loss": 3.1401, + "step": 6115 + }, + { + "epoch": 1.77, + "learning_rate": 3.44652447609014e-05, + "loss": 2.9825, + "step": 6116 + }, + { + "epoch": 1.77, + "learning_rate": 3.4379708752976804e-05, + "loss": 3.115, + "step": 6117 + }, + { + "epoch": 1.77, + "learning_rate": 3.429427523959577e-05, + "loss": 3.1157, + "step": 6118 + }, + { + "epoch": 1.77, + "learning_rate": 3.420894423956422e-05, + "loss": 3.1178, + "step": 6119 + }, + { + "epoch": 1.77, + "learning_rate": 3.4123715771665786e-05, + "loss": 3.1586, + "step": 6120 + }, + { + "epoch": 1.77, + "learning_rate": 3.403858985466129e-05, + "loss": 3.0353, + "step": 6121 + }, + { + "epoch": 1.77, + "learning_rate": 3.3953566507288927e-05, + "loss": 3.0048, + "step": 6122 + }, + { + "epoch": 1.77, + "learning_rate": 3.3868645748264534e-05, + "loss": 3.2191, + "step": 6123 + }, + { + "epoch": 1.77, + "learning_rate": 3.378382759628129e-05, + "loss": 3.108, + "step": 6124 + }, + { + "epoch": 1.77, + "learning_rate": 3.3699112070009865e-05, + "loss": 3.2803, + "step": 6125 + }, + { + "epoch": 1.77, + "learning_rate": 3.3614499188098e-05, + "loss": 3.0845, + "step": 6126 + }, + { + "epoch": 1.77, + "learning_rate": 3.352998896917131e-05, + "loss": 3.0518, + "step": 6127 + }, + { + "epoch": 1.77, + "learning_rate": 3.344558143183246e-05, + "loss": 3.0412, + "step": 6128 + }, + { + "epoch": 1.78, + "learning_rate": 3.3361276594661706e-05, + "loss": 3.0978, + "step": 6129 + }, + { + "epoch": 1.78, + "learning_rate": 3.327707447621653e-05, + "loss": 3.2128, + "step": 6130 + }, + { + "epoch": 1.78, + "learning_rate": 3.319297509503205e-05, + "loss": 3.0835, + "step": 6131 + }, + { + "epoch": 1.78, + "learning_rate": 3.3108978469620407e-05, + "loss": 3.087, + "step": 6132 + }, + { + "epoch": 1.78, + "learning_rate": 3.3025084618471535e-05, + "loss": 3.0215, + "step": 6133 + }, + { + "epoch": 1.78, + "learning_rate": 3.294129356005249e-05, + "loss": 3.1951, + "step": 6134 + }, + { + "epoch": 1.78, + "learning_rate": 3.2857605312807684e-05, + "loss": 3.1377, + "step": 6135 + }, + { + "epoch": 1.78, + "learning_rate": 3.27740198951591e-05, + "loss": 3.2303, + "step": 6136 + }, + { + "epoch": 1.78, + "learning_rate": 3.2690537325505806e-05, + "loss": 3.073, + "step": 6137 + }, + { + "epoch": 1.78, + "learning_rate": 3.260715762222449e-05, + "loss": 3.1163, + "step": 6138 + }, + { + "epoch": 1.78, + "learning_rate": 3.252388080366903e-05, + "loss": 3.0205, + "step": 6139 + }, + { + "epoch": 1.78, + "learning_rate": 3.24407068881708e-05, + "loss": 3.0895, + "step": 6140 + }, + { + "epoch": 1.78, + "learning_rate": 3.235763589403829e-05, + "loss": 3.0907, + "step": 6141 + }, + { + "epoch": 1.78, + "learning_rate": 3.227466783955757e-05, + "loss": 3.1009, + "step": 6142 + }, + { + "epoch": 1.78, + "learning_rate": 3.219180274299205e-05, + "loss": 3.1725, + "step": 6143 + }, + { + "epoch": 1.78, + "learning_rate": 3.210904062258219e-05, + "loss": 3.0727, + "step": 6144 + }, + { + "epoch": 1.78, + "learning_rate": 3.202638149654613e-05, + "loss": 3.1022, + "step": 6145 + }, + { + "epoch": 1.78, + "learning_rate": 3.1943825383079106e-05, + "loss": 3.0993, + "step": 6146 + }, + { + "epoch": 1.78, + "learning_rate": 3.1861372300353854e-05, + "loss": 3.2258, + "step": 6147 + }, + { + "epoch": 1.78, + "learning_rate": 3.177902226652024e-05, + "loss": 3.043, + "step": 6148 + }, + { + "epoch": 1.78, + "learning_rate": 3.16967752997056e-05, + "loss": 3.1962, + "step": 6149 + }, + { + "epoch": 1.78, + "learning_rate": 3.161463141801446e-05, + "loss": 3.167, + "step": 6150 + }, + { + "epoch": 1.78, + "learning_rate": 3.153259063952879e-05, + "loss": 3.1604, + "step": 6151 + }, + { + "epoch": 1.78, + "learning_rate": 3.145065298230782e-05, + "loss": 3.2486, + "step": 6152 + }, + { + "epoch": 1.78, + "learning_rate": 3.136881846438794e-05, + "loss": 3.0451, + "step": 6153 + }, + { + "epoch": 1.78, + "learning_rate": 3.128708710378308e-05, + "loss": 3.0778, + "step": 6154 + }, + { + "epoch": 1.78, + "learning_rate": 3.12054589184842e-05, + "loss": 3.2212, + "step": 6155 + }, + { + "epoch": 1.78, + "learning_rate": 3.112393392645985e-05, + "loss": 3.1389, + "step": 6156 + }, + { + "epoch": 1.78, + "learning_rate": 3.104251214565545e-05, + "loss": 3.0437, + "step": 6157 + }, + { + "epoch": 1.78, + "learning_rate": 3.0961193593994186e-05, + "loss": 3.1085, + "step": 6158 + }, + { + "epoch": 1.78, + "learning_rate": 3.087997828937627e-05, + "loss": 3.2129, + "step": 6159 + }, + { + "epoch": 1.78, + "learning_rate": 3.0798866249679036e-05, + "loss": 3.0632, + "step": 6160 + }, + { + "epoch": 1.78, + "learning_rate": 3.071785749275741e-05, + "loss": 3.1146, + "step": 6161 + }, + { + "epoch": 1.78, + "learning_rate": 3.063695203644329e-05, + "loss": 3.1002, + "step": 6162 + }, + { + "epoch": 1.78, + "learning_rate": 3.0556149898546046e-05, + "loss": 3.1694, + "step": 6163 + }, + { + "epoch": 1.79, + "learning_rate": 3.0475451096852237e-05, + "loss": 3.1584, + "step": 6164 + }, + { + "epoch": 1.79, + "learning_rate": 3.039485564912564e-05, + "loss": 3.1646, + "step": 6165 + }, + { + "epoch": 1.79, + "learning_rate": 3.0314363573107294e-05, + "loss": 3.0997, + "step": 6166 + }, + { + "epoch": 1.79, + "learning_rate": 3.0233974886515468e-05, + "loss": 3.0548, + "step": 6167 + }, + { + "epoch": 1.79, + "learning_rate": 3.0153689607045842e-05, + "loss": 3.0232, + "step": 6168 + }, + { + "epoch": 1.79, + "learning_rate": 3.0073507752371e-05, + "loss": 3.1437, + "step": 6169 + }, + { + "epoch": 1.79, + "learning_rate": 2.9993429340141153e-05, + "loss": 3.0972, + "step": 6170 + }, + { + "epoch": 1.79, + "learning_rate": 2.9913454387983318e-05, + "loss": 3.0373, + "step": 6171 + }, + { + "epoch": 1.79, + "learning_rate": 2.9833582913502132e-05, + "loss": 3.2651, + "step": 6172 + }, + { + "epoch": 1.79, + "learning_rate": 2.97538149342792e-05, + "loss": 3.1755, + "step": 6173 + }, + { + "epoch": 1.79, + "learning_rate": 2.967415046787353e-05, + "loss": 3.1516, + "step": 6174 + }, + { + "epoch": 1.79, + "learning_rate": 2.9594589531821036e-05, + "loss": 3.145, + "step": 6175 + }, + { + "epoch": 1.79, + "learning_rate": 2.9515132143635215e-05, + "loss": 3.1525, + "step": 6176 + }, + { + "epoch": 1.79, + "learning_rate": 2.9435778320806627e-05, + "loss": 3.1297, + "step": 6177 + }, + { + "epoch": 1.79, + "learning_rate": 2.9356528080802914e-05, + "loss": 3.1787, + "step": 6178 + }, + { + "epoch": 1.79, + "learning_rate": 2.9277381441069063e-05, + "loss": 3.1076, + "step": 6179 + }, + { + "epoch": 1.79, + "learning_rate": 2.9198338419027137e-05, + "loss": 3.047, + "step": 6180 + }, + { + "epoch": 1.79, + "learning_rate": 2.911939903207661e-05, + "loss": 3.1579, + "step": 6181 + }, + { + "epoch": 1.79, + "learning_rate": 2.9040563297593804e-05, + "loss": 3.1297, + "step": 6182 + }, + { + "epoch": 1.79, + "learning_rate": 2.8961831232932555e-05, + "loss": 3.2644, + "step": 6183 + }, + { + "epoch": 1.79, + "learning_rate": 2.8883202855423673e-05, + "loss": 3.1341, + "step": 6184 + }, + { + "epoch": 1.79, + "learning_rate": 2.8804678182375198e-05, + "loss": 3.1868, + "step": 6185 + }, + { + "epoch": 1.79, + "learning_rate": 2.8726257231072474e-05, + "loss": 3.2603, + "step": 6186 + }, + { + "epoch": 1.79, + "learning_rate": 2.864794001877774e-05, + "loss": 3.1676, + "step": 6187 + }, + { + "epoch": 1.79, + "learning_rate": 2.8569726562730658e-05, + "loss": 3.1233, + "step": 6188 + }, + { + "epoch": 1.79, + "learning_rate": 2.8491616880147843e-05, + "loss": 3.069, + "step": 6189 + }, + { + "epoch": 1.79, + "learning_rate": 2.841361098822326e-05, + "loss": 3.0773, + "step": 6190 + }, + { + "epoch": 1.79, + "learning_rate": 2.83357089041279e-05, + "loss": 3.0975, + "step": 6191 + }, + { + "epoch": 1.79, + "learning_rate": 2.825791064500993e-05, + "loss": 3.0335, + "step": 6192 + }, + { + "epoch": 1.79, + "learning_rate": 2.818021622799466e-05, + "loss": 3.0387, + "step": 6193 + }, + { + "epoch": 1.79, + "learning_rate": 2.8102625670184566e-05, + "loss": 3.0713, + "step": 6194 + }, + { + "epoch": 1.79, + "learning_rate": 2.8025138988659327e-05, + "loss": 3.1511, + "step": 6195 + }, + { + "epoch": 1.79, + "learning_rate": 2.7947756200475517e-05, + "loss": 3.2123, + "step": 6196 + }, + { + "epoch": 1.79, + "learning_rate": 2.7870477322667176e-05, + "loss": 3.1706, + "step": 6197 + }, + { + "epoch": 1.79, + "learning_rate": 2.7793302372245145e-05, + "loss": 3.1443, + "step": 6198 + }, + { + "epoch": 1.8, + "learning_rate": 2.7716231366197665e-05, + "loss": 3.1652, + "step": 6199 + }, + { + "epoch": 1.8, + "learning_rate": 2.7639264321489888e-05, + "loss": 3.1763, + "step": 6200 + }, + { + "epoch": 1.8, + "learning_rate": 2.7562401255064207e-05, + "loss": 3.1487, + "step": 6201 + }, + { + "epoch": 1.8, + "learning_rate": 2.7485642183840032e-05, + "loss": 3.0922, + "step": 6202 + }, + { + "epoch": 1.8, + "learning_rate": 2.740898712471396e-05, + "loss": 3.0595, + "step": 6203 + }, + { + "epoch": 1.8, + "learning_rate": 2.733243609455971e-05, + "loss": 3.0, + "step": 6204 + }, + { + "epoch": 1.8, + "learning_rate": 2.7255989110227973e-05, + "loss": 3.0418, + "step": 6205 + }, + { + "epoch": 1.8, + "learning_rate": 2.7179646188546738e-05, + "loss": 3.1038, + "step": 6206 + }, + { + "epoch": 1.8, + "learning_rate": 2.710340734632083e-05, + "loss": 3.206, + "step": 6207 + }, + { + "epoch": 1.8, + "learning_rate": 2.702727260033244e-05, + "loss": 3.0555, + "step": 6208 + }, + { + "epoch": 1.8, + "learning_rate": 2.695124196734061e-05, + "loss": 3.1806, + "step": 6209 + }, + { + "epoch": 1.8, + "learning_rate": 2.6875315464081564e-05, + "loss": 3.0993, + "step": 6210 + }, + { + "epoch": 1.8, + "learning_rate": 2.6799493107268646e-05, + "loss": 3.152, + "step": 6211 + }, + { + "epoch": 1.8, + "learning_rate": 2.672377491359229e-05, + "loss": 3.157, + "step": 6212 + }, + { + "epoch": 1.8, + "learning_rate": 2.6648160899719888e-05, + "loss": 3.1223, + "step": 6213 + }, + { + "epoch": 1.8, + "learning_rate": 2.657265108229584e-05, + "loss": 3.2137, + "step": 6214 + }, + { + "epoch": 1.8, + "learning_rate": 2.6497245477941967e-05, + "loss": 3.0486, + "step": 6215 + }, + { + "epoch": 1.8, + "learning_rate": 2.6421944103256656e-05, + "loss": 3.1507, + "step": 6216 + }, + { + "epoch": 1.8, + "learning_rate": 2.634674697481576e-05, + "loss": 3.2027, + "step": 6217 + }, + { + "epoch": 1.8, + "learning_rate": 2.6271654109171927e-05, + "loss": 3.0697, + "step": 6218 + }, + { + "epoch": 1.8, + "learning_rate": 2.6196665522855045e-05, + "loss": 3.1612, + "step": 6219 + }, + { + "epoch": 1.8, + "learning_rate": 2.6121781232371853e-05, + "loss": 3.2194, + "step": 6220 + }, + { + "epoch": 1.8, + "learning_rate": 2.604700125420645e-05, + "loss": 3.096, + "step": 6221 + }, + { + "epoch": 1.8, + "learning_rate": 2.5972325604819492e-05, + "loss": 3.122, + "step": 6222 + }, + { + "epoch": 1.8, + "learning_rate": 2.5897754300649112e-05, + "loss": 3.2408, + "step": 6223 + }, + { + "epoch": 1.8, + "learning_rate": 2.5823287358110292e-05, + "loss": 2.9659, + "step": 6224 + }, + { + "epoch": 1.8, + "learning_rate": 2.574892479359492e-05, + "loss": 3.0851, + "step": 6225 + }, + { + "epoch": 1.8, + "learning_rate": 2.5674666623472177e-05, + "loss": 3.1031, + "step": 6226 + }, + { + "epoch": 1.8, + "learning_rate": 2.560051286408793e-05, + "loss": 3.1885, + "step": 6227 + }, + { + "epoch": 1.8, + "learning_rate": 2.5526463531765464e-05, + "loss": 3.2135, + "step": 6228 + }, + { + "epoch": 1.8, + "learning_rate": 2.545251864280479e-05, + "loss": 2.9322, + "step": 6229 + }, + { + "epoch": 1.8, + "learning_rate": 2.537867821348305e-05, + "loss": 3.1367, + "step": 6230 + }, + { + "epoch": 1.8, + "learning_rate": 2.5304942260054253e-05, + "loss": 3.1428, + "step": 6231 + }, + { + "epoch": 1.8, + "learning_rate": 2.523131079874963e-05, + "loss": 3.0451, + "step": 6232 + }, + { + "epoch": 1.81, + "learning_rate": 2.5157783845777216e-05, + "loss": 3.1494, + "step": 6233 + }, + { + "epoch": 1.81, + "learning_rate": 2.5084361417322067e-05, + "loss": 3.1537, + "step": 6234 + }, + { + "epoch": 1.81, + "learning_rate": 2.5011043529546363e-05, + "loss": 3.1055, + "step": 6235 + }, + { + "epoch": 1.81, + "learning_rate": 2.4937830198589086e-05, + "loss": 3.0113, + "step": 6236 + }, + { + "epoch": 1.81, + "learning_rate": 2.486472144056634e-05, + "loss": 3.1561, + "step": 6237 + }, + { + "epoch": 1.81, + "learning_rate": 2.4791717271571202e-05, + "loss": 3.2308, + "step": 6238 + }, + { + "epoch": 1.81, + "learning_rate": 2.471881770767376e-05, + "loss": 3.1286, + "step": 6239 + }, + { + "epoch": 1.81, + "learning_rate": 2.464602276492084e-05, + "loss": 3.0813, + "step": 6240 + }, + { + "epoch": 1.81, + "learning_rate": 2.4573332459336628e-05, + "loss": 3.101, + "step": 6241 + }, + { + "epoch": 1.81, + "learning_rate": 2.450074680692188e-05, + "loss": 3.1187, + "step": 6242 + }, + { + "epoch": 1.81, + "learning_rate": 2.442826582365454e-05, + "loss": 3.1196, + "step": 6243 + }, + { + "epoch": 1.81, + "learning_rate": 2.4355889525489505e-05, + "loss": 3.0235, + "step": 6244 + }, + { + "epoch": 1.81, + "learning_rate": 2.428361792835848e-05, + "loss": 3.2031, + "step": 6245 + }, + { + "epoch": 1.81, + "learning_rate": 2.4211451048170296e-05, + "loss": 3.1032, + "step": 6246 + }, + { + "epoch": 1.81, + "learning_rate": 2.4139388900810688e-05, + "loss": 3.1277, + "step": 6247 + }, + { + "epoch": 1.81, + "learning_rate": 2.4067431502142413e-05, + "loss": 3.0769, + "step": 6248 + }, + { + "epoch": 1.81, + "learning_rate": 2.3995578868004908e-05, + "loss": 3.0699, + "step": 6249 + }, + { + "epoch": 1.81, + "learning_rate": 2.3923831014214803e-05, + "loss": 3.084, + "step": 6250 + }, + { + "epoch": 1.81, + "learning_rate": 2.3852187956565576e-05, + "loss": 3.1903, + "step": 6251 + }, + { + "epoch": 1.81, + "learning_rate": 2.378064971082755e-05, + "loss": 3.1221, + "step": 6252 + }, + { + "epoch": 1.81, + "learning_rate": 2.3709216292748194e-05, + "loss": 3.0171, + "step": 6253 + }, + { + "epoch": 1.81, + "learning_rate": 2.363788771805159e-05, + "loss": 3.0426, + "step": 6254 + }, + { + "epoch": 1.81, + "learning_rate": 2.3566664002439066e-05, + "loss": 3.1029, + "step": 6255 + }, + { + "epoch": 1.81, + "learning_rate": 2.3495545161588694e-05, + "loss": 3.1745, + "step": 6256 + }, + { + "epoch": 1.81, + "learning_rate": 2.3424531211155563e-05, + "loss": 3.1584, + "step": 6257 + }, + { + "epoch": 1.81, + "learning_rate": 2.335362216677139e-05, + "loss": 3.1612, + "step": 6258 + }, + { + "epoch": 1.81, + "learning_rate": 2.3282818044045306e-05, + "loss": 3.223, + "step": 6259 + }, + { + "epoch": 1.81, + "learning_rate": 2.321211885856278e-05, + "loss": 3.0827, + "step": 6260 + }, + { + "epoch": 1.81, + "learning_rate": 2.314152462588659e-05, + "loss": 3.1879, + "step": 6261 + }, + { + "epoch": 1.81, + "learning_rate": 2.3071035361556192e-05, + "loss": 3.0752, + "step": 6262 + }, + { + "epoch": 1.81, + "learning_rate": 2.3000651081088064e-05, + "loss": 3.1168, + "step": 6263 + }, + { + "epoch": 1.81, + "learning_rate": 2.2930371799975592e-05, + "loss": 3.2242, + "step": 6264 + }, + { + "epoch": 1.81, + "learning_rate": 2.2860197533688843e-05, + "loss": 3.1362, + "step": 6265 + }, + { + "epoch": 1.81, + "learning_rate": 2.2790128297675073e-05, + "loss": 3.2278, + "step": 6266 + }, + { + "epoch": 1.81, + "learning_rate": 2.272016410735811e-05, + "loss": 3.1278, + "step": 6267 + }, + { + "epoch": 1.82, + "learning_rate": 2.2650304978138912e-05, + "loss": 3.1338, + "step": 6268 + }, + { + "epoch": 1.82, + "learning_rate": 2.2580550925395128e-05, + "loss": 3.1196, + "step": 6269 + }, + { + "epoch": 1.82, + "learning_rate": 2.2510901964481358e-05, + "loss": 3.14, + "step": 6270 + }, + { + "epoch": 1.82, + "learning_rate": 2.2441358110729006e-05, + "loss": 3.1376, + "step": 6271 + }, + { + "epoch": 1.82, + "learning_rate": 2.237191937944649e-05, + "loss": 3.0487, + "step": 6272 + }, + { + "epoch": 1.82, + "learning_rate": 2.2302585785919094e-05, + "loss": 3.1273, + "step": 6273 + }, + { + "epoch": 1.82, + "learning_rate": 2.22333573454086e-05, + "loss": 3.2614, + "step": 6274 + }, + { + "epoch": 1.82, + "learning_rate": 2.21642340731541e-05, + "loss": 3.1334, + "step": 6275 + }, + { + "epoch": 1.82, + "learning_rate": 2.2095215984371197e-05, + "loss": 3.1012, + "step": 6276 + }, + { + "epoch": 1.82, + "learning_rate": 2.202630309425263e-05, + "loss": 3.1209, + "step": 6277 + }, + { + "epoch": 1.82, + "learning_rate": 2.195749541796771e-05, + "loss": 3.0694, + "step": 6278 + }, + { + "epoch": 1.82, + "learning_rate": 2.1888792970662706e-05, + "loss": 3.1122, + "step": 6279 + }, + { + "epoch": 1.82, + "learning_rate": 2.1820195767460803e-05, + "loss": 3.0099, + "step": 6280 + }, + { + "epoch": 1.82, + "learning_rate": 2.1751703823461866e-05, + "loss": 3.0901, + "step": 6281 + }, + { + "epoch": 1.82, + "learning_rate": 2.1683317153742778e-05, + "loss": 3.1909, + "step": 6282 + }, + { + "epoch": 1.82, + "learning_rate": 2.1615035773356996e-05, + "loss": 3.1488, + "step": 6283 + }, + { + "epoch": 1.82, + "learning_rate": 2.1546859697335052e-05, + "loss": 3.1188, + "step": 6284 + }, + { + "epoch": 1.82, + "learning_rate": 2.1478788940684114e-05, + "loss": 3.1619, + "step": 6285 + }, + { + "epoch": 1.82, + "learning_rate": 2.1410823518388302e-05, + "loss": 3.1038, + "step": 6286 + }, + { + "epoch": 1.82, + "learning_rate": 2.134296344540848e-05, + "loss": 3.1216, + "step": 6287 + }, + { + "epoch": 1.82, + "learning_rate": 2.1275208736682262e-05, + "loss": 3.0857, + "step": 6288 + }, + { + "epoch": 1.82, + "learning_rate": 2.1207559407124155e-05, + "loss": 3.1864, + "step": 6289 + }, + { + "epoch": 1.82, + "learning_rate": 2.114001547162542e-05, + "loss": 3.0873, + "step": 6290 + }, + { + "epoch": 1.82, + "learning_rate": 2.1072576945054324e-05, + "loss": 3.1745, + "step": 6291 + }, + { + "epoch": 1.82, + "learning_rate": 2.100524384225555e-05, + "loss": 3.0182, + "step": 6292 + }, + { + "epoch": 1.82, + "learning_rate": 2.0938016178050912e-05, + "loss": 3.0848, + "step": 6293 + }, + { + "epoch": 1.82, + "learning_rate": 2.0870893967238792e-05, + "loss": 3.1946, + "step": 6294 + }, + { + "epoch": 1.82, + "learning_rate": 2.0803877224594536e-05, + "loss": 3.2287, + "step": 6295 + }, + { + "epoch": 1.82, + "learning_rate": 2.0736965964870124e-05, + "loss": 3.1475, + "step": 6296 + }, + { + "epoch": 1.82, + "learning_rate": 2.067016020279433e-05, + "loss": 3.1075, + "step": 6297 + }, + { + "epoch": 1.82, + "learning_rate": 2.0603459953072834e-05, + "loss": 3.1745, + "step": 6298 + }, + { + "epoch": 1.82, + "learning_rate": 2.0536865230387948e-05, + "loss": 3.1149, + "step": 6299 + }, + { + "epoch": 1.82, + "learning_rate": 2.0470376049398942e-05, + "loss": 3.3075, + "step": 6300 + }, + { + "epoch": 1.82, + "learning_rate": 2.0403992424741613e-05, + "loss": 3.0925, + "step": 6301 + }, + { + "epoch": 1.83, + "learning_rate": 2.033771437102877e-05, + "loss": 3.0459, + "step": 6302 + }, + { + "epoch": 1.83, + "learning_rate": 2.0271541902849633e-05, + "loss": 3.1562, + "step": 6303 + }, + { + "epoch": 1.83, + "learning_rate": 2.0205475034770604e-05, + "loss": 3.1506, + "step": 6304 + }, + { + "epoch": 1.83, + "learning_rate": 2.013951378133455e-05, + "loss": 3.2096, + "step": 6305 + }, + { + "epoch": 1.83, + "learning_rate": 2.0073658157061137e-05, + "loss": 3.1631, + "step": 6306 + }, + { + "epoch": 1.83, + "learning_rate": 2.0007908176446877e-05, + "loss": 3.1004, + "step": 6307 + }, + { + "epoch": 1.83, + "learning_rate": 1.9942263853964915e-05, + "loss": 3.1105, + "step": 6308 + }, + { + "epoch": 1.83, + "learning_rate": 1.987672520406525e-05, + "loss": 3.1035, + "step": 6309 + }, + { + "epoch": 1.83, + "learning_rate": 1.9811292241174562e-05, + "loss": 3.1308, + "step": 6310 + }, + { + "epoch": 1.83, + "learning_rate": 1.974596497969622e-05, + "loss": 3.1432, + "step": 6311 + }, + { + "epoch": 1.83, + "learning_rate": 1.9680743434010386e-05, + "loss": 3.1862, + "step": 6312 + }, + { + "epoch": 1.83, + "learning_rate": 1.9615627618473964e-05, + "loss": 3.1164, + "step": 6313 + }, + { + "epoch": 1.83, + "learning_rate": 1.955061754742049e-05, + "loss": 3.0971, + "step": 6314 + }, + { + "epoch": 1.83, + "learning_rate": 1.9485713235160295e-05, + "loss": 3.1791, + "step": 6315 + }, + { + "epoch": 1.83, + "learning_rate": 1.9420914695980563e-05, + "loss": 3.0816, + "step": 6316 + }, + { + "epoch": 1.83, + "learning_rate": 1.9356221944144824e-05, + "loss": 3.0991, + "step": 6317 + }, + { + "epoch": 1.83, + "learning_rate": 1.9291634993893803e-05, + "loss": 3.1261, + "step": 6318 + }, + { + "epoch": 1.83, + "learning_rate": 1.9227153859444456e-05, + "loss": 3.0616, + "step": 6319 + }, + { + "epoch": 1.83, + "learning_rate": 1.9162778554990822e-05, + "loss": 3.1082, + "step": 6320 + }, + { + "epoch": 1.83, + "learning_rate": 1.9098509094703452e-05, + "loss": 3.1024, + "step": 6321 + }, + { + "epoch": 1.83, + "learning_rate": 1.90343454927297e-05, + "loss": 3.0202, + "step": 6322 + }, + { + "epoch": 1.83, + "learning_rate": 1.8970287763193428e-05, + "loss": 3.0735, + "step": 6323 + }, + { + "epoch": 1.83, + "learning_rate": 1.8906335920195416e-05, + "loss": 3.1481, + "step": 6324 + }, + { + "epoch": 1.83, + "learning_rate": 1.884248997781307e-05, + "loss": 3.1614, + "step": 6325 + }, + { + "epoch": 1.83, + "learning_rate": 1.877874995010037e-05, + "loss": 2.9909, + "step": 6326 + }, + { + "epoch": 1.83, + "learning_rate": 1.871511585108826e-05, + "loss": 3.1483, + "step": 6327 + }, + { + "epoch": 1.83, + "learning_rate": 1.8651587694783922e-05, + "loss": 3.1272, + "step": 6328 + }, + { + "epoch": 1.83, + "learning_rate": 1.8588165495171673e-05, + "loss": 3.0809, + "step": 6329 + }, + { + "epoch": 1.83, + "learning_rate": 1.852484926621223e-05, + "loss": 3.0576, + "step": 6330 + }, + { + "epoch": 1.83, + "learning_rate": 1.8461639021843058e-05, + "loss": 3.0726, + "step": 6331 + }, + { + "epoch": 1.83, + "learning_rate": 1.8398534775978304e-05, + "loss": 3.1004, + "step": 6332 + }, + { + "epoch": 1.83, + "learning_rate": 1.8335536542508746e-05, + "loss": 3.1923, + "step": 6333 + }, + { + "epoch": 1.83, + "learning_rate": 1.827264433530196e-05, + "loss": 3.1047, + "step": 6334 + }, + { + "epoch": 1.83, + "learning_rate": 1.820985816820192e-05, + "loss": 3.2106, + "step": 6335 + }, + { + "epoch": 1.83, + "learning_rate": 1.8147178055029577e-05, + "loss": 3.0289, + "step": 6336 + }, + { + "epoch": 1.84, + "learning_rate": 1.8084604009582227e-05, + "loss": 3.0494, + "step": 6337 + }, + { + "epoch": 1.84, + "learning_rate": 1.802213604563413e-05, + "loss": 3.1464, + "step": 6338 + }, + { + "epoch": 1.84, + "learning_rate": 1.7959774176935837e-05, + "loss": 3.1448, + "step": 6339 + }, + { + "epoch": 1.84, + "learning_rate": 1.7897518417214876e-05, + "loss": 3.0576, + "step": 6340 + }, + { + "epoch": 1.84, + "learning_rate": 1.7835368780175164e-05, + "loss": 3.146, + "step": 6341 + }, + { + "epoch": 1.84, + "learning_rate": 1.7773325279497487e-05, + "loss": 3.1263, + "step": 6342 + }, + { + "epoch": 1.84, + "learning_rate": 1.771138792883914e-05, + "loss": 3.1386, + "step": 6343 + }, + { + "epoch": 1.84, + "learning_rate": 1.7649556741833994e-05, + "loss": 3.1723, + "step": 6344 + }, + { + "epoch": 1.84, + "learning_rate": 1.7587831732092717e-05, + "loss": 3.3039, + "step": 6345 + }, + { + "epoch": 1.84, + "learning_rate": 1.7526212913202444e-05, + "loss": 3.1389, + "step": 6346 + }, + { + "epoch": 1.84, + "learning_rate": 1.7464700298727042e-05, + "loss": 3.1866, + "step": 6347 + }, + { + "epoch": 1.84, + "learning_rate": 1.740329390220685e-05, + "loss": 3.173, + "step": 6348 + }, + { + "epoch": 1.84, + "learning_rate": 1.734199373715911e-05, + "loss": 3.0617, + "step": 6349 + }, + { + "epoch": 1.84, + "learning_rate": 1.7280799817077362e-05, + "loss": 3.067, + "step": 6350 + }, + { + "epoch": 1.84, + "learning_rate": 1.7219712155431945e-05, + "loss": 2.9978, + "step": 6351 + }, + { + "epoch": 1.84, + "learning_rate": 1.7158730765669816e-05, + "loss": 3.0746, + "step": 6352 + }, + { + "epoch": 1.84, + "learning_rate": 1.7097855661214357e-05, + "loss": 3.005, + "step": 6353 + }, + { + "epoch": 1.84, + "learning_rate": 1.70370868554659e-05, + "loss": 3.0737, + "step": 6354 + }, + { + "epoch": 1.84, + "learning_rate": 1.6976424361800912e-05, + "loss": 3.0536, + "step": 6355 + }, + { + "epoch": 1.84, + "learning_rate": 1.6915868193572935e-05, + "loss": 3.0977, + "step": 6356 + }, + { + "epoch": 1.84, + "learning_rate": 1.6855418364111695e-05, + "loss": 3.182, + "step": 6357 + }, + { + "epoch": 1.84, + "learning_rate": 1.6795074886723827e-05, + "loss": 3.1073, + "step": 6358 + }, + { + "epoch": 1.84, + "learning_rate": 1.673483777469237e-05, + "loss": 3.1384, + "step": 6359 + }, + { + "epoch": 1.84, + "learning_rate": 1.6674707041276936e-05, + "loss": 3.1393, + "step": 6360 + }, + { + "epoch": 1.84, + "learning_rate": 1.6614682699713944e-05, + "loss": 3.2779, + "step": 6361 + }, + { + "epoch": 1.84, + "learning_rate": 1.6554764763216046e-05, + "loss": 3.1476, + "step": 6362 + }, + { + "epoch": 1.84, + "learning_rate": 1.6494953244972854e-05, + "loss": 3.0892, + "step": 6363 + }, + { + "epoch": 1.84, + "learning_rate": 1.643524815815023e-05, + "loss": 3.1203, + "step": 6364 + }, + { + "epoch": 1.84, + "learning_rate": 1.637564951589082e-05, + "loss": 3.0482, + "step": 6365 + }, + { + "epoch": 1.84, + "learning_rate": 1.631615733131364e-05, + "loss": 3.1882, + "step": 6366 + }, + { + "epoch": 1.84, + "learning_rate": 1.6256771617514487e-05, + "loss": 3.0241, + "step": 6367 + }, + { + "epoch": 1.84, + "learning_rate": 1.6197492387565627e-05, + "loss": 3.0305, + "step": 6368 + }, + { + "epoch": 1.84, + "learning_rate": 1.6138319654515897e-05, + "loss": 2.9972, + "step": 6369 + }, + { + "epoch": 1.84, + "learning_rate": 1.607925343139066e-05, + "loss": 3.161, + "step": 6370 + }, + { + "epoch": 1.85, + "learning_rate": 1.6020293731191794e-05, + "loss": 3.1261, + "step": 6371 + }, + { + "epoch": 1.85, + "learning_rate": 1.596144056689791e-05, + "loss": 3.0534, + "step": 6372 + }, + { + "epoch": 1.85, + "learning_rate": 1.5902693951463877e-05, + "loss": 3.0869, + "step": 6373 + }, + { + "epoch": 1.85, + "learning_rate": 1.5844053897821454e-05, + "loss": 2.9584, + "step": 6374 + }, + { + "epoch": 1.85, + "learning_rate": 1.5785520418878597e-05, + "loss": 3.1653, + "step": 6375 + }, + { + "epoch": 1.85, + "learning_rate": 1.572709352752011e-05, + "loss": 3.0914, + "step": 6376 + }, + { + "epoch": 1.85, + "learning_rate": 1.566877323660709e-05, + "loss": 3.119, + "step": 6377 + }, + { + "epoch": 1.85, + "learning_rate": 1.5610559558977446e-05, + "loss": 3.213, + "step": 6378 + }, + { + "epoch": 1.85, + "learning_rate": 1.55524525074453e-05, + "loss": 3.121, + "step": 6379 + }, + { + "epoch": 1.85, + "learning_rate": 1.5494452094801494e-05, + "loss": 3.0992, + "step": 6380 + }, + { + "epoch": 1.85, + "learning_rate": 1.5436558333813356e-05, + "loss": 3.1636, + "step": 6381 + }, + { + "epoch": 1.85, + "learning_rate": 1.537877123722464e-05, + "loss": 3.1216, + "step": 6382 + }, + { + "epoch": 1.85, + "learning_rate": 1.53210908177559e-05, + "loss": 3.1121, + "step": 6383 + }, + { + "epoch": 1.85, + "learning_rate": 1.526351708810386e-05, + "loss": 3.0118, + "step": 6384 + }, + { + "epoch": 1.85, + "learning_rate": 1.5206050060942001e-05, + "loss": 3.245, + "step": 6385 + }, + { + "epoch": 1.85, + "learning_rate": 1.5148689748920197e-05, + "loss": 3.0742, + "step": 6386 + }, + { + "epoch": 1.85, + "learning_rate": 1.5091436164664963e-05, + "loss": 3.1396, + "step": 6387 + }, + { + "epoch": 1.85, + "learning_rate": 1.503428932077916e-05, + "loss": 3.1392, + "step": 6388 + }, + { + "epoch": 1.85, + "learning_rate": 1.4977249229842172e-05, + "loss": 3.0874, + "step": 6389 + }, + { + "epoch": 1.85, + "learning_rate": 1.4920315904410064e-05, + "loss": 3.083, + "step": 6390 + }, + { + "epoch": 1.85, + "learning_rate": 1.4863489357015093e-05, + "loss": 3.099, + "step": 6391 + }, + { + "epoch": 1.85, + "learning_rate": 1.4806769600166359e-05, + "loss": 3.119, + "step": 6392 + }, + { + "epoch": 1.85, + "learning_rate": 1.4750156646349156e-05, + "loss": 3.1154, + "step": 6393 + }, + { + "epoch": 1.85, + "learning_rate": 1.4693650508025513e-05, + "loss": 3.1144, + "step": 6394 + }, + { + "epoch": 1.85, + "learning_rate": 1.4637251197633705e-05, + "loss": 3.168, + "step": 6395 + }, + { + "epoch": 1.85, + "learning_rate": 1.4580958727588745e-05, + "loss": 3.0573, + "step": 6396 + }, + { + "epoch": 1.85, + "learning_rate": 1.4524773110281885e-05, + "loss": 3.1149, + "step": 6397 + }, + { + "epoch": 1.85, + "learning_rate": 1.4468694358081013e-05, + "loss": 3.0852, + "step": 6398 + }, + { + "epoch": 1.85, + "learning_rate": 1.4412722483330532e-05, + "loss": 3.2055, + "step": 6399 + }, + { + "epoch": 1.85, + "learning_rate": 1.4356857498351029e-05, + "loss": 3.0752, + "step": 6400 + }, + { + "epoch": 1.85, + "learning_rate": 1.430109941544e-05, + "loss": 3.1552, + "step": 6401 + }, + { + "epoch": 1.85, + "learning_rate": 1.4245448246871017e-05, + "loss": 3.242, + "step": 6402 + }, + { + "epoch": 1.85, + "learning_rate": 1.4189904004894338e-05, + "loss": 3.1432, + "step": 6403 + }, + { + "epoch": 1.85, + "learning_rate": 1.413446670173657e-05, + "loss": 3.1599, + "step": 6404 + }, + { + "epoch": 1.85, + "learning_rate": 1.4079136349601008e-05, + "loss": 3.2014, + "step": 6405 + }, + { + "epoch": 1.86, + "learning_rate": 1.4023912960667017e-05, + "loss": 3.1798, + "step": 6406 + }, + { + "epoch": 1.86, + "learning_rate": 1.3968796547090767e-05, + "loss": 3.1114, + "step": 6407 + }, + { + "epoch": 1.86, + "learning_rate": 1.3913787121004718e-05, + "loss": 3.1716, + "step": 6408 + }, + { + "epoch": 1.86, + "learning_rate": 1.3858884694517736e-05, + "loss": 3.2266, + "step": 6409 + }, + { + "epoch": 1.86, + "learning_rate": 1.3804089279715326e-05, + "loss": 3.1591, + "step": 6410 + }, + { + "epoch": 1.86, + "learning_rate": 1.374940088865917e-05, + "loss": 3.1218, + "step": 6411 + }, + { + "epoch": 1.86, + "learning_rate": 1.3694819533387636e-05, + "loss": 3.1426, + "step": 6412 + }, + { + "epoch": 1.86, + "learning_rate": 1.364034522591534e-05, + "loss": 3.0111, + "step": 6413 + }, + { + "epoch": 1.86, + "learning_rate": 1.3585977978233577e-05, + "loss": 3.0267, + "step": 6414 + }, + { + "epoch": 1.86, + "learning_rate": 1.3531717802309829e-05, + "loss": 3.1814, + "step": 6415 + }, + { + "epoch": 1.86, + "learning_rate": 1.3477564710088097e-05, + "loss": 3.2315, + "step": 6416 + }, + { + "epoch": 1.86, + "learning_rate": 1.3423518713488847e-05, + "loss": 3.1598, + "step": 6417 + }, + { + "epoch": 1.86, + "learning_rate": 1.336957982440884e-05, + "loss": 3.0794, + "step": 6418 + }, + { + "epoch": 1.86, + "learning_rate": 1.3315748054721466e-05, + "loss": 3.2218, + "step": 6419 + }, + { + "epoch": 1.86, + "learning_rate": 1.3262023416276414e-05, + "loss": 3.0482, + "step": 6420 + }, + { + "epoch": 1.86, + "learning_rate": 1.3208405920899835e-05, + "loss": 3.0691, + "step": 6421 + }, + { + "epoch": 1.86, + "learning_rate": 1.3154895580394177e-05, + "loss": 3.224, + "step": 6422 + }, + { + "epoch": 1.86, + "learning_rate": 1.3101492406538518e-05, + "loss": 3.112, + "step": 6423 + }, + { + "epoch": 1.86, + "learning_rate": 1.3048196411088009e-05, + "loss": 3.0941, + "step": 6424 + }, + { + "epoch": 1.86, + "learning_rate": 1.2995007605774656e-05, + "loss": 3.1237, + "step": 6425 + }, + { + "epoch": 1.86, + "learning_rate": 1.2941926002306537e-05, + "loss": 3.0528, + "step": 6426 + }, + { + "epoch": 1.86, + "learning_rate": 1.2888951612368082e-05, + "loss": 3.0388, + "step": 6427 + }, + { + "epoch": 1.86, + "learning_rate": 1.2836084447620467e-05, + "loss": 3.0796, + "step": 6428 + }, + { + "epoch": 1.86, + "learning_rate": 1.2783324519700935e-05, + "loss": 3.0084, + "step": 6429 + }, + { + "epoch": 1.86, + "learning_rate": 1.2730671840223362e-05, + "loss": 3.0655, + "step": 6430 + }, + { + "epoch": 1.86, + "learning_rate": 1.2678126420777814e-05, + "loss": 3.044, + "step": 6431 + }, + { + "epoch": 1.86, + "learning_rate": 1.2625688272930925e-05, + "loss": 3.0145, + "step": 6432 + }, + { + "epoch": 1.86, + "learning_rate": 1.2573357408225516e-05, + "loss": 3.1551, + "step": 6433 + }, + { + "epoch": 1.86, + "learning_rate": 1.2521133838181043e-05, + "loss": 3.2641, + "step": 6434 + }, + { + "epoch": 1.86, + "learning_rate": 1.2469017574293085e-05, + "loss": 3.1316, + "step": 6435 + }, + { + "epoch": 1.86, + "learning_rate": 1.2417008628033688e-05, + "loss": 3.2041, + "step": 6436 + }, + { + "epoch": 1.86, + "learning_rate": 1.2365107010851417e-05, + "loss": 3.115, + "step": 6437 + }, + { + "epoch": 1.86, + "learning_rate": 1.2313312734171078e-05, + "loss": 2.9428, + "step": 6438 + }, + { + "epoch": 1.86, + "learning_rate": 1.2261625809393884e-05, + "loss": 3.0693, + "step": 6439 + }, + { + "epoch": 1.87, + "learning_rate": 1.2210046247897344e-05, + "loss": 3.0712, + "step": 6440 + }, + { + "epoch": 1.87, + "learning_rate": 1.2158574061035432e-05, + "loss": 3.0785, + "step": 6441 + }, + { + "epoch": 1.87, + "learning_rate": 1.2107209260138475e-05, + "loss": 3.0498, + "step": 6442 + }, + { + "epoch": 1.87, + "learning_rate": 1.2055951856513147e-05, + "loss": 3.1652, + "step": 6443 + }, + { + "epoch": 1.87, + "learning_rate": 1.2004801861442372e-05, + "loss": 3.0856, + "step": 6444 + }, + { + "epoch": 1.87, + "learning_rate": 1.1953759286185584e-05, + "loss": 3.1266, + "step": 6445 + }, + { + "epoch": 1.87, + "learning_rate": 1.1902824141978575e-05, + "loss": 3.0183, + "step": 6446 + }, + { + "epoch": 1.87, + "learning_rate": 1.185199644003332e-05, + "loss": 3.1794, + "step": 6447 + }, + { + "epoch": 1.87, + "learning_rate": 1.1801276191538369e-05, + "loss": 3.1448, + "step": 6448 + }, + { + "epoch": 1.87, + "learning_rate": 1.1750663407658402e-05, + "loss": 3.0938, + "step": 6449 + }, + { + "epoch": 1.87, + "learning_rate": 1.1700158099534618e-05, + "loss": 3.172, + "step": 6450 + }, + { + "epoch": 1.87, + "learning_rate": 1.164976027828446e-05, + "loss": 3.1913, + "step": 6451 + }, + { + "epoch": 1.87, + "learning_rate": 1.1599469955001718e-05, + "loss": 3.0513, + "step": 6452 + }, + { + "epoch": 1.87, + "learning_rate": 1.1549287140756592e-05, + "loss": 3.1635, + "step": 6453 + }, + { + "epoch": 1.87, + "learning_rate": 1.149921184659547e-05, + "loss": 3.0089, + "step": 6454 + }, + { + "epoch": 1.87, + "learning_rate": 1.1449244083541199e-05, + "loss": 3.1717, + "step": 6455 + }, + { + "epoch": 1.87, + "learning_rate": 1.1399383862592927e-05, + "loss": 3.2397, + "step": 6456 + }, + { + "epoch": 1.87, + "learning_rate": 1.1349631194726151e-05, + "loss": 3.073, + "step": 6457 + }, + { + "epoch": 1.87, + "learning_rate": 1.129998609089261e-05, + "loss": 3.0997, + "step": 6458 + }, + { + "epoch": 1.87, + "learning_rate": 1.125044856202051e-05, + "loss": 3.1539, + "step": 6459 + }, + { + "epoch": 1.87, + "learning_rate": 1.1201018619014181e-05, + "loss": 3.1912, + "step": 6460 + }, + { + "epoch": 1.87, + "learning_rate": 1.1151696272754475e-05, + "loss": 3.1343, + "step": 6461 + }, + { + "epoch": 1.87, + "learning_rate": 1.1102481534098374e-05, + "loss": 3.1613, + "step": 6462 + }, + { + "epoch": 1.87, + "learning_rate": 1.1053374413879269e-05, + "loss": 3.1971, + "step": 6463 + }, + { + "epoch": 1.87, + "learning_rate": 1.1004374922906847e-05, + "loss": 3.2249, + "step": 6464 + }, + { + "epoch": 1.87, + "learning_rate": 1.0955483071967144e-05, + "loss": 3.2099, + "step": 6465 + }, + { + "epoch": 1.87, + "learning_rate": 1.0906698871822552e-05, + "loss": 3.1964, + "step": 6466 + }, + { + "epoch": 1.87, + "learning_rate": 1.0858022333211481e-05, + "loss": 3.04, + "step": 6467 + }, + { + "epoch": 1.87, + "learning_rate": 1.0809453466849028e-05, + "loss": 3.1474, + "step": 6468 + }, + { + "epoch": 1.87, + "learning_rate": 1.0760992283426252e-05, + "loss": 3.1436, + "step": 6469 + }, + { + "epoch": 1.87, + "learning_rate": 1.0712638793610785e-05, + "loss": 3.1062, + "step": 6470 + }, + { + "epoch": 1.87, + "learning_rate": 1.0664393008046281e-05, + "loss": 3.1872, + "step": 6471 + }, + { + "epoch": 1.87, + "learning_rate": 1.0616254937352964e-05, + "loss": 2.9585, + "step": 6472 + }, + { + "epoch": 1.87, + "learning_rate": 1.0568224592127197e-05, + "loss": 3.2496, + "step": 6473 + }, + { + "epoch": 1.87, + "learning_rate": 1.0520301982941572e-05, + "loss": 3.164, + "step": 6474 + }, + { + "epoch": 1.88, + "learning_rate": 1.0472487120345097e-05, + "loss": 3.2019, + "step": 6475 + }, + { + "epoch": 1.88, + "learning_rate": 1.0424780014863022e-05, + "loss": 3.1041, + "step": 6476 + }, + { + "epoch": 1.88, + "learning_rate": 1.0377180676996834e-05, + "loss": 3.1425, + "step": 6477 + }, + { + "epoch": 1.88, + "learning_rate": 1.0329689117224261e-05, + "loss": 3.0253, + "step": 6478 + }, + { + "epoch": 1.88, + "learning_rate": 1.0282305345999498e-05, + "loss": 3.1091, + "step": 6479 + }, + { + "epoch": 1.88, + "learning_rate": 1.0235029373752758e-05, + "loss": 3.1686, + "step": 6480 + }, + { + "epoch": 1.88, + "learning_rate": 1.0187861210890714e-05, + "loss": 3.1576, + "step": 6481 + }, + { + "epoch": 1.88, + "learning_rate": 1.014080086779634e-05, + "loss": 3.1219, + "step": 6482 + }, + { + "epoch": 1.88, + "learning_rate": 1.0093848354828572e-05, + "loss": 3.0322, + "step": 6483 + }, + { + "epoch": 1.88, + "learning_rate": 1.004700368232303e-05, + "loss": 2.9939, + "step": 6484 + }, + { + "epoch": 1.88, + "learning_rate": 1.0000266860591245e-05, + "loss": 3.0838, + "step": 6485 + }, + { + "epoch": 1.88, + "learning_rate": 9.953637899921209e-06, + "loss": 3.058, + "step": 6486 + }, + { + "epoch": 1.88, + "learning_rate": 9.907116810577043e-06, + "loss": 3.1716, + "step": 6487 + }, + { + "epoch": 1.88, + "learning_rate": 9.860703602799281e-06, + "loss": 3.1987, + "step": 6488 + }, + { + "epoch": 1.88, + "learning_rate": 9.814398286804582e-06, + "loss": 3.1385, + "step": 6489 + }, + { + "epoch": 1.88, + "learning_rate": 9.768200872785848e-06, + "loss": 3.2481, + "step": 6490 + }, + { + "epoch": 1.88, + "learning_rate": 9.722111370912334e-06, + "loss": 3.0922, + "step": 6491 + }, + { + "epoch": 1.88, + "learning_rate": 9.67612979132948e-06, + "loss": 3.0689, + "step": 6492 + }, + { + "epoch": 1.88, + "learning_rate": 9.63025614415891e-06, + "loss": 3.1911, + "step": 6493 + }, + { + "epoch": 1.88, + "learning_rate": 9.584490439498605e-06, + "loss": 3.1391, + "step": 6494 + }, + { + "epoch": 1.88, + "learning_rate": 9.538832687422728e-06, + "loss": 3.165, + "step": 6495 + }, + { + "epoch": 1.88, + "learning_rate": 9.49328289798157e-06, + "loss": 3.1902, + "step": 6496 + }, + { + "epoch": 1.88, + "learning_rate": 9.447841081201947e-06, + "loss": 3.105, + "step": 6497 + }, + { + "epoch": 1.88, + "learning_rate": 9.402507247086578e-06, + "loss": 3.1774, + "step": 6498 + }, + { + "epoch": 1.88, + "learning_rate": 9.357281405614648e-06, + "loss": 3.2452, + "step": 6499 + }, + { + "epoch": 1.88, + "learning_rate": 9.312163566741416e-06, + "loss": 3.134, + "step": 6500 + }, + { + "epoch": 1.88, + "learning_rate": 9.267153740398494e-06, + "loss": 3.061, + "step": 6501 + }, + { + "epoch": 1.88, + "learning_rate": 9.222251936493674e-06, + "loss": 3.1878, + "step": 6502 + }, + { + "epoch": 1.88, + "learning_rate": 9.17745816491089e-06, + "loss": 3.1199, + "step": 6503 + }, + { + "epoch": 1.88, + "learning_rate": 9.132772435510362e-06, + "loss": 3.0505, + "step": 6504 + }, + { + "epoch": 1.88, + "learning_rate": 9.088194758128555e-06, + "loss": 3.136, + "step": 6505 + }, + { + "epoch": 1.88, + "learning_rate": 9.043725142578118e-06, + "loss": 3.0947, + "step": 6506 + }, + { + "epoch": 1.88, + "learning_rate": 8.999363598647892e-06, + "loss": 3.1023, + "step": 6507 + }, + { + "epoch": 1.88, + "learning_rate": 8.95511013610295e-06, + "loss": 3.1275, + "step": 6508 + }, + { + "epoch": 1.89, + "learning_rate": 8.91096476468467e-06, + "loss": 3.2256, + "step": 6509 + }, + { + "epoch": 1.89, + "learning_rate": 8.866927494110388e-06, + "loss": 3.1785, + "step": 6510 + }, + { + "epoch": 1.89, + "learning_rate": 8.822998334073905e-06, + "loss": 3.0855, + "step": 6511 + }, + { + "epoch": 1.89, + "learning_rate": 8.779177294245044e-06, + "loss": 3.127, + "step": 6512 + }, + { + "epoch": 1.89, + "learning_rate": 8.73546438426992e-06, + "loss": 3.1186, + "step": 6513 + }, + { + "epoch": 1.89, + "learning_rate": 8.691859613770836e-06, + "loss": 3.0505, + "step": 6514 + }, + { + "epoch": 1.89, + "learning_rate": 8.648362992346336e-06, + "loss": 3.1687, + "step": 6515 + }, + { + "epoch": 1.89, + "learning_rate": 8.604974529571042e-06, + "loss": 3.1518, + "step": 6516 + }, + { + "epoch": 1.89, + "learning_rate": 8.561694234995754e-06, + "loss": 3.1206, + "step": 6517 + }, + { + "epoch": 1.89, + "learning_rate": 8.518522118147742e-06, + "loss": 3.227, + "step": 6518 + }, + { + "epoch": 1.89, + "learning_rate": 8.475458188530016e-06, + "loss": 3.1349, + "step": 6519 + }, + { + "epoch": 1.89, + "learning_rate": 8.432502455622215e-06, + "loss": 3.1655, + "step": 6520 + }, + { + "epoch": 1.89, + "learning_rate": 8.389654928879831e-06, + "loss": 3.1374, + "step": 6521 + }, + { + "epoch": 1.89, + "learning_rate": 8.346915617734707e-06, + "loss": 3.1228, + "step": 6522 + }, + { + "epoch": 1.89, + "learning_rate": 8.304284531594819e-06, + "loss": 3.1044, + "step": 6523 + }, + { + "epoch": 1.89, + "learning_rate": 8.261761679844327e-06, + "loss": 3.1756, + "step": 6524 + }, + { + "epoch": 1.89, + "learning_rate": 8.21934707184352e-06, + "loss": 3.2662, + "step": 6525 + }, + { + "epoch": 1.89, + "learning_rate": 8.177040716928986e-06, + "loss": 3.1271, + "step": 6526 + }, + { + "epoch": 1.89, + "learning_rate": 8.134842624413385e-06, + "loss": 3.1252, + "step": 6527 + }, + { + "epoch": 1.89, + "learning_rate": 8.092752803585512e-06, + "loss": 3.1733, + "step": 6528 + }, + { + "epoch": 1.89, + "learning_rate": 8.0507712637104e-06, + "loss": 3.1694, + "step": 6529 + }, + { + "epoch": 1.89, + "learning_rate": 8.008898014029209e-06, + "loss": 3.1329, + "step": 6530 + }, + { + "epoch": 1.89, + "learning_rate": 7.967133063759291e-06, + "loss": 3.2065, + "step": 6531 + }, + { + "epoch": 1.89, + "learning_rate": 7.925476422094124e-06, + "loss": 3.2147, + "step": 6532 + }, + { + "epoch": 1.89, + "learning_rate": 7.883928098203374e-06, + "loss": 3.2289, + "step": 6533 + }, + { + "epoch": 1.89, + "learning_rate": 7.842488101232892e-06, + "loss": 3.1734, + "step": 6534 + }, + { + "epoch": 1.89, + "learning_rate": 7.801156440304657e-06, + "loss": 3.1445, + "step": 6535 + }, + { + "epoch": 1.89, + "learning_rate": 7.759933124516727e-06, + "loss": 3.0373, + "step": 6536 + }, + { + "epoch": 1.89, + "learning_rate": 7.718818162943397e-06, + "loss": 3.1655, + "step": 6537 + }, + { + "epoch": 1.89, + "learning_rate": 7.67781156463515e-06, + "loss": 3.1298, + "step": 6538 + }, + { + "epoch": 1.89, + "learning_rate": 7.636913338618379e-06, + "loss": 3.151, + "step": 6539 + }, + { + "epoch": 1.89, + "learning_rate": 7.59612349389599e-06, + "loss": 3.1662, + "step": 6540 + }, + { + "epoch": 1.89, + "learning_rate": 7.5554420394467475e-06, + "loss": 3.1336, + "step": 6541 + }, + { + "epoch": 1.89, + "learning_rate": 7.514868984225598e-06, + "loss": 3.1456, + "step": 6542 + }, + { + "epoch": 1.89, + "learning_rate": 7.474404337163731e-06, + "loss": 3.1809, + "step": 6543 + }, + { + "epoch": 1.9, + "learning_rate": 7.434048107168523e-06, + "loss": 3.07, + "step": 6544 + }, + { + "epoch": 1.9, + "learning_rate": 7.3938003031231994e-06, + "loss": 3.0673, + "step": 6545 + }, + { + "epoch": 1.9, + "learning_rate": 7.353660933887396e-06, + "loss": 3.3123, + "step": 6546 + }, + { + "epoch": 1.9, + "learning_rate": 7.3136300082967124e-06, + "loss": 3.0501, + "step": 6547 + }, + { + "epoch": 1.9, + "learning_rate": 7.273707535162988e-06, + "loss": 3.1938, + "step": 6548 + }, + { + "epoch": 1.9, + "learning_rate": 7.233893523274193e-06, + "loss": 3.1424, + "step": 6549 + }, + { + "epoch": 1.9, + "learning_rate": 7.1941879813943176e-06, + "loss": 3.1305, + "step": 6550 + }, + { + "epoch": 1.9, + "learning_rate": 7.154590918263482e-06, + "loss": 3.2114, + "step": 6551 + }, + { + "epoch": 1.9, + "learning_rate": 7.115102342598101e-06, + "loss": 2.985, + "step": 6552 + }, + { + "epoch": 1.9, + "learning_rate": 7.075722263090556e-06, + "loss": 2.9967, + "step": 6553 + }, + { + "epoch": 1.9, + "learning_rate": 7.036450688409302e-06, + "loss": 3.2987, + "step": 6554 + }, + { + "epoch": 1.9, + "learning_rate": 6.997287627199034e-06, + "loss": 3.1176, + "step": 6555 + }, + { + "epoch": 1.9, + "learning_rate": 6.9582330880805235e-06, + "loss": 2.9769, + "step": 6556 + }, + { + "epoch": 1.9, + "learning_rate": 6.9192870796506155e-06, + "loss": 3.0675, + "step": 6557 + }, + { + "epoch": 1.9, + "learning_rate": 6.8804496104823425e-06, + "loss": 3.1121, + "step": 6558 + }, + { + "epoch": 1.9, + "learning_rate": 6.841720689124698e-06, + "loss": 2.953, + "step": 6559 + }, + { + "epoch": 1.9, + "learning_rate": 6.803100324102918e-06, + "loss": 3.047, + "step": 6560 + }, + { + "epoch": 1.9, + "learning_rate": 6.764588523918314e-06, + "loss": 3.0858, + "step": 6561 + }, + { + "epoch": 1.9, + "learning_rate": 6.726185297048326e-06, + "loss": 3.0979, + "step": 6562 + }, + { + "epoch": 1.9, + "learning_rate": 6.68789065194636e-06, + "loss": 3.1084, + "step": 6563 + }, + { + "epoch": 1.9, + "learning_rate": 6.649704597042061e-06, + "loss": 3.1673, + "step": 6564 + }, + { + "epoch": 1.9, + "learning_rate": 6.611627140741206e-06, + "loss": 3.0525, + "step": 6565 + }, + { + "epoch": 1.9, + "learning_rate": 6.573658291425421e-06, + "loss": 3.1141, + "step": 6566 + }, + { + "epoch": 1.9, + "learning_rate": 6.535798057452691e-06, + "loss": 3.0109, + "step": 6567 + }, + { + "epoch": 1.9, + "learning_rate": 6.498046447156958e-06, + "loss": 3.0086, + "step": 6568 + }, + { + "epoch": 1.9, + "learning_rate": 6.460403468848353e-06, + "loss": 3.152, + "step": 6569 + }, + { + "epoch": 1.9, + "learning_rate": 6.422869130812914e-06, + "loss": 3.0007, + "step": 6570 + }, + { + "epoch": 1.9, + "learning_rate": 6.385443441312977e-06, + "loss": 3.0666, + "step": 6571 + }, + { + "epoch": 1.9, + "learning_rate": 6.348126408586841e-06, + "loss": 3.1098, + "step": 6572 + }, + { + "epoch": 1.9, + "learning_rate": 6.310918040848823e-06, + "loss": 3.2256, + "step": 6573 + }, + { + "epoch": 1.9, + "learning_rate": 6.273818346289539e-06, + "loss": 3.0961, + "step": 6574 + }, + { + "epoch": 1.9, + "learning_rate": 6.236827333075401e-06, + "loss": 3.0497, + "step": 6575 + }, + { + "epoch": 1.9, + "learning_rate": 6.199945009349173e-06, + "loss": 3.0716, + "step": 6576 + }, + { + "epoch": 1.9, + "learning_rate": 6.163171383229527e-06, + "loss": 3.1237, + "step": 6577 + }, + { + "epoch": 1.91, + "learning_rate": 6.12650646281121e-06, + "loss": 3.0781, + "step": 6578 + }, + { + "epoch": 1.91, + "learning_rate": 6.0899502561651554e-06, + "loss": 3.1141, + "step": 6579 + }, + { + "epoch": 1.91, + "learning_rate": 6.053502771338204e-06, + "loss": 3.161, + "step": 6580 + }, + { + "epoch": 1.91, + "learning_rate": 6.017164016353438e-06, + "loss": 3.1361, + "step": 6581 + }, + { + "epoch": 1.91, + "learning_rate": 5.980933999209792e-06, + "loss": 3.1931, + "step": 6582 + }, + { + "epoch": 1.91, + "learning_rate": 5.9448127278824986e-06, + "loss": 3.1015, + "step": 6583 + }, + { + "epoch": 1.91, + "learning_rate": 5.908800210322696e-06, + "loss": 3.1451, + "step": 6584 + }, + { + "epoch": 1.91, + "learning_rate": 5.872896454457655e-06, + "loss": 3.1517, + "step": 6585 + }, + { + "epoch": 1.91, + "learning_rate": 5.837101468190609e-06, + "loss": 3.0217, + "step": 6586 + }, + { + "epoch": 1.91, + "learning_rate": 5.8014152594010324e-06, + "loss": 3.0352, + "step": 6587 + }, + { + "epoch": 1.91, + "learning_rate": 5.76583783594431e-06, + "loss": 3.0468, + "step": 6588 + }, + { + "epoch": 1.91, + "learning_rate": 5.730369205651842e-06, + "loss": 3.2401, + "step": 6589 + }, + { + "epoch": 1.91, + "learning_rate": 5.695009376331217e-06, + "loss": 3.0762, + "step": 6590 + }, + { + "epoch": 1.91, + "learning_rate": 5.659758355765987e-06, + "loss": 3.1973, + "step": 6591 + }, + { + "epoch": 1.91, + "learning_rate": 5.624616151715834e-06, + "loss": 3.0739, + "step": 6592 + }, + { + "epoch": 1.91, + "learning_rate": 5.589582771916291e-06, + "loss": 3.1586, + "step": 6593 + }, + { + "epoch": 1.91, + "learning_rate": 5.5546582240791345e-06, + "loss": 3.0276, + "step": 6594 + }, + { + "epoch": 1.91, + "learning_rate": 5.51984251589216e-06, + "loss": 3.2684, + "step": 6595 + }, + { + "epoch": 1.91, + "learning_rate": 5.48513565501918e-06, + "loss": 3.1695, + "step": 6596 + }, + { + "epoch": 1.91, + "learning_rate": 5.450537649099918e-06, + "loss": 3.2277, + "step": 6597 + }, + { + "epoch": 1.91, + "learning_rate": 5.416048505750393e-06, + "loss": 3.1205, + "step": 6598 + }, + { + "epoch": 1.91, + "learning_rate": 5.38166823256242e-06, + "loss": 3.0206, + "step": 6599 + }, + { + "epoch": 1.91, + "learning_rate": 5.347396837104057e-06, + "loss": 3.1656, + "step": 6600 + }, + { + "epoch": 1.91, + "learning_rate": 5.313234326919158e-06, + "loss": 3.0998, + "step": 6601 + }, + { + "epoch": 1.91, + "learning_rate": 5.279180709527765e-06, + "loss": 3.1064, + "step": 6602 + }, + { + "epoch": 1.91, + "learning_rate": 5.245235992425934e-06, + "loss": 3.1081, + "step": 6603 + }, + { + "epoch": 1.91, + "learning_rate": 5.211400183085746e-06, + "loss": 3.1318, + "step": 6604 + }, + { + "epoch": 1.91, + "learning_rate": 5.177673288955353e-06, + "loss": 3.0762, + "step": 6605 + }, + { + "epoch": 1.91, + "learning_rate": 5.1440553174588174e-06, + "loss": 3.1076, + "step": 6606 + }, + { + "epoch": 1.91, + "learning_rate": 5.110546275996275e-06, + "loss": 3.1163, + "step": 6607 + }, + { + "epoch": 1.91, + "learning_rate": 5.077146171943936e-06, + "loss": 3.1445, + "step": 6608 + }, + { + "epoch": 1.91, + "learning_rate": 5.0438550126539755e-06, + "loss": 3.1288, + "step": 6609 + }, + { + "epoch": 1.91, + "learning_rate": 5.010672805454586e-06, + "loss": 3.1066, + "step": 6610 + }, + { + "epoch": 1.91, + "learning_rate": 4.977599557649981e-06, + "loss": 3.004, + "step": 6611 + }, + { + "epoch": 1.91, + "learning_rate": 4.944635276520393e-06, + "loss": 3.1546, + "step": 6612 + }, + { + "epoch": 1.92, + "learning_rate": 4.911779969322127e-06, + "loss": 3.2847, + "step": 6613 + }, + { + "epoch": 1.92, + "learning_rate": 4.879033643287456e-06, + "loss": 3.2426, + "step": 6614 + }, + { + "epoch": 1.92, + "learning_rate": 4.846396305624612e-06, + "loss": 3.0965, + "step": 6615 + }, + { + "epoch": 1.92, + "learning_rate": 4.813867963517904e-06, + "loss": 3.1232, + "step": 6616 + }, + { + "epoch": 1.92, + "learning_rate": 4.7814486241276045e-06, + "loss": 3.0834, + "step": 6617 + }, + { + "epoch": 1.92, + "learning_rate": 4.749138294590005e-06, + "loss": 3.1261, + "step": 6618 + }, + { + "epoch": 1.92, + "learning_rate": 4.716936982017472e-06, + "loss": 3.0876, + "step": 6619 + }, + { + "epoch": 1.92, + "learning_rate": 4.684844693498225e-06, + "loss": 3.309, + "step": 6620 + }, + { + "epoch": 1.92, + "learning_rate": 4.652861436096556e-06, + "loss": 3.1672, + "step": 6621 + }, + { + "epoch": 1.92, + "learning_rate": 4.62098721685289e-06, + "loss": 3.1924, + "step": 6622 + }, + { + "epoch": 1.92, + "learning_rate": 4.589222042783447e-06, + "loss": 3.1482, + "step": 6623 + }, + { + "epoch": 1.92, + "learning_rate": 4.55756592088058e-06, + "loss": 3.2945, + "step": 6624 + }, + { + "epoch": 1.92, + "learning_rate": 4.526018858112546e-06, + "loss": 3.1823, + "step": 6625 + }, + { + "epoch": 1.92, + "learning_rate": 4.49458086142357e-06, + "loss": 3.1074, + "step": 6626 + }, + { + "epoch": 1.92, + "learning_rate": 4.463251937734059e-06, + "loss": 3.2325, + "step": 6627 + }, + { + "epoch": 1.92, + "learning_rate": 4.43203209394022e-06, + "loss": 3.0032, + "step": 6628 + }, + { + "epoch": 1.92, + "learning_rate": 4.400921336914276e-06, + "loss": 3.1205, + "step": 6629 + }, + { + "epoch": 1.92, + "learning_rate": 4.369919673504585e-06, + "loss": 3.0627, + "step": 6630 + }, + { + "epoch": 1.92, + "learning_rate": 4.339027110535298e-06, + "loss": 3.1729, + "step": 6631 + }, + { + "epoch": 1.92, + "learning_rate": 4.308243654806643e-06, + "loss": 3.0825, + "step": 6632 + }, + { + "epoch": 1.92, + "learning_rate": 4.277569313094809e-06, + "loss": 2.9559, + "step": 6633 + }, + { + "epoch": 1.92, + "learning_rate": 4.247004092152007e-06, + "loss": 3.1059, + "step": 6634 + }, + { + "epoch": 1.92, + "learning_rate": 4.216547998706355e-06, + "loss": 3.069, + "step": 6635 + }, + { + "epoch": 1.92, + "learning_rate": 4.186201039462045e-06, + "loss": 3.1254, + "step": 6636 + }, + { + "epoch": 1.92, + "learning_rate": 4.155963221099124e-06, + "loss": 3.0965, + "step": 6637 + }, + { + "epoch": 1.92, + "learning_rate": 4.125834550273766e-06, + "loss": 3.1988, + "step": 6638 + }, + { + "epoch": 1.92, + "learning_rate": 4.095815033618e-06, + "loss": 3.0437, + "step": 6639 + }, + { + "epoch": 1.92, + "learning_rate": 4.065904677739873e-06, + "loss": 3.048, + "step": 6640 + }, + { + "epoch": 1.92, + "learning_rate": 4.036103489223397e-06, + "loss": 3.1192, + "step": 6641 + }, + { + "epoch": 1.92, + "learning_rate": 4.006411474628491e-06, + "loss": 3.0545, + "step": 6642 + }, + { + "epoch": 1.92, + "learning_rate": 3.976828640491203e-06, + "loss": 3.0196, + "step": 6643 + }, + { + "epoch": 1.92, + "learning_rate": 3.947354993323326e-06, + "loss": 3.1585, + "step": 6644 + }, + { + "epoch": 1.92, + "learning_rate": 3.917990539612892e-06, + "loss": 3.1762, + "step": 6645 + }, + { + "epoch": 1.92, + "learning_rate": 3.888735285823564e-06, + "loss": 3.1073, + "step": 6646 + }, + { + "epoch": 1.93, + "learning_rate": 3.8595892383953005e-06, + "loss": 3.1317, + "step": 6647 + }, + { + "epoch": 1.93, + "learning_rate": 3.830552403743803e-06, + "loss": 3.059, + "step": 6648 + }, + { + "epoch": 1.93, + "learning_rate": 3.8016247882607937e-06, + "loss": 3.126, + "step": 6649 + }, + { + "epoch": 1.93, + "learning_rate": 3.7728063983139547e-06, + "loss": 3.0564, + "step": 6650 + }, + { + "epoch": 1.93, + "learning_rate": 3.7440972402469355e-06, + "loss": 3.046, + "step": 6651 + }, + { + "epoch": 1.93, + "learning_rate": 3.715497320379346e-06, + "loss": 3.2579, + "step": 6652 + }, + { + "epoch": 1.93, + "learning_rate": 3.6870066450067075e-06, + "loss": 3.0391, + "step": 6653 + }, + { + "epoch": 1.93, + "learning_rate": 3.6586252204005577e-06, + "loss": 3.1155, + "step": 6654 + }, + { + "epoch": 1.93, + "learning_rate": 3.6303530528082883e-06, + "loss": 3.1711, + "step": 6655 + }, + { + "epoch": 1.93, + "learning_rate": 3.6021901484533658e-06, + "loss": 3.0661, + "step": 6656 + }, + { + "epoch": 1.93, + "learning_rate": 3.5741365135351643e-06, + "loss": 3.1122, + "step": 6657 + }, + { + "epoch": 1.93, + "learning_rate": 3.5461921542288558e-06, + "loss": 3.1051, + "step": 6658 + }, + { + "epoch": 1.93, + "learning_rate": 3.518357076685852e-06, + "loss": 2.994, + "step": 6659 + }, + { + "epoch": 1.93, + "learning_rate": 3.4906312870331967e-06, + "loss": 3.1956, + "step": 6660 + }, + { + "epoch": 1.93, + "learning_rate": 3.4630147913741193e-06, + "loss": 3.0257, + "step": 6661 + }, + { + "epoch": 1.93, + "learning_rate": 3.435507595787646e-06, + "loss": 3.2278, + "step": 6662 + }, + { + "epoch": 1.93, + "learning_rate": 3.408109706328766e-06, + "loss": 3.0691, + "step": 6663 + }, + { + "epoch": 1.93, + "learning_rate": 3.3808211290284885e-06, + "loss": 3.0541, + "step": 6664 + }, + { + "epoch": 1.93, + "learning_rate": 3.353641869893731e-06, + "loss": 3.1371, + "step": 6665 + }, + { + "epoch": 1.93, + "learning_rate": 3.326571934907263e-06, + "loss": 3.0868, + "step": 6666 + }, + { + "epoch": 1.93, + "learning_rate": 3.2996113300278186e-06, + "loss": 3.2443, + "step": 6667 + }, + { + "epoch": 1.93, + "learning_rate": 3.2727600611901497e-06, + "loss": 3.0647, + "step": 6668 + }, + { + "epoch": 1.93, + "learning_rate": 3.2460181343048623e-06, + "loss": 3.2229, + "step": 6669 + }, + { + "epoch": 1.93, + "learning_rate": 3.219385555258525e-06, + "loss": 3.2333, + "step": 6670 + }, + { + "epoch": 1.93, + "learning_rate": 3.1928623299136152e-06, + "loss": 3.1131, + "step": 6671 + }, + { + "epoch": 1.93, + "learning_rate": 3.166448464108629e-06, + "loss": 3.1104, + "step": 6672 + }, + { + "epoch": 1.93, + "learning_rate": 3.1401439636577488e-06, + "loss": 3.1835, + "step": 6673 + }, + { + "epoch": 1.93, + "learning_rate": 3.1139488343513987e-06, + "loss": 3.1722, + "step": 6674 + }, + { + "epoch": 1.93, + "learning_rate": 3.0878630819556886e-06, + "loss": 3.1088, + "step": 6675 + }, + { + "epoch": 1.93, + "learning_rate": 3.061886712212747e-06, + "loss": 3.1898, + "step": 6676 + }, + { + "epoch": 1.93, + "learning_rate": 3.0360197308406667e-06, + "loss": 3.2285, + "step": 6677 + }, + { + "epoch": 1.93, + "learning_rate": 3.0102621435333934e-06, + "loss": 3.1814, + "step": 6678 + }, + { + "epoch": 1.93, + "learning_rate": 2.98461395596078e-06, + "loss": 3.0481, + "step": 6679 + }, + { + "epoch": 1.93, + "learning_rate": 2.959075173768588e-06, + "loss": 2.9798, + "step": 6680 + }, + { + "epoch": 1.93, + "learning_rate": 2.933645802578655e-06, + "loss": 3.1877, + "step": 6681 + }, + { + "epoch": 1.94, + "learning_rate": 2.9083258479885023e-06, + "loss": 3.1271, + "step": 6682 + }, + { + "epoch": 1.94, + "learning_rate": 2.883115315571727e-06, + "loss": 3.1703, + "step": 6683 + }, + { + "epoch": 1.94, + "learning_rate": 2.858014210877835e-06, + "loss": 3.0959, + "step": 6684 + }, + { + "epoch": 1.94, + "learning_rate": 2.833022539432073e-06, + "loss": 3.1577, + "step": 6685 + }, + { + "epoch": 1.94, + "learning_rate": 2.8081403067358736e-06, + "loss": 3.0853, + "step": 6686 + }, + { + "epoch": 1.94, + "learning_rate": 2.7833675182663e-06, + "loss": 3.0689, + "step": 6687 + }, + { + "epoch": 1.94, + "learning_rate": 2.758704179476601e-06, + "loss": 3.2256, + "step": 6688 + }, + { + "epoch": 1.94, + "learning_rate": 2.7341502957956564e-06, + "loss": 3.1787, + "step": 6689 + }, + { + "epoch": 1.94, + "learning_rate": 2.7097058726284207e-06, + "loss": 3.1872, + "step": 6690 + }, + { + "epoch": 1.94, + "learning_rate": 2.6853709153557003e-06, + "loss": 3.1374, + "step": 6691 + }, + { + "epoch": 1.94, + "learning_rate": 2.661145429334322e-06, + "loss": 3.1215, + "step": 6692 + }, + { + "epoch": 1.94, + "learning_rate": 2.637029419896797e-06, + "loss": 3.0955, + "step": 6693 + }, + { + "epoch": 1.94, + "learning_rate": 2.613022892351713e-06, + "loss": 3.107, + "step": 6694 + }, + { + "epoch": 1.94, + "learning_rate": 2.589125851983509e-06, + "loss": 3.2329, + "step": 6695 + }, + { + "epoch": 1.94, + "learning_rate": 2.5653383040524227e-06, + "loss": 3.164, + "step": 6696 + }, + { + "epoch": 1.94, + "learning_rate": 2.5416602537948196e-06, + "loss": 3.1893, + "step": 6697 + }, + { + "epoch": 1.94, + "learning_rate": 2.518091706422698e-06, + "loss": 3.0646, + "step": 6698 + }, + { + "epoch": 1.94, + "learning_rate": 2.494632667124186e-06, + "loss": 3.1814, + "step": 6699 + }, + { + "epoch": 1.94, + "learning_rate": 2.4712831410630964e-06, + "loss": 3.0469, + "step": 6700 + }, + { + "epoch": 1.94, + "learning_rate": 2.448043133379374e-06, + "loss": 2.9907, + "step": 6701 + }, + { + "epoch": 1.94, + "learning_rate": 2.424912649188593e-06, + "loss": 3.115, + "step": 6702 + }, + { + "epoch": 1.94, + "learning_rate": 2.4018916935823475e-06, + "loss": 3.1553, + "step": 6703 + }, + { + "epoch": 1.94, + "learning_rate": 2.378980271628195e-06, + "loss": 3.1778, + "step": 6704 + }, + { + "epoch": 1.94, + "learning_rate": 2.3561783883694897e-06, + "loss": 3.1571, + "step": 6705 + }, + { + "epoch": 1.94, + "learning_rate": 2.3334860488254395e-06, + "loss": 3.1781, + "step": 6706 + }, + { + "epoch": 1.94, + "learning_rate": 2.310903257991215e-06, + "loss": 3.1866, + "step": 6707 + }, + { + "epoch": 1.94, + "learning_rate": 2.2884300208378393e-06, + "loss": 3.1239, + "step": 6708 + }, + { + "epoch": 1.94, + "learning_rate": 2.2660663423123005e-06, + "loss": 3.0597, + "step": 6709 + }, + { + "epoch": 1.94, + "learning_rate": 2.243812227337272e-06, + "loss": 3.1686, + "step": 6710 + }, + { + "epoch": 1.94, + "learning_rate": 2.2216676808115566e-06, + "loss": 3.0893, + "step": 6711 + }, + { + "epoch": 1.94, + "learning_rate": 2.1996327076096446e-06, + "loss": 3.0646, + "step": 6712 + }, + { + "epoch": 1.94, + "learning_rate": 2.177707312581989e-06, + "loss": 3.0691, + "step": 6713 + }, + { + "epoch": 1.94, + "learning_rate": 2.1558915005548964e-06, + "loss": 3.0052, + "step": 6714 + }, + { + "epoch": 1.94, + "learning_rate": 2.1341852763306357e-06, + "loss": 3.2306, + "step": 6715 + }, + { + "epoch": 1.95, + "learning_rate": 2.112588644687219e-06, + "loss": 3.0507, + "step": 6716 + }, + { + "epoch": 1.95, + "learning_rate": 2.0911016103786207e-06, + "loss": 3.0642, + "step": 6717 + }, + { + "epoch": 1.95, + "learning_rate": 2.069724178134613e-06, + "loss": 3.0167, + "step": 6718 + }, + { + "epoch": 1.95, + "learning_rate": 2.0484563526609877e-06, + "loss": 3.0788, + "step": 6719 + }, + { + "epoch": 1.95, + "learning_rate": 2.027298138639333e-06, + "loss": 3.1587, + "step": 6720 + }, + { + "epoch": 1.95, + "learning_rate": 2.006249540726979e-06, + "loss": 3.1517, + "step": 6721 + }, + { + "epoch": 1.95, + "learning_rate": 1.98531056355733e-06, + "loss": 3.1374, + "step": 6722 + }, + { + "epoch": 1.95, + "learning_rate": 1.9644812117395327e-06, + "loss": 3.1044, + "step": 6723 + }, + { + "epoch": 1.95, + "learning_rate": 1.943761489858642e-06, + "loss": 3.0862, + "step": 6724 + }, + { + "epoch": 1.95, + "learning_rate": 1.92315140247562e-06, + "loss": 3.0958, + "step": 6725 + }, + { + "epoch": 1.95, + "learning_rate": 1.9026509541272275e-06, + "loss": 3.0533, + "step": 6726 + }, + { + "epoch": 1.95, + "learning_rate": 1.8822601493261315e-06, + "loss": 3.114, + "step": 6727 + }, + { + "epoch": 1.95, + "learning_rate": 1.8619789925608533e-06, + "loss": 3.0877, + "step": 6728 + }, + { + "epoch": 1.95, + "learning_rate": 1.8418074882958213e-06, + "loss": 3.1191, + "step": 6729 + }, + { + "epoch": 1.95, + "learning_rate": 1.8217456409711508e-06, + "loss": 3.1173, + "step": 6730 + }, + { + "epoch": 1.95, + "learning_rate": 1.8017934550030867e-06, + "loss": 3.0344, + "step": 6731 + }, + { + "epoch": 1.95, + "learning_rate": 1.781950934783505e-06, + "loss": 3.1849, + "step": 6732 + }, + { + "epoch": 1.95, + "learning_rate": 1.7622180846803004e-06, + "loss": 3.0601, + "step": 6733 + }, + { + "epoch": 1.95, + "learning_rate": 1.7425949090371097e-06, + "loss": 3.0951, + "step": 6734 + }, + { + "epoch": 1.95, + "learning_rate": 1.7230814121735327e-06, + "loss": 3.0339, + "step": 6735 + }, + { + "epoch": 1.95, + "learning_rate": 1.7036775983849673e-06, + "loss": 3.1062, + "step": 6736 + }, + { + "epoch": 1.95, + "learning_rate": 1.6843834719426077e-06, + "loss": 3.1881, + "step": 6737 + }, + { + "epoch": 1.95, + "learning_rate": 1.6651990370936676e-06, + "loss": 3.0898, + "step": 6738 + }, + { + "epoch": 1.95, + "learning_rate": 1.646124298061047e-06, + "loss": 3.1736, + "step": 6739 + }, + { + "epoch": 1.95, + "learning_rate": 1.6271592590435536e-06, + "loss": 3.0992, + "step": 6740 + }, + { + "epoch": 1.95, + "learning_rate": 1.6083039242159036e-06, + "loss": 3.1851, + "step": 6741 + }, + { + "epoch": 1.95, + "learning_rate": 1.589558297728555e-06, + "loss": 3.0764, + "step": 6742 + }, + { + "epoch": 1.95, + "learning_rate": 1.5709223837079845e-06, + "loss": 3.1067, + "step": 6743 + }, + { + "epoch": 1.95, + "learning_rate": 1.5523961862564107e-06, + "loss": 3.1616, + "step": 6744 + }, + { + "epoch": 1.95, + "learning_rate": 1.5339797094517939e-06, + "loss": 3.0898, + "step": 6745 + }, + { + "epoch": 1.95, + "learning_rate": 1.5156729573481687e-06, + "loss": 3.2208, + "step": 6746 + }, + { + "epoch": 1.95, + "learning_rate": 1.497475933975312e-06, + "loss": 3.0507, + "step": 6747 + }, + { + "epoch": 1.95, + "learning_rate": 1.4793886433387416e-06, + "loss": 3.0989, + "step": 6748 + }, + { + "epoch": 1.95, + "learning_rate": 1.4614110894199394e-06, + "loss": 3.1607, + "step": 6749 + }, + { + "epoch": 1.95, + "learning_rate": 1.4435432761762956e-06, + "loss": 3.2414, + "step": 6750 + }, + { + "epoch": 1.96, + "learning_rate": 1.4257852075408307e-06, + "loss": 3.0513, + "step": 6751 + }, + { + "epoch": 1.96, + "learning_rate": 1.4081368874226396e-06, + "loss": 3.1658, + "step": 6752 + }, + { + "epoch": 1.96, + "learning_rate": 1.3905983197065597e-06, + "loss": 2.9693, + "step": 6753 + }, + { + "epoch": 1.96, + "learning_rate": 1.3731695082531692e-06, + "loss": 3.1665, + "step": 6754 + }, + { + "epoch": 1.96, + "learning_rate": 1.355850456899066e-06, + "loss": 3.0752, + "step": 6755 + }, + { + "epoch": 1.96, + "learning_rate": 1.3386411694565892e-06, + "loss": 3.0448, + "step": 6756 + }, + { + "epoch": 1.96, + "learning_rate": 1.3215416497138755e-06, + "loss": 2.961, + "step": 6757 + }, + { + "epoch": 1.96, + "learning_rate": 1.304551901435025e-06, + "loss": 3.0816, + "step": 6758 + }, + { + "epoch": 1.96, + "learning_rate": 1.2876719283598236e-06, + "loss": 3.2536, + "step": 6759 + }, + { + "epoch": 1.96, + "learning_rate": 1.2709017342039663e-06, + "loss": 3.1568, + "step": 6760 + }, + { + "epoch": 1.96, + "learning_rate": 1.254241322659111e-06, + "loss": 3.1017, + "step": 6761 + }, + { + "epoch": 1.96, + "learning_rate": 1.2376906973924906e-06, + "loss": 3.2607, + "step": 6762 + }, + { + "epoch": 1.96, + "learning_rate": 1.2212498620474132e-06, + "loss": 3.1339, + "step": 6763 + }, + { + "epoch": 1.96, + "learning_rate": 1.204918820242873e-06, + "loss": 3.1992, + "step": 6764 + }, + { + "epoch": 1.96, + "learning_rate": 1.1886975755736606e-06, + "loss": 3.1328, + "step": 6765 + }, + { + "epoch": 1.96, + "learning_rate": 1.1725861316105869e-06, + "loss": 3.1167, + "step": 6766 + }, + { + "epoch": 1.96, + "learning_rate": 1.1565844919001479e-06, + "loss": 3.113, + "step": 6767 + }, + { + "epoch": 1.96, + "learning_rate": 1.1406926599646372e-06, + "loss": 3.0854, + "step": 6768 + }, + { + "epoch": 1.96, + "learning_rate": 1.1249106393023124e-06, + "loss": 3.0557, + "step": 6769 + }, + { + "epoch": 1.96, + "learning_rate": 1.1092384333871719e-06, + "loss": 3.1149, + "step": 6770 + }, + { + "epoch": 1.96, + "learning_rate": 1.093676045669012e-06, + "loss": 3.1887, + "step": 6771 + }, + { + "epoch": 1.96, + "learning_rate": 1.0782234795735369e-06, + "loss": 3.1836, + "step": 6772 + }, + { + "epoch": 1.96, + "learning_rate": 1.062880738502303e-06, + "loss": 3.0745, + "step": 6773 + }, + { + "epoch": 1.96, + "learning_rate": 1.0476478258324984e-06, + "loss": 3.0747, + "step": 6774 + }, + { + "epoch": 1.96, + "learning_rate": 1.032524744917329e-06, + "loss": 3.0798, + "step": 6775 + }, + { + "epoch": 1.96, + "learning_rate": 1.0175114990857438e-06, + "loss": 3.1467, + "step": 6776 + }, + { + "epoch": 1.96, + "learning_rate": 1.0026080916425428e-06, + "loss": 3.1378, + "step": 6777 + }, + { + "epoch": 1.96, + "learning_rate": 9.878145258683802e-07, + "loss": 3.0692, + "step": 6778 + }, + { + "epoch": 1.96, + "learning_rate": 9.731308050195954e-07, + "loss": 3.2619, + "step": 6779 + }, + { + "epoch": 1.96, + "learning_rate": 9.585569323284915e-07, + "loss": 3.1351, + "step": 6780 + }, + { + "epoch": 1.96, + "learning_rate": 9.440929110031138e-07, + "loss": 3.068, + "step": 6781 + }, + { + "epoch": 1.96, + "learning_rate": 9.297387442273597e-07, + "loss": 3.0807, + "step": 6782 + }, + { + "epoch": 1.96, + "learning_rate": 9.15494435160924e-07, + "loss": 3.0431, + "step": 6783 + }, + { + "epoch": 1.96, + "learning_rate": 9.013599869394096e-07, + "loss": 3.0765, + "step": 6784 + }, + { + "epoch": 1.96, + "learning_rate": 8.873354026740499e-07, + "loss": 3.1644, + "step": 6785 + }, + { + "epoch": 1.97, + "learning_rate": 8.734206854520422e-07, + "loss": 3.1755, + "step": 6786 + }, + { + "epoch": 1.97, + "learning_rate": 8.59615838336436e-07, + "loss": 3.0669, + "step": 6787 + }, + { + "epoch": 1.97, + "learning_rate": 8.459208643659122e-07, + "loss": 2.9631, + "step": 6788 + }, + { + "epoch": 1.97, + "learning_rate": 8.323357665551146e-07, + "loss": 3.0475, + "step": 6789 + }, + { + "epoch": 1.97, + "learning_rate": 8.188605478944289e-07, + "loss": 3.0898, + "step": 6790 + }, + { + "epoch": 1.97, + "learning_rate": 8.054952113501491e-07, + "loss": 3.1258, + "step": 6791 + }, + { + "epoch": 1.97, + "learning_rate": 7.92239759864255e-07, + "loss": 3.1756, + "step": 6792 + }, + { + "epoch": 1.97, + "learning_rate": 7.79094196354635e-07, + "loss": 3.2198, + "step": 6793 + }, + { + "epoch": 1.97, + "learning_rate": 7.660585237149742e-07, + "loss": 3.2482, + "step": 6794 + }, + { + "epoch": 1.97, + "learning_rate": 7.531327448146441e-07, + "loss": 3.1899, + "step": 6795 + }, + { + "epoch": 1.97, + "learning_rate": 7.403168624990353e-07, + "loss": 2.9881, + "step": 6796 + }, + { + "epoch": 1.97, + "learning_rate": 7.276108795892245e-07, + "loss": 3.1387, + "step": 6797 + }, + { + "epoch": 1.97, + "learning_rate": 7.150147988820854e-07, + "loss": 3.1051, + "step": 6798 + }, + { + "epoch": 1.97, + "learning_rate": 7.025286231502892e-07, + "loss": 3.3018, + "step": 6799 + }, + { + "epoch": 1.97, + "learning_rate": 6.901523551424705e-07, + "loss": 3.2559, + "step": 6800 + }, + { + "epoch": 1.97, + "learning_rate": 6.778859975828388e-07, + "loss": 3.2025, + "step": 6801 + }, + { + "epoch": 1.97, + "learning_rate": 6.657295531715679e-07, + "loss": 3.1552, + "step": 6802 + }, + { + "epoch": 1.97, + "learning_rate": 6.536830245845726e-07, + "loss": 3.0959, + "step": 6803 + }, + { + "epoch": 1.97, + "learning_rate": 6.417464144736207e-07, + "loss": 3.1548, + "step": 6804 + }, + { + "epoch": 1.97, + "learning_rate": 6.299197254662214e-07, + "loss": 3.0946, + "step": 6805 + }, + { + "epoch": 1.97, + "learning_rate": 6.182029601657368e-07, + "loss": 3.0277, + "step": 6806 + }, + { + "epoch": 1.97, + "learning_rate": 6.06596121151326e-07, + "loss": 3.0718, + "step": 6807 + }, + { + "epoch": 1.97, + "learning_rate": 5.950992109779452e-07, + "loss": 3.1978, + "step": 6808 + }, + { + "epoch": 1.97, + "learning_rate": 5.837122321763477e-07, + "loss": 3.0582, + "step": 6809 + }, + { + "epoch": 1.97, + "learning_rate": 5.724351872530842e-07, + "loss": 3.1567, + "step": 6810 + }, + { + "epoch": 1.97, + "learning_rate": 5.612680786905023e-07, + "loss": 3.3195, + "step": 6811 + }, + { + "epoch": 1.97, + "learning_rate": 5.502109089467466e-07, + "loss": 3.101, + "step": 6812 + }, + { + "epoch": 1.97, + "learning_rate": 5.392636804557593e-07, + "loss": 3.1405, + "step": 6813 + }, + { + "epoch": 1.97, + "learning_rate": 5.284263956273904e-07, + "loss": 3.2588, + "step": 6814 + }, + { + "epoch": 1.97, + "learning_rate": 5.176990568471207e-07, + "loss": 3.0991, + "step": 6815 + }, + { + "epoch": 1.97, + "learning_rate": 5.070816664762834e-07, + "loss": 3.1159, + "step": 6816 + }, + { + "epoch": 1.97, + "learning_rate": 4.9657422685212e-07, + "loss": 3.0886, + "step": 6817 + }, + { + "epoch": 1.97, + "learning_rate": 4.861767402874473e-07, + "loss": 3.0876, + "step": 6818 + }, + { + "epoch": 1.97, + "learning_rate": 4.758892090711009e-07, + "loss": 3.0841, + "step": 6819 + }, + { + "epoch": 1.98, + "learning_rate": 4.657116354676583e-07, + "loss": 2.9826, + "step": 6820 + }, + { + "epoch": 1.98, + "learning_rate": 4.556440217173274e-07, + "loss": 3.1118, + "step": 6821 + }, + { + "epoch": 1.98, + "learning_rate": 4.456863700363356e-07, + "loss": 3.1408, + "step": 6822 + }, + { + "epoch": 1.98, + "learning_rate": 4.3583868261654057e-07, + "loss": 3.197, + "step": 6823 + }, + { + "epoch": 1.98, + "learning_rate": 4.2610096162576383e-07, + "loss": 3.1698, + "step": 6824 + }, + { + "epoch": 1.98, + "learning_rate": 4.1647320920740196e-07, + "loss": 2.9909, + "step": 6825 + }, + { + "epoch": 1.98, + "learning_rate": 4.0695542748081515e-07, + "loss": 3.0803, + "step": 6826 + }, + { + "epoch": 1.98, + "learning_rate": 3.975476185411608e-07, + "loss": 3.0492, + "step": 6827 + }, + { + "epoch": 1.98, + "learning_rate": 3.8824978445922697e-07, + "loss": 2.9804, + "step": 6828 + }, + { + "epoch": 1.98, + "learning_rate": 3.7906192728176525e-07, + "loss": 3.1853, + "step": 6829 + }, + { + "epoch": 1.98, + "learning_rate": 3.6998404903121343e-07, + "loss": 3.1189, + "step": 6830 + }, + { + "epoch": 1.98, + "learning_rate": 3.610161517058619e-07, + "loss": 3.1471, + "step": 6831 + }, + { + "epoch": 1.98, + "learning_rate": 3.521582372797427e-07, + "loss": 3.1151, + "step": 6832 + }, + { + "epoch": 1.98, + "learning_rate": 3.434103077027406e-07, + "loss": 3.1341, + "step": 6833 + }, + { + "epoch": 1.98, + "learning_rate": 3.347723649004264e-07, + "loss": 3.1743, + "step": 6834 + }, + { + "epoch": 1.98, + "learning_rate": 3.2624441077433454e-07, + "loss": 3.1439, + "step": 6835 + }, + { + "epoch": 1.98, + "learning_rate": 3.178264472015191e-07, + "loss": 3.1221, + "step": 6836 + }, + { + "epoch": 1.98, + "learning_rate": 3.0951847603516437e-07, + "loss": 3.0627, + "step": 6837 + }, + { + "epoch": 1.98, + "learning_rate": 3.013204991038632e-07, + "loss": 3.0999, + "step": 6838 + }, + { + "epoch": 1.98, + "learning_rate": 2.9323251821239406e-07, + "loss": 3.1567, + "step": 6839 + }, + { + "epoch": 1.98, + "learning_rate": 2.852545351409996e-07, + "loss": 3.0508, + "step": 6840 + }, + { + "epoch": 1.98, + "learning_rate": 2.773865516458307e-07, + "loss": 3.2437, + "step": 6841 + }, + { + "epoch": 1.98, + "learning_rate": 2.6962856945883516e-07, + "loss": 3.1639, + "step": 6842 + }, + { + "epoch": 1.98, + "learning_rate": 2.6198059028781363e-07, + "loss": 3.188, + "step": 6843 + }, + { + "epoch": 1.98, + "learning_rate": 2.544426158161972e-07, + "loss": 3.2085, + "step": 6844 + }, + { + "epoch": 1.98, + "learning_rate": 2.4701464770326976e-07, + "loss": 3.0681, + "step": 6845 + }, + { + "epoch": 1.98, + "learning_rate": 2.396966875841677e-07, + "loss": 2.9869, + "step": 6846 + }, + { + "epoch": 1.98, + "learning_rate": 2.3248873706971373e-07, + "loss": 3.0449, + "step": 6847 + }, + { + "epoch": 1.98, + "learning_rate": 2.25390797746583e-07, + "loss": 3.1544, + "step": 6848 + }, + { + "epoch": 1.98, + "learning_rate": 2.1840287117713686e-07, + "loss": 3.0971, + "step": 6849 + }, + { + "epoch": 1.98, + "learning_rate": 2.1152495889970036e-07, + "loss": 3.0133, + "step": 6850 + }, + { + "epoch": 1.98, + "learning_rate": 2.0475706242822913e-07, + "loss": 3.162, + "step": 6851 + }, + { + "epoch": 1.98, + "learning_rate": 1.9809918325247589e-07, + "loss": 3.0428, + "step": 6852 + }, + { + "epoch": 1.98, + "learning_rate": 1.915513228380461e-07, + "loss": 3.2029, + "step": 6853 + }, + { + "epoch": 1.98, + "learning_rate": 1.8511348262623128e-07, + "loss": 3.1264, + "step": 6854 + }, + { + "epoch": 1.99, + "learning_rate": 1.7878566403417563e-07, + "loss": 3.0131, + "step": 6855 + }, + { + "epoch": 1.99, + "learning_rate": 1.7256786845482041e-07, + "loss": 3.1825, + "step": 6856 + }, + { + "epoch": 1.99, + "learning_rate": 1.6646009725684864e-07, + "loss": 3.12, + "step": 6857 + }, + { + "epoch": 1.99, + "learning_rate": 1.6046235178474034e-07, + "loss": 3.0983, + "step": 6858 + }, + { + "epoch": 1.99, + "learning_rate": 1.5457463335871725e-07, + "loss": 3.0628, + "step": 6859 + }, + { + "epoch": 1.99, + "learning_rate": 1.487969432747982e-07, + "loss": 3.1142, + "step": 6860 + }, + { + "epoch": 1.99, + "learning_rate": 1.431292828048547e-07, + "loss": 3.0811, + "step": 6861 + }, + { + "epoch": 1.99, + "learning_rate": 1.3757165319644438e-07, + "loss": 3.0236, + "step": 6862 + }, + { + "epoch": 1.99, + "learning_rate": 1.3212405567292195e-07, + "loss": 3.1595, + "step": 6863 + }, + { + "epoch": 1.99, + "learning_rate": 1.2678649143349485e-07, + "loss": 3.1628, + "step": 6864 + }, + { + "epoch": 1.99, + "learning_rate": 1.2155896165300107e-07, + "loss": 3.0549, + "step": 6865 + }, + { + "epoch": 1.99, + "learning_rate": 1.1644146748224226e-07, + "loss": 3.0374, + "step": 6866 + }, + { + "epoch": 1.99, + "learning_rate": 1.1143401004765075e-07, + "loss": 2.98, + "step": 6867 + }, + { + "epoch": 1.99, + "learning_rate": 1.0653659045156694e-07, + "loss": 3.0992, + "step": 6868 + }, + { + "epoch": 1.99, + "learning_rate": 1.0174920977190638e-07, + "loss": 3.0933, + "step": 6869 + }, + { + "epoch": 1.99, + "learning_rate": 9.707186906254827e-08, + "loss": 3.2008, + "step": 6870 + }, + { + "epoch": 1.99, + "learning_rate": 9.250456935316898e-08, + "loss": 3.1931, + "step": 6871 + }, + { + "epoch": 1.99, + "learning_rate": 8.804731164901991e-08, + "loss": 3.0202, + "step": 6872 + }, + { + "epoch": 1.99, + "learning_rate": 8.37000969313162e-08, + "loss": 3.1126, + "step": 6873 + }, + { + "epoch": 1.99, + "learning_rate": 7.946292615701456e-08, + "loss": 3.1199, + "step": 6874 + }, + { + "epoch": 1.99, + "learning_rate": 7.533580025875785e-08, + "loss": 3.0864, + "step": 6875 + }, + { + "epoch": 1.99, + "learning_rate": 7.131872014509711e-08, + "loss": 3.2039, + "step": 6876 + }, + { + "epoch": 1.99, + "learning_rate": 6.741168670021391e-08, + "loss": 3.1242, + "step": 6877 + }, + { + "epoch": 1.99, + "learning_rate": 6.361470078419806e-08, + "loss": 3.1363, + "step": 6878 + }, + { + "epoch": 1.99, + "learning_rate": 5.992776323282545e-08, + "loss": 3.0905, + "step": 6879 + }, + { + "epoch": 1.99, + "learning_rate": 5.635087485772461e-08, + "loss": 3.2045, + "step": 6880 + }, + { + "epoch": 1.99, + "learning_rate": 5.288403644626572e-08, + "loss": 3.2134, + "step": 6881 + }, + { + "epoch": 1.99, + "learning_rate": 4.952724876150505e-08, + "loss": 3.1003, + "step": 6882 + }, + { + "epoch": 1.99, + "learning_rate": 4.628051254240706e-08, + "loss": 3.0644, + "step": 6883 + }, + { + "epoch": 1.99, + "learning_rate": 4.314382850362231e-08, + "loss": 3.2093, + "step": 6884 + }, + { + "epoch": 1.99, + "learning_rate": 4.011719733570951e-08, + "loss": 3.1352, + "step": 6885 + }, + { + "epoch": 1.99, + "learning_rate": 3.720061970480249e-08, + "loss": 3.107, + "step": 6886 + }, + { + "epoch": 1.99, + "learning_rate": 3.439409625294321e-08, + "loss": 3.2134, + "step": 6887 + }, + { + "epoch": 1.99, + "learning_rate": 3.169762759797079e-08, + "loss": 3.1457, + "step": 6888 + }, + { + "epoch": 2.0, + "learning_rate": 2.9111214333354952e-08, + "loss": 3.094, + "step": 6889 + }, + { + "epoch": 2.0, + "learning_rate": 2.663485702847357e-08, + "loss": 3.2528, + "step": 6890 + }, + { + "epoch": 2.0, + "learning_rate": 2.4268556228446147e-08, + "loss": 3.1685, + "step": 6891 + }, + { + "epoch": 2.0, + "learning_rate": 2.2012312454133822e-08, + "loss": 3.0823, + "step": 6892 + }, + { + "epoch": 2.0, + "learning_rate": 1.9866126202250366e-08, + "loss": 3.0776, + "step": 6893 + }, + { + "epoch": 2.0, + "learning_rate": 1.7829997945084665e-08, + "loss": 3.1803, + "step": 6894 + }, + { + "epoch": 2.0, + "learning_rate": 1.5903928131000278e-08, + "loss": 3.1332, + "step": 6895 + }, + { + "epoch": 2.0, + "learning_rate": 1.408791718382485e-08, + "loss": 3.1009, + "step": 6896 + }, + { + "epoch": 2.0, + "learning_rate": 1.2381965503460712e-08, + "loss": 3.1051, + "step": 6897 + }, + { + "epoch": 2.0, + "learning_rate": 1.0786073465274271e-08, + "loss": 3.139, + "step": 6898 + }, + { + "epoch": 2.0, + "learning_rate": 9.300241420706623e-09, + "loss": 3.0286, + "step": 6899 + }, + { + "epoch": 2.0, + "learning_rate": 7.92446969671845e-09, + "loss": 3.1334, + "step": 6900 + }, + { + "epoch": 2.0, + "learning_rate": 6.658758596178594e-09, + "loss": 3.2288, + "step": 6901 + }, + { + "epoch": 2.0, + "learning_rate": 5.503108397753031e-09, + "loss": 3.1797, + "step": 6902 + }, + { + "epoch": 2.0, + "learning_rate": 4.457519355738349e-09, + "loss": 3.0901, + "step": 6903 + }, + { + "epoch": 2.0, + "learning_rate": 3.5219917003948e-09, + "loss": 3.0597, + "step": 6904 + }, + { + "epoch": 2.0, + "step": 6904, + "total_flos": 8.419903045579571e+16, + "train_loss": 3.300872454810391, + "train_runtime": 32006.6725, + "train_samples_per_second": 13.809, + "train_steps_per_second": 0.216 + } + ], + "logging_steps": 1.0, + "max_steps": 6904, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 2000, + "total_flos": 8.419903045579571e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}