diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,47871 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9996631229287936, + "eval_steps": 500, + "global_step": 7975, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.166666666666667e-08, + "loss": 0.2325, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333334e-08, + "loss": 0.7352, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.2500000000000002e-07, + "loss": 0.7261, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.6666666666666668e-07, + "loss": 0.644, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.0833333333333333e-07, + "loss": 0.2256, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.5000000000000004e-07, + "loss": 0.6668, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.916666666666667e-07, + "loss": 0.6968, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.3333333333333335e-07, + "loss": 0.6386, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 3.75e-07, + "loss": 0.6669, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 4.1666666666666667e-07, + "loss": 0.6711, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.583333333333333e-07, + "loss": 0.6362, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 0.689, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 5.416666666666667e-07, + "loss": 0.7024, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 5.833333333333334e-07, + "loss": 0.6963, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 6.25e-07, + "loss": 0.6672, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 6.666666666666667e-07, + "loss": 0.6858, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 7.083333333333334e-07, + "loss": 0.7329, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 7.5e-07, + "loss": 0.6566, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 7.916666666666667e-07, + "loss": 0.6474, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333333e-07, + "loss": 0.6601, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 8.75e-07, + "loss": 0.671, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 9.166666666666666e-07, + "loss": 0.6587, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 9.583333333333334e-07, + "loss": 0.6859, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6542, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 1.0416666666666667e-06, + "loss": 0.7256, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 1.0833333333333335e-06, + "loss": 0.5457, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 1.125e-06, + "loss": 0.5892, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 1.1666666666666668e-06, + "loss": 0.6599, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 1.2083333333333333e-06, + "loss": 0.6432, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 1.25e-06, + "loss": 0.5695, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.2916666666666669e-06, + "loss": 0.5649, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6514, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 1.3750000000000002e-06, + "loss": 0.7421, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 1.4166666666666667e-06, + "loss": 0.6724, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 1.4583333333333335e-06, + "loss": 0.2184, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-06, + "loss": 0.7177, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 1.5416666666666668e-06, + "loss": 0.6262, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 1.5833333333333333e-06, + "loss": 0.586, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 1.6250000000000001e-06, + "loss": 0.6663, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6753, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.7083333333333334e-06, + "loss": 0.728, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 1.75e-06, + "loss": 0.628, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 1.7916666666666667e-06, + "loss": 0.6423, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 1.8333333333333333e-06, + "loss": 0.2003, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 1.8750000000000003e-06, + "loss": 0.5752, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 1.916666666666667e-06, + "loss": 0.6012, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 1.9583333333333334e-06, + "loss": 0.5738, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6032, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 2.041666666666667e-06, + "loss": 0.5578, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 2.0833333333333334e-06, + "loss": 0.5509, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2.125e-06, + "loss": 0.6438, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 2.166666666666667e-06, + "loss": 0.5713, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 2.2083333333333335e-06, + "loss": 0.6515, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 2.25e-06, + "loss": 0.6336, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 2.2916666666666666e-06, + "loss": 0.6413, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 2.3333333333333336e-06, + "loss": 0.6253, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 2.375e-06, + "loss": 0.6545, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 2.4166666666666667e-06, + "loss": 0.6214, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 2.4583333333333332e-06, + "loss": 0.6089, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-06, + "loss": 0.5913, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 2.5416666666666668e-06, + "loss": 0.575, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 2.5833333333333337e-06, + "loss": 0.6221, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 2.6250000000000003e-06, + "loss": 0.6342, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 2.666666666666667e-06, + "loss": 0.6363, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 2.7083333333333334e-06, + "loss": 0.5795, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 2.7500000000000004e-06, + "loss": 0.6473, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 2.791666666666667e-06, + "loss": 0.6792, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 2.8333333333333335e-06, + "loss": 0.5953, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 2.875e-06, + "loss": 0.6185, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 2.916666666666667e-06, + "loss": 0.5861, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 2.9583333333333335e-06, + "loss": 0.5841, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.6024, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 3.0416666666666666e-06, + "loss": 0.6501, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 3.0833333333333336e-06, + "loss": 0.5572, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 3.125e-06, + "loss": 0.5839, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 3.1666666666666667e-06, + "loss": 0.572, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 3.2083333333333337e-06, + "loss": 0.245, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 3.2500000000000002e-06, + "loss": 0.5578, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 3.2916666666666668e-06, + "loss": 0.5925, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.5522, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 3.3750000000000003e-06, + "loss": 0.5366, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 3.416666666666667e-06, + "loss": 0.588, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 3.4583333333333334e-06, + "loss": 0.6125, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 3.5e-06, + "loss": 0.6518, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 3.5416666666666673e-06, + "loss": 0.5786, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 3.5833333333333335e-06, + "loss": 0.2138, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 3.625e-06, + "loss": 0.6574, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 3.6666666666666666e-06, + "loss": 0.5071, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 3.708333333333334e-06, + "loss": 0.5822, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.6222, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 3.7916666666666666e-06, + "loss": 0.2162, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 3.833333333333334e-06, + "loss": 0.599, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 3.875e-06, + "loss": 0.5512, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 3.916666666666667e-06, + "loss": 0.5883, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 3.958333333333333e-06, + "loss": 0.5838, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.5399, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 4.041666666666667e-06, + "loss": 0.5778, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 4.083333333333334e-06, + "loss": 0.6076, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 4.125e-06, + "loss": 0.6135, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-06, + "loss": 0.5546, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.208333333333333e-06, + "loss": 0.656, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 4.25e-06, + "loss": 0.6278, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 4.2916666666666665e-06, + "loss": 0.5857, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 4.333333333333334e-06, + "loss": 0.5576, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 4.3750000000000005e-06, + "loss": 0.6101, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 4.416666666666667e-06, + "loss": 0.5591, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 4.4583333333333336e-06, + "loss": 0.5338, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 4.5e-06, + "loss": 0.6192, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 4.541666666666667e-06, + "loss": 0.5456, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 4.583333333333333e-06, + "loss": 0.6257, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.625000000000001e-06, + "loss": 0.6037, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 4.666666666666667e-06, + "loss": 0.2162, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 4.708333333333334e-06, + "loss": 0.5821, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 4.75e-06, + "loss": 0.5772, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 4.791666666666668e-06, + "loss": 0.6002, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 4.833333333333333e-06, + "loss": 0.6378, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 4.875e-06, + "loss": 0.5878, + "step": 117 + }, + { + "epoch": 0.01, + "learning_rate": 4.9166666666666665e-06, + "loss": 0.6015, + "step": 118 + }, + { + "epoch": 0.01, + "learning_rate": 4.958333333333334e-06, + "loss": 0.5602, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 5e-06, + "loss": 0.5342, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 5.041666666666667e-06, + "loss": 0.5328, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 5.0833333333333335e-06, + "loss": 0.5785, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 5.125e-06, + "loss": 0.6341, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 5.1666666666666675e-06, + "loss": 0.6064, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 5.208333333333334e-06, + "loss": 0.6256, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 5.2500000000000006e-06, + "loss": 0.2104, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 5.291666666666667e-06, + "loss": 0.5632, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 5.333333333333334e-06, + "loss": 0.641, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 5.375e-06, + "loss": 0.638, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 5.416666666666667e-06, + "loss": 0.6184, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 5.458333333333333e-06, + "loss": 0.6315, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 5.500000000000001e-06, + "loss": 0.584, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 5.541666666666667e-06, + "loss": 0.562, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 5.583333333333334e-06, + "loss": 0.6081, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 5.625e-06, + "loss": 0.6062, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 5.666666666666667e-06, + "loss": 0.6005, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 5.7083333333333335e-06, + "loss": 0.6569, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 5.75e-06, + "loss": 0.5816, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 5.791666666666667e-06, + "loss": 0.5179, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 5.833333333333334e-06, + "loss": 0.6415, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 5.8750000000000005e-06, + "loss": 0.5803, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 5.916666666666667e-06, + "loss": 0.588, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 5.958333333333334e-06, + "loss": 0.6044, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 6e-06, + "loss": 0.1987, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 6.041666666666667e-06, + "loss": 0.5191, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 6.083333333333333e-06, + "loss": 0.5932, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 6.125000000000001e-06, + "loss": 0.5814, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 6.166666666666667e-06, + "loss": 0.5848, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 6.208333333333334e-06, + "loss": 0.5875, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 6.25e-06, + "loss": 0.6117, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 6.291666666666667e-06, + "loss": 0.6231, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 6.333333333333333e-06, + "loss": 0.5988, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 6.375e-06, + "loss": 0.5681, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 6.416666666666667e-06, + "loss": 0.577, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 6.458333333333334e-06, + "loss": 0.5775, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.5805, + "step": 156 + }, + { + "epoch": 0.02, + "learning_rate": 6.541666666666667e-06, + "loss": 0.5917, + "step": 157 + }, + { + "epoch": 0.02, + "learning_rate": 6.5833333333333335e-06, + "loss": 0.5894, + "step": 158 + }, + { + "epoch": 0.02, + "learning_rate": 6.625e-06, + "loss": 0.5575, + "step": 159 + }, + { + "epoch": 0.02, + "learning_rate": 6.666666666666667e-06, + "loss": 0.5389, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 6.708333333333333e-06, + "loss": 0.5919, + "step": 161 + }, + { + "epoch": 0.02, + "learning_rate": 6.750000000000001e-06, + "loss": 0.5819, + "step": 162 + }, + { + "epoch": 0.02, + "learning_rate": 6.791666666666667e-06, + "loss": 0.6223, + "step": 163 + }, + { + "epoch": 0.02, + "learning_rate": 6.833333333333334e-06, + "loss": 0.601, + "step": 164 + }, + { + "epoch": 0.02, + "learning_rate": 6.875e-06, + "loss": 0.5809, + "step": 165 + }, + { + "epoch": 0.02, + "learning_rate": 6.916666666666667e-06, + "loss": 0.5979, + "step": 166 + }, + { + "epoch": 0.02, + "learning_rate": 6.958333333333333e-06, + "loss": 0.5697, + "step": 167 + }, + { + "epoch": 0.02, + "learning_rate": 7e-06, + "loss": 0.5935, + "step": 168 + }, + { + "epoch": 0.02, + "learning_rate": 7.041666666666668e-06, + "loss": 0.5887, + "step": 169 + }, + { + "epoch": 0.02, + "learning_rate": 7.083333333333335e-06, + "loss": 0.6008, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 7.125e-06, + "loss": 0.578, + "step": 171 + }, + { + "epoch": 0.02, + "learning_rate": 7.166666666666667e-06, + "loss": 0.6232, + "step": 172 + }, + { + "epoch": 0.02, + "learning_rate": 7.2083333333333335e-06, + "loss": 0.5683, + "step": 173 + }, + { + "epoch": 0.02, + "learning_rate": 7.25e-06, + "loss": 0.6231, + "step": 174 + }, + { + "epoch": 0.02, + "learning_rate": 7.291666666666667e-06, + "loss": 0.5999, + "step": 175 + }, + { + "epoch": 0.02, + "learning_rate": 7.333333333333333e-06, + "loss": 0.6148, + "step": 176 + }, + { + "epoch": 0.02, + "learning_rate": 7.375000000000001e-06, + "loss": 0.5682, + "step": 177 + }, + { + "epoch": 0.02, + "learning_rate": 7.416666666666668e-06, + "loss": 0.5964, + "step": 178 + }, + { + "epoch": 0.02, + "learning_rate": 7.4583333333333345e-06, + "loss": 0.5539, + "step": 179 + }, + { + "epoch": 0.02, + "learning_rate": 7.500000000000001e-06, + "loss": 0.6317, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 7.541666666666667e-06, + "loss": 0.6436, + "step": 181 + }, + { + "epoch": 0.02, + "learning_rate": 7.583333333333333e-06, + "loss": 0.6161, + "step": 182 + }, + { + "epoch": 0.02, + "learning_rate": 7.625e-06, + "loss": 0.6657, + "step": 183 + }, + { + "epoch": 0.02, + "learning_rate": 7.666666666666667e-06, + "loss": 0.5726, + "step": 184 + }, + { + "epoch": 0.02, + "learning_rate": 7.708333333333334e-06, + "loss": 0.6122, + "step": 185 + }, + { + "epoch": 0.02, + "learning_rate": 7.75e-06, + "loss": 0.6283, + "step": 186 + }, + { + "epoch": 0.02, + "learning_rate": 7.791666666666667e-06, + "loss": 0.6245, + "step": 187 + }, + { + "epoch": 0.02, + "learning_rate": 7.833333333333333e-06, + "loss": 0.6411, + "step": 188 + }, + { + "epoch": 0.02, + "learning_rate": 7.875e-06, + "loss": 0.5371, + "step": 189 + }, + { + "epoch": 0.02, + "learning_rate": 7.916666666666667e-06, + "loss": 0.2183, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 7.958333333333333e-06, + "loss": 0.6503, + "step": 191 + }, + { + "epoch": 0.02, + "learning_rate": 8.000000000000001e-06, + "loss": 0.54, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 8.041666666666668e-06, + "loss": 0.6219, + "step": 193 + }, + { + "epoch": 0.02, + "learning_rate": 8.083333333333334e-06, + "loss": 0.6087, + "step": 194 + }, + { + "epoch": 0.02, + "learning_rate": 8.125000000000001e-06, + "loss": 0.6626, + "step": 195 + }, + { + "epoch": 0.02, + "learning_rate": 8.166666666666668e-06, + "loss": 0.5579, + "step": 196 + }, + { + "epoch": 0.02, + "learning_rate": 8.208333333333334e-06, + "loss": 0.5969, + "step": 197 + }, + { + "epoch": 0.02, + "learning_rate": 8.25e-06, + "loss": 0.5999, + "step": 198 + }, + { + "epoch": 0.02, + "learning_rate": 8.291666666666667e-06, + "loss": 0.6038, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-06, + "loss": 0.5831, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 8.375e-06, + "loss": 0.566, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 8.416666666666667e-06, + "loss": 0.6036, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 8.458333333333333e-06, + "loss": 0.5905, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 8.5e-06, + "loss": 0.6304, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 8.541666666666666e-06, + "loss": 0.6342, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 8.583333333333333e-06, + "loss": 0.5962, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 8.625000000000001e-06, + "loss": 0.6304, + "step": 207 + }, + { + "epoch": 0.03, + "learning_rate": 8.666666666666668e-06, + "loss": 0.6895, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 8.708333333333334e-06, + "loss": 0.6281, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 8.750000000000001e-06, + "loss": 0.5807, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 8.791666666666667e-06, + "loss": 0.6264, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 8.833333333333334e-06, + "loss": 0.2147, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 8.875e-06, + "loss": 0.5542, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 8.916666666666667e-06, + "loss": 0.6146, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 8.958333333333334e-06, + "loss": 0.641, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 9e-06, + "loss": 0.5829, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 9.041666666666667e-06, + "loss": 0.6136, + "step": 217 + }, + { + "epoch": 0.03, + "learning_rate": 9.083333333333333e-06, + "loss": 0.5541, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 9.125e-06, + "loss": 0.2268, + "step": 219 + }, + { + "epoch": 0.03, + "learning_rate": 9.166666666666666e-06, + "loss": 0.5444, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 9.208333333333333e-06, + "loss": 0.5757, + "step": 221 + }, + { + "epoch": 0.03, + "learning_rate": 9.250000000000001e-06, + "loss": 0.5912, + "step": 222 + }, + { + "epoch": 0.03, + "learning_rate": 9.291666666666668e-06, + "loss": 0.5949, + "step": 223 + }, + { + "epoch": 0.03, + "learning_rate": 9.333333333333334e-06, + "loss": 0.6096, + "step": 224 + }, + { + "epoch": 0.03, + "learning_rate": 9.375000000000001e-06, + "loss": 0.615, + "step": 225 + }, + { + "epoch": 0.03, + "learning_rate": 9.416666666666667e-06, + "loss": 0.5492, + "step": 226 + }, + { + "epoch": 0.03, + "learning_rate": 9.458333333333334e-06, + "loss": 0.5942, + "step": 227 + }, + { + "epoch": 0.03, + "learning_rate": 9.5e-06, + "loss": 0.5752, + "step": 228 + }, + { + "epoch": 0.03, + "learning_rate": 9.541666666666669e-06, + "loss": 0.608, + "step": 229 + }, + { + "epoch": 0.03, + "learning_rate": 9.583333333333335e-06, + "loss": 0.6189, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 9.625e-06, + "loss": 0.581, + "step": 231 + }, + { + "epoch": 0.03, + "learning_rate": 9.666666666666667e-06, + "loss": 0.5915, + "step": 232 + }, + { + "epoch": 0.03, + "learning_rate": 9.708333333333333e-06, + "loss": 0.6113, + "step": 233 + }, + { + "epoch": 0.03, + "learning_rate": 9.75e-06, + "loss": 0.6325, + "step": 234 + }, + { + "epoch": 0.03, + "learning_rate": 9.791666666666666e-06, + "loss": 0.2174, + "step": 235 + }, + { + "epoch": 0.03, + "learning_rate": 9.833333333333333e-06, + "loss": 0.5486, + "step": 236 + }, + { + "epoch": 0.03, + "learning_rate": 9.875000000000001e-06, + "loss": 0.526, + "step": 237 + }, + { + "epoch": 0.03, + "learning_rate": 9.916666666666668e-06, + "loss": 0.587, + "step": 238 + }, + { + "epoch": 0.03, + "learning_rate": 9.958333333333334e-06, + "loss": 0.5934, + "step": 239 + }, + { + "epoch": 0.03, + "learning_rate": 1e-05, + "loss": 0.5745, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 9.99999958781275e-06, + "loss": 0.587, + "step": 241 + }, + { + "epoch": 0.03, + "learning_rate": 9.999998351251067e-06, + "loss": 0.631, + "step": 242 + }, + { + "epoch": 0.03, + "learning_rate": 9.999996290315154e-06, + "loss": 0.6128, + "step": 243 + }, + { + "epoch": 0.03, + "learning_rate": 9.999993405005351e-06, + "loss": 0.559, + "step": 244 + }, + { + "epoch": 0.03, + "learning_rate": 9.999989695322135e-06, + "loss": 0.5833, + "step": 245 + }, + { + "epoch": 0.03, + "learning_rate": 9.999985161266116e-06, + "loss": 0.65, + "step": 246 + }, + { + "epoch": 0.03, + "learning_rate": 9.999979802838044e-06, + "loss": 0.6296, + "step": 247 + }, + { + "epoch": 0.03, + "learning_rate": 9.9999736200388e-06, + "loss": 0.6334, + "step": 248 + }, + { + "epoch": 0.03, + "learning_rate": 9.999966612869404e-06, + "loss": 0.6139, + "step": 249 + }, + { + "epoch": 0.03, + "learning_rate": 9.999958781331013e-06, + "loss": 0.617, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 9.999950125424918e-06, + "loss": 0.2071, + "step": 251 + }, + { + "epoch": 0.03, + "learning_rate": 9.999940645152541e-06, + "loss": 0.2251, + "step": 252 + }, + { + "epoch": 0.03, + "learning_rate": 9.999930340515452e-06, + "loss": 0.6055, + "step": 253 + }, + { + "epoch": 0.03, + "learning_rate": 9.999919211515348e-06, + "loss": 0.6522, + "step": 254 + }, + { + "epoch": 0.03, + "learning_rate": 9.99990725815406e-06, + "loss": 0.5561, + "step": 255 + }, + { + "epoch": 0.03, + "learning_rate": 9.999894480433563e-06, + "loss": 0.6057, + "step": 256 + }, + { + "epoch": 0.03, + "learning_rate": 9.999880878355962e-06, + "loss": 0.596, + "step": 257 + }, + { + "epoch": 0.03, + "learning_rate": 9.999866451923502e-06, + "loss": 0.6368, + "step": 258 + }, + { + "epoch": 0.03, + "learning_rate": 9.999851201138558e-06, + "loss": 0.607, + "step": 259 + }, + { + "epoch": 0.03, + "learning_rate": 9.999835126003645e-06, + "loss": 0.6265, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 9.999818226521416e-06, + "loss": 0.6097, + "step": 261 + }, + { + "epoch": 0.03, + "learning_rate": 9.999800502694656e-06, + "loss": 0.5886, + "step": 262 + }, + { + "epoch": 0.03, + "learning_rate": 9.999781954526287e-06, + "loss": 0.5754, + "step": 263 + }, + { + "epoch": 0.03, + "learning_rate": 9.999762582019365e-06, + "loss": 0.5727, + "step": 264 + }, + { + "epoch": 0.03, + "learning_rate": 9.99974238517709e-06, + "loss": 0.6336, + "step": 265 + }, + { + "epoch": 0.03, + "learning_rate": 9.999721364002786e-06, + "loss": 0.5512, + "step": 266 + }, + { + "epoch": 0.03, + "learning_rate": 9.999699518499922e-06, + "loss": 0.5999, + "step": 267 + }, + { + "epoch": 0.03, + "learning_rate": 9.999676848672098e-06, + "loss": 0.6488, + "step": 268 + }, + { + "epoch": 0.03, + "learning_rate": 9.999653354523051e-06, + "loss": 0.585, + "step": 269 + }, + { + "epoch": 0.03, + "learning_rate": 9.999629036056657e-06, + "loss": 0.6343, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 9.999603893276926e-06, + "loss": 0.6019, + "step": 271 + }, + { + "epoch": 0.03, + "learning_rate": 9.999577926188e-06, + "loss": 0.5656, + "step": 272 + }, + { + "epoch": 0.03, + "learning_rate": 9.999551134794164e-06, + "loss": 0.5847, + "step": 273 + }, + { + "epoch": 0.03, + "learning_rate": 9.999523519099835e-06, + "loss": 0.6118, + "step": 274 + }, + { + "epoch": 0.03, + "learning_rate": 9.999495079109561e-06, + "loss": 0.6406, + "step": 275 + }, + { + "epoch": 0.03, + "learning_rate": 9.999465814828037e-06, + "loss": 0.6574, + "step": 276 + }, + { + "epoch": 0.03, + "learning_rate": 9.999435726260085e-06, + "loss": 0.6184, + "step": 277 + }, + { + "epoch": 0.03, + "learning_rate": 9.999404813410668e-06, + "loss": 0.6, + "step": 278 + }, + { + "epoch": 0.03, + "learning_rate": 9.999373076284877e-06, + "loss": 0.5837, + "step": 279 + }, + { + "epoch": 0.04, + "learning_rate": 9.999340514887953e-06, + "loss": 0.6513, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 9.999307129225259e-06, + "loss": 0.657, + "step": 281 + }, + { + "epoch": 0.04, + "learning_rate": 9.9992729193023e-06, + "loss": 0.6972, + "step": 282 + }, + { + "epoch": 0.04, + "learning_rate": 9.999237885124719e-06, + "loss": 0.609, + "step": 283 + }, + { + "epoch": 0.04, + "learning_rate": 9.999202026698292e-06, + "loss": 0.6326, + "step": 284 + }, + { + "epoch": 0.04, + "learning_rate": 9.999165344028927e-06, + "loss": 0.6, + "step": 285 + }, + { + "epoch": 0.04, + "learning_rate": 9.999127837122675e-06, + "loss": 0.574, + "step": 286 + }, + { + "epoch": 0.04, + "learning_rate": 9.99908950598572e-06, + "loss": 0.5306, + "step": 287 + }, + { + "epoch": 0.04, + "learning_rate": 9.999050350624381e-06, + "loss": 0.589, + "step": 288 + }, + { + "epoch": 0.04, + "learning_rate": 9.999010371045116e-06, + "loss": 0.5737, + "step": 289 + }, + { + "epoch": 0.04, + "learning_rate": 9.998969567254514e-06, + "loss": 0.6044, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 9.998927939259303e-06, + "loss": 0.5794, + "step": 291 + }, + { + "epoch": 0.04, + "learning_rate": 9.998885487066348e-06, + "loss": 0.5682, + "step": 292 + }, + { + "epoch": 0.04, + "learning_rate": 9.998842210682645e-06, + "loss": 0.5115, + "step": 293 + }, + { + "epoch": 0.04, + "learning_rate": 9.998798110115333e-06, + "loss": 0.6187, + "step": 294 + }, + { + "epoch": 0.04, + "learning_rate": 9.998753185371681e-06, + "loss": 0.5645, + "step": 295 + }, + { + "epoch": 0.04, + "learning_rate": 9.998707436459097e-06, + "loss": 0.5987, + "step": 296 + }, + { + "epoch": 0.04, + "learning_rate": 9.998660863385124e-06, + "loss": 0.556, + "step": 297 + }, + { + "epoch": 0.04, + "learning_rate": 9.998613466157437e-06, + "loss": 0.6012, + "step": 298 + }, + { + "epoch": 0.04, + "learning_rate": 9.998565244783855e-06, + "loss": 0.5912, + "step": 299 + }, + { + "epoch": 0.04, + "learning_rate": 9.998516199272327e-06, + "loss": 0.572, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 9.99846632963094e-06, + "loss": 0.6012, + "step": 301 + }, + { + "epoch": 0.04, + "learning_rate": 9.998415635867915e-06, + "loss": 0.6346, + "step": 302 + }, + { + "epoch": 0.04, + "learning_rate": 9.998364117991612e-06, + "loss": 0.5832, + "step": 303 + }, + { + "epoch": 0.04, + "learning_rate": 9.998311776010523e-06, + "loss": 0.6021, + "step": 304 + }, + { + "epoch": 0.04, + "learning_rate": 9.99825860993328e-06, + "loss": 0.5637, + "step": 305 + }, + { + "epoch": 0.04, + "learning_rate": 9.998204619768645e-06, + "loss": 0.5842, + "step": 306 + }, + { + "epoch": 0.04, + "learning_rate": 9.998149805525523e-06, + "loss": 0.6096, + "step": 307 + }, + { + "epoch": 0.04, + "learning_rate": 9.99809416721295e-06, + "loss": 0.648, + "step": 308 + }, + { + "epoch": 0.04, + "learning_rate": 9.998037704840103e-06, + "loss": 0.6455, + "step": 309 + }, + { + "epoch": 0.04, + "learning_rate": 9.997980418416285e-06, + "loss": 0.6036, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 9.997922307950945e-06, + "loss": 0.5672, + "step": 311 + }, + { + "epoch": 0.04, + "learning_rate": 9.997863373453664e-06, + "loss": 0.6748, + "step": 312 + }, + { + "epoch": 0.04, + "learning_rate": 9.997803614934155e-06, + "loss": 0.5883, + "step": 313 + }, + { + "epoch": 0.04, + "learning_rate": 9.997743032402278e-06, + "loss": 0.6474, + "step": 314 + }, + { + "epoch": 0.04, + "learning_rate": 9.997681625868014e-06, + "loss": 0.6223, + "step": 315 + }, + { + "epoch": 0.04, + "learning_rate": 9.997619395341494e-06, + "loss": 0.5873, + "step": 316 + }, + { + "epoch": 0.04, + "learning_rate": 9.997556340832972e-06, + "loss": 0.6793, + "step": 317 + }, + { + "epoch": 0.04, + "learning_rate": 9.997492462352846e-06, + "loss": 0.614, + "step": 318 + }, + { + "epoch": 0.04, + "learning_rate": 9.997427759911654e-06, + "loss": 0.5454, + "step": 319 + }, + { + "epoch": 0.04, + "learning_rate": 9.997362233520054e-06, + "loss": 0.6555, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 9.997295883188855e-06, + "loss": 0.6002, + "step": 321 + }, + { + "epoch": 0.04, + "learning_rate": 9.997228708928999e-06, + "loss": 0.517, + "step": 322 + }, + { + "epoch": 0.04, + "learning_rate": 9.997160710751555e-06, + "loss": 0.6173, + "step": 323 + }, + { + "epoch": 0.04, + "learning_rate": 9.997091888667739e-06, + "loss": 0.673, + "step": 324 + }, + { + "epoch": 0.04, + "learning_rate": 9.997022242688896e-06, + "loss": 0.5856, + "step": 325 + }, + { + "epoch": 0.04, + "learning_rate": 9.99695177282651e-06, + "loss": 0.6048, + "step": 326 + }, + { + "epoch": 0.04, + "learning_rate": 9.996880479092199e-06, + "loss": 0.6193, + "step": 327 + }, + { + "epoch": 0.04, + "learning_rate": 9.996808361497716e-06, + "loss": 0.5978, + "step": 328 + }, + { + "epoch": 0.04, + "learning_rate": 9.996735420054954e-06, + "loss": 0.6309, + "step": 329 + }, + { + "epoch": 0.04, + "learning_rate": 9.996661654775938e-06, + "loss": 0.6157, + "step": 330 + }, + { + "epoch": 0.04, + "learning_rate": 9.996587065672832e-06, + "loss": 0.6343, + "step": 331 + }, + { + "epoch": 0.04, + "learning_rate": 9.996511652757931e-06, + "loss": 0.6075, + "step": 332 + }, + { + "epoch": 0.04, + "learning_rate": 9.99643541604367e-06, + "loss": 0.6116, + "step": 333 + }, + { + "epoch": 0.04, + "learning_rate": 9.996358355542618e-06, + "loss": 0.6163, + "step": 334 + }, + { + "epoch": 0.04, + "learning_rate": 9.996280471267481e-06, + "loss": 0.5516, + "step": 335 + }, + { + "epoch": 0.04, + "learning_rate": 9.9962017632311e-06, + "loss": 0.639, + "step": 336 + }, + { + "epoch": 0.04, + "learning_rate": 9.996122231446451e-06, + "loss": 0.6594, + "step": 337 + }, + { + "epoch": 0.04, + "learning_rate": 9.996041875926648e-06, + "loss": 0.5872, + "step": 338 + }, + { + "epoch": 0.04, + "learning_rate": 9.995960696684939e-06, + "loss": 0.6464, + "step": 339 + }, + { + "epoch": 0.04, + "learning_rate": 9.99587869373471e-06, + "loss": 0.5691, + "step": 340 + }, + { + "epoch": 0.04, + "learning_rate": 9.99579586708948e-06, + "loss": 0.6007, + "step": 341 + }, + { + "epoch": 0.04, + "learning_rate": 9.995712216762903e-06, + "loss": 0.5649, + "step": 342 + }, + { + "epoch": 0.04, + "learning_rate": 9.995627742768774e-06, + "loss": 0.5905, + "step": 343 + }, + { + "epoch": 0.04, + "learning_rate": 9.995542445121019e-06, + "loss": 0.586, + "step": 344 + }, + { + "epoch": 0.04, + "learning_rate": 9.995456323833702e-06, + "loss": 0.5987, + "step": 345 + }, + { + "epoch": 0.04, + "learning_rate": 9.995369378921022e-06, + "loss": 0.6018, + "step": 346 + }, + { + "epoch": 0.04, + "learning_rate": 9.995281610397314e-06, + "loss": 0.6175, + "step": 347 + }, + { + "epoch": 0.04, + "learning_rate": 9.99519301827705e-06, + "loss": 0.5776, + "step": 348 + }, + { + "epoch": 0.04, + "learning_rate": 9.995103602574834e-06, + "loss": 0.5721, + "step": 349 + }, + { + "epoch": 0.04, + "learning_rate": 9.99501336330541e-06, + "loss": 0.6357, + "step": 350 + }, + { + "epoch": 0.04, + "learning_rate": 9.994922300483657e-06, + "loss": 0.6398, + "step": 351 + }, + { + "epoch": 0.04, + "learning_rate": 9.994830414124588e-06, + "loss": 0.5895, + "step": 352 + }, + { + "epoch": 0.04, + "learning_rate": 9.994737704243354e-06, + "loss": 0.5804, + "step": 353 + }, + { + "epoch": 0.04, + "learning_rate": 9.994644170855237e-06, + "loss": 0.5694, + "step": 354 + }, + { + "epoch": 0.04, + "learning_rate": 9.994549813975663e-06, + "loss": 0.2202, + "step": 355 + }, + { + "epoch": 0.04, + "learning_rate": 9.994454633620186e-06, + "loss": 0.5817, + "step": 356 + }, + { + "epoch": 0.04, + "learning_rate": 9.9943586298045e-06, + "loss": 0.2235, + "step": 357 + }, + { + "epoch": 0.04, + "learning_rate": 9.99426180254443e-06, + "loss": 0.6157, + "step": 358 + }, + { + "epoch": 0.05, + "learning_rate": 9.994164151855948e-06, + "loss": 0.5941, + "step": 359 + }, + { + "epoch": 0.05, + "learning_rate": 9.994065677755148e-06, + "loss": 0.6069, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 9.993966380258269e-06, + "loss": 0.5626, + "step": 361 + }, + { + "epoch": 0.05, + "learning_rate": 9.99386625938168e-06, + "loss": 0.632, + "step": 362 + }, + { + "epoch": 0.05, + "learning_rate": 9.99376531514189e-06, + "loss": 0.5902, + "step": 363 + }, + { + "epoch": 0.05, + "learning_rate": 9.993663547555542e-06, + "loss": 0.5985, + "step": 364 + }, + { + "epoch": 0.05, + "learning_rate": 9.993560956639415e-06, + "loss": 0.6676, + "step": 365 + }, + { + "epoch": 0.05, + "learning_rate": 9.993457542410424e-06, + "loss": 0.6173, + "step": 366 + }, + { + "epoch": 0.05, + "learning_rate": 9.993353304885618e-06, + "loss": 0.602, + "step": 367 + }, + { + "epoch": 0.05, + "learning_rate": 9.993248244082185e-06, + "loss": 0.5884, + "step": 368 + }, + { + "epoch": 0.05, + "learning_rate": 9.993142360017447e-06, + "loss": 0.5565, + "step": 369 + }, + { + "epoch": 0.05, + "learning_rate": 9.993035652708857e-06, + "loss": 0.5465, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 9.992928122174017e-06, + "loss": 0.6107, + "step": 371 + }, + { + "epoch": 0.05, + "learning_rate": 9.992819768430648e-06, + "loss": 0.6164, + "step": 372 + }, + { + "epoch": 0.05, + "learning_rate": 9.992710591496618e-06, + "loss": 0.596, + "step": 373 + }, + { + "epoch": 0.05, + "learning_rate": 9.992600591389927e-06, + "loss": 0.5967, + "step": 374 + }, + { + "epoch": 0.05, + "learning_rate": 9.992489768128714e-06, + "loss": 0.2007, + "step": 375 + }, + { + "epoch": 0.05, + "learning_rate": 9.992378121731246e-06, + "loss": 0.5798, + "step": 376 + }, + { + "epoch": 0.05, + "learning_rate": 9.992265652215935e-06, + "loss": 0.6091, + "step": 377 + }, + { + "epoch": 0.05, + "learning_rate": 9.992152359601323e-06, + "loss": 0.5787, + "step": 378 + }, + { + "epoch": 0.05, + "learning_rate": 9.992038243906087e-06, + "loss": 0.5565, + "step": 379 + }, + { + "epoch": 0.05, + "learning_rate": 9.991923305149045e-06, + "loss": 0.6849, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 9.991807543349148e-06, + "loss": 0.2328, + "step": 381 + }, + { + "epoch": 0.05, + "learning_rate": 9.991690958525477e-06, + "loss": 0.5811, + "step": 382 + }, + { + "epoch": 0.05, + "learning_rate": 9.991573550697258e-06, + "loss": 0.6348, + "step": 383 + }, + { + "epoch": 0.05, + "learning_rate": 9.991455319883849e-06, + "loss": 0.5791, + "step": 384 + }, + { + "epoch": 0.05, + "learning_rate": 9.991336266104744e-06, + "loss": 0.5731, + "step": 385 + }, + { + "epoch": 0.05, + "learning_rate": 9.991216389379566e-06, + "loss": 0.5926, + "step": 386 + }, + { + "epoch": 0.05, + "learning_rate": 9.991095689728088e-06, + "loss": 0.5937, + "step": 387 + }, + { + "epoch": 0.05, + "learning_rate": 9.990974167170203e-06, + "loss": 0.6294, + "step": 388 + }, + { + "epoch": 0.05, + "learning_rate": 9.990851821725951e-06, + "loss": 0.6032, + "step": 389 + }, + { + "epoch": 0.05, + "learning_rate": 9.990728653415504e-06, + "loss": 0.5685, + "step": 390 + }, + { + "epoch": 0.05, + "learning_rate": 9.990604662259167e-06, + "loss": 0.6196, + "step": 391 + }, + { + "epoch": 0.05, + "learning_rate": 9.990479848277386e-06, + "loss": 0.6314, + "step": 392 + }, + { + "epoch": 0.05, + "learning_rate": 9.990354211490736e-06, + "loss": 0.6241, + "step": 393 + }, + { + "epoch": 0.05, + "learning_rate": 9.990227751919935e-06, + "loss": 0.5802, + "step": 394 + }, + { + "epoch": 0.05, + "learning_rate": 9.990100469585831e-06, + "loss": 0.6641, + "step": 395 + }, + { + "epoch": 0.05, + "learning_rate": 9.989972364509408e-06, + "loss": 0.6122, + "step": 396 + }, + { + "epoch": 0.05, + "learning_rate": 9.989843436711793e-06, + "loss": 0.5524, + "step": 397 + }, + { + "epoch": 0.05, + "learning_rate": 9.989713686214236e-06, + "loss": 0.639, + "step": 398 + }, + { + "epoch": 0.05, + "learning_rate": 9.989583113038134e-06, + "loss": 0.5761, + "step": 399 + }, + { + "epoch": 0.05, + "learning_rate": 9.989451717205015e-06, + "loss": 0.5865, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 9.989319498736541e-06, + "loss": 0.6391, + "step": 401 + }, + { + "epoch": 0.05, + "learning_rate": 9.989186457654515e-06, + "loss": 0.6181, + "step": 402 + }, + { + "epoch": 0.05, + "learning_rate": 9.989052593980866e-06, + "loss": 0.5554, + "step": 403 + }, + { + "epoch": 0.05, + "learning_rate": 9.98891790773767e-06, + "loss": 0.2303, + "step": 404 + }, + { + "epoch": 0.05, + "learning_rate": 9.988782398947132e-06, + "loss": 0.6037, + "step": 405 + }, + { + "epoch": 0.05, + "learning_rate": 9.988646067631593e-06, + "loss": 0.5795, + "step": 406 + }, + { + "epoch": 0.05, + "learning_rate": 9.988508913813531e-06, + "loss": 0.6261, + "step": 407 + }, + { + "epoch": 0.05, + "learning_rate": 9.988370937515562e-06, + "loss": 0.6231, + "step": 408 + }, + { + "epoch": 0.05, + "learning_rate": 9.98823213876043e-06, + "loss": 0.4999, + "step": 409 + }, + { + "epoch": 0.05, + "learning_rate": 9.988092517571024e-06, + "loss": 0.6461, + "step": 410 + }, + { + "epoch": 0.05, + "learning_rate": 9.98795207397036e-06, + "loss": 0.6034, + "step": 411 + }, + { + "epoch": 0.05, + "learning_rate": 9.987810807981598e-06, + "loss": 0.6398, + "step": 412 + }, + { + "epoch": 0.05, + "learning_rate": 9.987668719628023e-06, + "loss": 0.6375, + "step": 413 + }, + { + "epoch": 0.05, + "learning_rate": 9.987525808933069e-06, + "loss": 0.5566, + "step": 414 + }, + { + "epoch": 0.05, + "learning_rate": 9.987382075920293e-06, + "loss": 0.5792, + "step": 415 + }, + { + "epoch": 0.05, + "learning_rate": 9.987237520613395e-06, + "loss": 0.6348, + "step": 416 + }, + { + "epoch": 0.05, + "learning_rate": 9.98709214303621e-06, + "loss": 0.5908, + "step": 417 + }, + { + "epoch": 0.05, + "learning_rate": 9.986945943212704e-06, + "loss": 0.5547, + "step": 418 + }, + { + "epoch": 0.05, + "learning_rate": 9.986798921166984e-06, + "loss": 0.561, + "step": 419 + }, + { + "epoch": 0.05, + "learning_rate": 9.986651076923288e-06, + "loss": 0.5876, + "step": 420 + }, + { + "epoch": 0.05, + "learning_rate": 9.986502410505996e-06, + "loss": 0.6022, + "step": 421 + }, + { + "epoch": 0.05, + "learning_rate": 9.986352921939615e-06, + "loss": 0.637, + "step": 422 + }, + { + "epoch": 0.05, + "learning_rate": 9.986202611248794e-06, + "loss": 0.6469, + "step": 423 + }, + { + "epoch": 0.05, + "learning_rate": 9.986051478458314e-06, + "loss": 0.5545, + "step": 424 + }, + { + "epoch": 0.05, + "learning_rate": 9.985899523593094e-06, + "loss": 0.5766, + "step": 425 + }, + { + "epoch": 0.05, + "learning_rate": 9.98574674667819e-06, + "loss": 0.2454, + "step": 426 + }, + { + "epoch": 0.05, + "learning_rate": 9.985593147738788e-06, + "loss": 0.5333, + "step": 427 + }, + { + "epoch": 0.05, + "learning_rate": 9.985438726800212e-06, + "loss": 0.6145, + "step": 428 + }, + { + "epoch": 0.05, + "learning_rate": 9.985283483887923e-06, + "loss": 0.6263, + "step": 429 + }, + { + "epoch": 0.05, + "learning_rate": 9.985127419027518e-06, + "loss": 0.5813, + "step": 430 + }, + { + "epoch": 0.05, + "learning_rate": 9.984970532244726e-06, + "loss": 0.2257, + "step": 431 + }, + { + "epoch": 0.05, + "learning_rate": 9.984812823565417e-06, + "loss": 0.5278, + "step": 432 + }, + { + "epoch": 0.05, + "learning_rate": 9.98465429301559e-06, + "loss": 0.621, + "step": 433 + }, + { + "epoch": 0.05, + "learning_rate": 9.984494940621383e-06, + "loss": 0.6338, + "step": 434 + }, + { + "epoch": 0.05, + "learning_rate": 9.984334766409072e-06, + "loss": 0.5992, + "step": 435 + }, + { + "epoch": 0.05, + "learning_rate": 9.984173770405063e-06, + "loss": 0.5939, + "step": 436 + }, + { + "epoch": 0.05, + "learning_rate": 9.9840119526359e-06, + "loss": 0.6485, + "step": 437 + }, + { + "epoch": 0.05, + "learning_rate": 9.983849313128265e-06, + "loss": 0.585, + "step": 438 + }, + { + "epoch": 0.06, + "learning_rate": 9.98368585190897e-06, + "loss": 0.6349, + "step": 439 + }, + { + "epoch": 0.06, + "learning_rate": 9.98352156900497e-06, + "loss": 0.5333, + "step": 440 + }, + { + "epoch": 0.06, + "learning_rate": 9.983356464443349e-06, + "loss": 0.6073, + "step": 441 + }, + { + "epoch": 0.06, + "learning_rate": 9.983190538251325e-06, + "loss": 0.5117, + "step": 442 + }, + { + "epoch": 0.06, + "learning_rate": 9.98302379045626e-06, + "loss": 0.5557, + "step": 443 + }, + { + "epoch": 0.06, + "learning_rate": 9.982856221085644e-06, + "loss": 0.5884, + "step": 444 + }, + { + "epoch": 0.06, + "learning_rate": 9.982687830167106e-06, + "loss": 0.2244, + "step": 445 + }, + { + "epoch": 0.06, + "learning_rate": 9.98251861772841e-06, + "loss": 0.6127, + "step": 446 + }, + { + "epoch": 0.06, + "learning_rate": 9.982348583797454e-06, + "loss": 0.5372, + "step": 447 + }, + { + "epoch": 0.06, + "learning_rate": 9.98217772840227e-06, + "loss": 0.5772, + "step": 448 + }, + { + "epoch": 0.06, + "learning_rate": 9.982006051571034e-06, + "loss": 0.6316, + "step": 449 + }, + { + "epoch": 0.06, + "learning_rate": 9.981833553332045e-06, + "loss": 0.6493, + "step": 450 + }, + { + "epoch": 0.06, + "learning_rate": 9.981660233713748e-06, + "loss": 0.6529, + "step": 451 + }, + { + "epoch": 0.06, + "learning_rate": 9.981486092744715e-06, + "loss": 0.2349, + "step": 452 + }, + { + "epoch": 0.06, + "learning_rate": 9.98131113045366e-06, + "loss": 0.6309, + "step": 453 + }, + { + "epoch": 0.06, + "learning_rate": 9.98113534686943e-06, + "loss": 0.6674, + "step": 454 + }, + { + "epoch": 0.06, + "learning_rate": 9.980958742021006e-06, + "loss": 0.5727, + "step": 455 + }, + { + "epoch": 0.06, + "learning_rate": 9.980781315937507e-06, + "loss": 0.6016, + "step": 456 + }, + { + "epoch": 0.06, + "learning_rate": 9.980603068648186e-06, + "loss": 0.646, + "step": 457 + }, + { + "epoch": 0.06, + "learning_rate": 9.98042400018243e-06, + "loss": 0.5717, + "step": 458 + }, + { + "epoch": 0.06, + "learning_rate": 9.980244110569765e-06, + "loss": 0.6555, + "step": 459 + }, + { + "epoch": 0.06, + "learning_rate": 9.98006339983985e-06, + "loss": 0.6149, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 9.979881868022477e-06, + "loss": 0.6068, + "step": 461 + }, + { + "epoch": 0.06, + "learning_rate": 9.979699515147579e-06, + "loss": 0.6073, + "step": 462 + }, + { + "epoch": 0.06, + "learning_rate": 9.97951634124522e-06, + "loss": 0.6428, + "step": 463 + }, + { + "epoch": 0.06, + "learning_rate": 9.979332346345601e-06, + "loss": 0.2204, + "step": 464 + }, + { + "epoch": 0.06, + "learning_rate": 9.979147530479057e-06, + "loss": 0.5756, + "step": 465 + }, + { + "epoch": 0.06, + "learning_rate": 9.978961893676062e-06, + "loss": 0.5771, + "step": 466 + }, + { + "epoch": 0.06, + "learning_rate": 9.978775435967221e-06, + "loss": 0.5672, + "step": 467 + }, + { + "epoch": 0.06, + "learning_rate": 9.978588157383277e-06, + "loss": 0.5828, + "step": 468 + }, + { + "epoch": 0.06, + "learning_rate": 9.978400057955108e-06, + "loss": 0.6237, + "step": 469 + }, + { + "epoch": 0.06, + "learning_rate": 9.978211137713727e-06, + "loss": 0.2517, + "step": 470 + }, + { + "epoch": 0.06, + "learning_rate": 9.97802139669028e-06, + "loss": 0.2372, + "step": 471 + }, + { + "epoch": 0.06, + "learning_rate": 9.977830834916053e-06, + "loss": 0.5799, + "step": 472 + }, + { + "epoch": 0.06, + "learning_rate": 9.977639452422463e-06, + "loss": 0.6172, + "step": 473 + }, + { + "epoch": 0.06, + "learning_rate": 9.977447249241065e-06, + "loss": 0.5582, + "step": 474 + }, + { + "epoch": 0.06, + "learning_rate": 9.977254225403549e-06, + "loss": 0.5957, + "step": 475 + }, + { + "epoch": 0.06, + "learning_rate": 9.97706038094174e-06, + "loss": 0.5719, + "step": 476 + }, + { + "epoch": 0.06, + "learning_rate": 9.976865715887595e-06, + "loss": 0.5764, + "step": 477 + }, + { + "epoch": 0.06, + "learning_rate": 9.976670230273215e-06, + "loss": 0.5952, + "step": 478 + }, + { + "epoch": 0.06, + "learning_rate": 9.976473924130824e-06, + "loss": 0.6304, + "step": 479 + }, + { + "epoch": 0.06, + "learning_rate": 9.976276797492793e-06, + "loss": 0.6072, + "step": 480 + }, + { + "epoch": 0.06, + "learning_rate": 9.976078850391622e-06, + "loss": 0.5558, + "step": 481 + }, + { + "epoch": 0.06, + "learning_rate": 9.975880082859945e-06, + "loss": 0.5095, + "step": 482 + }, + { + "epoch": 0.06, + "learning_rate": 9.975680494930538e-06, + "loss": 0.6154, + "step": 483 + }, + { + "epoch": 0.06, + "learning_rate": 9.975480086636307e-06, + "loss": 0.5803, + "step": 484 + }, + { + "epoch": 0.06, + "learning_rate": 9.975278858010292e-06, + "loss": 0.659, + "step": 485 + }, + { + "epoch": 0.06, + "learning_rate": 9.97507680908567e-06, + "loss": 0.6322, + "step": 486 + }, + { + "epoch": 0.06, + "learning_rate": 9.974873939895756e-06, + "loss": 0.5983, + "step": 487 + }, + { + "epoch": 0.06, + "learning_rate": 9.974670250474e-06, + "loss": 0.6348, + "step": 488 + }, + { + "epoch": 0.06, + "learning_rate": 9.974465740853981e-06, + "loss": 0.6687, + "step": 489 + }, + { + "epoch": 0.06, + "learning_rate": 9.974260411069419e-06, + "loss": 0.6173, + "step": 490 + }, + { + "epoch": 0.06, + "learning_rate": 9.974054261154168e-06, + "loss": 0.6045, + "step": 491 + }, + { + "epoch": 0.06, + "learning_rate": 9.973847291142218e-06, + "loss": 0.5205, + "step": 492 + }, + { + "epoch": 0.06, + "learning_rate": 9.97363950106769e-06, + "loss": 0.6037, + "step": 493 + }, + { + "epoch": 0.06, + "learning_rate": 9.973430890964849e-06, + "loss": 0.6175, + "step": 494 + }, + { + "epoch": 0.06, + "learning_rate": 9.973221460868086e-06, + "loss": 0.6393, + "step": 495 + }, + { + "epoch": 0.06, + "learning_rate": 9.97301121081193e-06, + "loss": 0.592, + "step": 496 + }, + { + "epoch": 0.06, + "learning_rate": 9.972800140831044e-06, + "loss": 0.2236, + "step": 497 + }, + { + "epoch": 0.06, + "learning_rate": 9.972588250960235e-06, + "loss": 0.6383, + "step": 498 + }, + { + "epoch": 0.06, + "learning_rate": 9.972375541234432e-06, + "loss": 0.6185, + "step": 499 + }, + { + "epoch": 0.06, + "learning_rate": 9.97216201168871e-06, + "loss": 0.6195, + "step": 500 + }, + { + "epoch": 0.06, + "learning_rate": 9.971947662358271e-06, + "loss": 0.5889, + "step": 501 + }, + { + "epoch": 0.06, + "learning_rate": 9.971732493278457e-06, + "loss": 0.608, + "step": 502 + }, + { + "epoch": 0.06, + "learning_rate": 9.971516504484744e-06, + "loss": 0.6111, + "step": 503 + }, + { + "epoch": 0.06, + "learning_rate": 9.971299696012744e-06, + "loss": 0.5625, + "step": 504 + }, + { + "epoch": 0.06, + "learning_rate": 9.971082067898203e-06, + "loss": 0.6523, + "step": 505 + }, + { + "epoch": 0.06, + "learning_rate": 9.970863620177e-06, + "loss": 0.5951, + "step": 506 + }, + { + "epoch": 0.06, + "learning_rate": 9.970644352885157e-06, + "loss": 0.5518, + "step": 507 + }, + { + "epoch": 0.06, + "learning_rate": 9.97042426605882e-06, + "loss": 0.5276, + "step": 508 + }, + { + "epoch": 0.06, + "learning_rate": 9.97020335973428e-06, + "loss": 0.6572, + "step": 509 + }, + { + "epoch": 0.06, + "learning_rate": 9.969981633947956e-06, + "loss": 0.5814, + "step": 510 + }, + { + "epoch": 0.06, + "learning_rate": 9.969759088736407e-06, + "loss": 0.5828, + "step": 511 + }, + { + "epoch": 0.06, + "learning_rate": 9.969535724136321e-06, + "loss": 0.6065, + "step": 512 + }, + { + "epoch": 0.06, + "learning_rate": 9.969311540184532e-06, + "loss": 0.5974, + "step": 513 + }, + { + "epoch": 0.06, + "learning_rate": 9.969086536917996e-06, + "loss": 0.612, + "step": 514 + }, + { + "epoch": 0.06, + "learning_rate": 9.968860714373814e-06, + "loss": 0.6269, + "step": 515 + }, + { + "epoch": 0.06, + "learning_rate": 9.968634072589219e-06, + "loss": 0.584, + "step": 516 + }, + { + "epoch": 0.06, + "learning_rate": 9.968406611601575e-06, + "loss": 0.5693, + "step": 517 + }, + { + "epoch": 0.06, + "learning_rate": 9.968178331448389e-06, + "loss": 0.5545, + "step": 518 + }, + { + "epoch": 0.07, + "learning_rate": 9.967949232167295e-06, + "loss": 0.6075, + "step": 519 + }, + { + "epoch": 0.07, + "learning_rate": 9.967719313796067e-06, + "loss": 0.6013, + "step": 520 + }, + { + "epoch": 0.07, + "learning_rate": 9.967488576372613e-06, + "loss": 0.5387, + "step": 521 + }, + { + "epoch": 0.07, + "learning_rate": 9.967257019934976e-06, + "loss": 0.1871, + "step": 522 + }, + { + "epoch": 0.07, + "learning_rate": 9.967024644521332e-06, + "loss": 0.5975, + "step": 523 + }, + { + "epoch": 0.07, + "learning_rate": 9.966791450169996e-06, + "loss": 0.5445, + "step": 524 + }, + { + "epoch": 0.07, + "learning_rate": 9.966557436919416e-06, + "loss": 0.5583, + "step": 525 + }, + { + "epoch": 0.07, + "learning_rate": 9.966322604808174e-06, + "loss": 0.5938, + "step": 526 + }, + { + "epoch": 0.07, + "learning_rate": 9.966086953874989e-06, + "loss": 0.5686, + "step": 527 + }, + { + "epoch": 0.07, + "learning_rate": 9.96585048415871e-06, + "loss": 0.6126, + "step": 528 + }, + { + "epoch": 0.07, + "learning_rate": 9.965613195698332e-06, + "loss": 0.6308, + "step": 529 + }, + { + "epoch": 0.07, + "learning_rate": 9.96537508853297e-06, + "loss": 0.6082, + "step": 530 + }, + { + "epoch": 0.07, + "learning_rate": 9.965136162701889e-06, + "loss": 0.6479, + "step": 531 + }, + { + "epoch": 0.07, + "learning_rate": 9.964896418244477e-06, + "loss": 0.6339, + "step": 532 + }, + { + "epoch": 0.07, + "learning_rate": 9.964655855200264e-06, + "loss": 0.5589, + "step": 533 + }, + { + "epoch": 0.07, + "learning_rate": 9.964414473608912e-06, + "loss": 0.5807, + "step": 534 + }, + { + "epoch": 0.07, + "learning_rate": 9.96417227351022e-06, + "loss": 0.5579, + "step": 535 + }, + { + "epoch": 0.07, + "learning_rate": 9.963929254944117e-06, + "loss": 0.5956, + "step": 536 + }, + { + "epoch": 0.07, + "learning_rate": 9.963685417950678e-06, + "loss": 0.6111, + "step": 537 + }, + { + "epoch": 0.07, + "learning_rate": 9.963440762570098e-06, + "loss": 0.6783, + "step": 538 + }, + { + "epoch": 0.07, + "learning_rate": 9.963195288842717e-06, + "loss": 0.241, + "step": 539 + }, + { + "epoch": 0.07, + "learning_rate": 9.962948996809008e-06, + "loss": 0.6303, + "step": 540 + }, + { + "epoch": 0.07, + "learning_rate": 9.96270188650958e-06, + "loss": 0.5992, + "step": 541 + }, + { + "epoch": 0.07, + "learning_rate": 9.962453957985173e-06, + "loss": 0.6158, + "step": 542 + }, + { + "epoch": 0.07, + "learning_rate": 9.962205211276666e-06, + "loss": 0.5658, + "step": 543 + }, + { + "epoch": 0.07, + "learning_rate": 9.961955646425068e-06, + "loss": 0.5839, + "step": 544 + }, + { + "epoch": 0.07, + "learning_rate": 9.961705263471528e-06, + "loss": 0.5197, + "step": 545 + }, + { + "epoch": 0.07, + "learning_rate": 9.96145406245733e-06, + "loss": 0.5489, + "step": 546 + }, + { + "epoch": 0.07, + "learning_rate": 9.961202043423887e-06, + "loss": 0.6114, + "step": 547 + }, + { + "epoch": 0.07, + "learning_rate": 9.960949206412751e-06, + "loss": 0.1962, + "step": 548 + }, + { + "epoch": 0.07, + "learning_rate": 9.96069555146561e-06, + "loss": 0.5691, + "step": 549 + }, + { + "epoch": 0.07, + "learning_rate": 9.960441078624288e-06, + "loss": 0.5648, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 9.960185787930734e-06, + "loss": 0.5203, + "step": 551 + }, + { + "epoch": 0.07, + "learning_rate": 9.959929679427047e-06, + "loss": 0.5303, + "step": 552 + }, + { + "epoch": 0.07, + "learning_rate": 9.959672753155448e-06, + "loss": 0.6017, + "step": 553 + }, + { + "epoch": 0.07, + "learning_rate": 9.959415009158296e-06, + "loss": 0.5416, + "step": 554 + }, + { + "epoch": 0.07, + "learning_rate": 9.959156447478091e-06, + "loss": 0.6001, + "step": 555 + }, + { + "epoch": 0.07, + "learning_rate": 9.958897068157462e-06, + "loss": 0.6219, + "step": 556 + }, + { + "epoch": 0.07, + "learning_rate": 9.958636871239174e-06, + "loss": 0.6215, + "step": 557 + }, + { + "epoch": 0.07, + "learning_rate": 9.958375856766128e-06, + "loss": 0.5874, + "step": 558 + }, + { + "epoch": 0.07, + "learning_rate": 9.958114024781354e-06, + "loss": 0.6148, + "step": 559 + }, + { + "epoch": 0.07, + "learning_rate": 9.957851375328027e-06, + "loss": 0.5965, + "step": 560 + }, + { + "epoch": 0.07, + "learning_rate": 9.957587908449448e-06, + "loss": 0.6497, + "step": 561 + }, + { + "epoch": 0.07, + "learning_rate": 9.957323624189058e-06, + "loss": 0.6113, + "step": 562 + }, + { + "epoch": 0.07, + "learning_rate": 9.957058522590431e-06, + "loss": 0.6337, + "step": 563 + }, + { + "epoch": 0.07, + "learning_rate": 9.956792603697274e-06, + "loss": 0.6395, + "step": 564 + }, + { + "epoch": 0.07, + "learning_rate": 9.95652586755343e-06, + "loss": 0.583, + "step": 565 + }, + { + "epoch": 0.07, + "learning_rate": 9.95625831420288e-06, + "loss": 0.5621, + "step": 566 + }, + { + "epoch": 0.07, + "learning_rate": 9.955989943689734e-06, + "loss": 0.5932, + "step": 567 + }, + { + "epoch": 0.07, + "learning_rate": 9.955720756058241e-06, + "loss": 0.604, + "step": 568 + }, + { + "epoch": 0.07, + "learning_rate": 9.955450751352783e-06, + "loss": 0.2085, + "step": 569 + }, + { + "epoch": 0.07, + "learning_rate": 9.955179929617875e-06, + "loss": 0.5475, + "step": 570 + }, + { + "epoch": 0.07, + "learning_rate": 9.954908290898174e-06, + "loss": 0.6218, + "step": 571 + }, + { + "epoch": 0.07, + "learning_rate": 9.95463583523846e-06, + "loss": 0.6282, + "step": 572 + }, + { + "epoch": 0.07, + "learning_rate": 9.954362562683658e-06, + "loss": 0.5963, + "step": 573 + }, + { + "epoch": 0.07, + "learning_rate": 9.954088473278823e-06, + "loss": 0.6141, + "step": 574 + }, + { + "epoch": 0.07, + "learning_rate": 9.953813567069146e-06, + "loss": 0.5903, + "step": 575 + }, + { + "epoch": 0.07, + "learning_rate": 9.95353784409995e-06, + "loss": 0.5325, + "step": 576 + }, + { + "epoch": 0.07, + "learning_rate": 9.953261304416697e-06, + "loss": 0.6287, + "step": 577 + }, + { + "epoch": 0.07, + "learning_rate": 9.95298394806498e-06, + "loss": 0.6103, + "step": 578 + }, + { + "epoch": 0.07, + "learning_rate": 9.95270577509053e-06, + "loss": 0.5664, + "step": 579 + }, + { + "epoch": 0.07, + "learning_rate": 9.952426785539208e-06, + "loss": 0.6346, + "step": 580 + }, + { + "epoch": 0.07, + "learning_rate": 9.952146979457016e-06, + "loss": 0.5744, + "step": 581 + }, + { + "epoch": 0.07, + "learning_rate": 9.951866356890084e-06, + "loss": 0.5939, + "step": 582 + }, + { + "epoch": 0.07, + "learning_rate": 9.951584917884681e-06, + "loss": 0.6099, + "step": 583 + }, + { + "epoch": 0.07, + "learning_rate": 9.951302662487207e-06, + "loss": 0.5885, + "step": 584 + }, + { + "epoch": 0.07, + "learning_rate": 9.951019590744203e-06, + "loss": 0.5999, + "step": 585 + }, + { + "epoch": 0.07, + "learning_rate": 9.950735702702336e-06, + "loss": 0.5764, + "step": 586 + }, + { + "epoch": 0.07, + "learning_rate": 9.950450998408417e-06, + "loss": 0.65, + "step": 587 + }, + { + "epoch": 0.07, + "learning_rate": 9.95016547790938e-06, + "loss": 0.5732, + "step": 588 + }, + { + "epoch": 0.07, + "learning_rate": 9.949879141252306e-06, + "loss": 0.6114, + "step": 589 + }, + { + "epoch": 0.07, + "learning_rate": 9.949591988484401e-06, + "loss": 0.555, + "step": 590 + }, + { + "epoch": 0.07, + "learning_rate": 9.949304019653012e-06, + "loss": 0.6259, + "step": 591 + }, + { + "epoch": 0.07, + "learning_rate": 9.949015234805617e-06, + "loss": 0.5775, + "step": 592 + }, + { + "epoch": 0.07, + "learning_rate": 9.94872563398983e-06, + "loss": 0.5188, + "step": 593 + }, + { + "epoch": 0.07, + "learning_rate": 9.948435217253394e-06, + "loss": 0.5931, + "step": 594 + }, + { + "epoch": 0.07, + "learning_rate": 9.948143984644198e-06, + "loss": 0.6209, + "step": 595 + }, + { + "epoch": 0.07, + "learning_rate": 9.947851936210256e-06, + "loss": 0.5837, + "step": 596 + }, + { + "epoch": 0.07, + "learning_rate": 9.947559071999719e-06, + "loss": 0.2323, + "step": 597 + }, + { + "epoch": 0.07, + "learning_rate": 9.947265392060875e-06, + "loss": 0.6379, + "step": 598 + }, + { + "epoch": 0.08, + "learning_rate": 9.946970896442142e-06, + "loss": 0.6144, + "step": 599 + }, + { + "epoch": 0.08, + "learning_rate": 9.946675585192076e-06, + "loss": 0.5658, + "step": 600 + }, + { + "epoch": 0.08, + "learning_rate": 9.946379458359368e-06, + "loss": 0.6201, + "step": 601 + }, + { + "epoch": 0.08, + "learning_rate": 9.94608251599284e-06, + "loss": 0.2193, + "step": 602 + }, + { + "epoch": 0.08, + "learning_rate": 9.945784758141449e-06, + "loss": 0.5549, + "step": 603 + }, + { + "epoch": 0.08, + "learning_rate": 9.945486184854292e-06, + "loss": 0.6057, + "step": 604 + }, + { + "epoch": 0.08, + "learning_rate": 9.945186796180592e-06, + "loss": 0.664, + "step": 605 + }, + { + "epoch": 0.08, + "learning_rate": 9.944886592169712e-06, + "loss": 0.5841, + "step": 606 + }, + { + "epoch": 0.08, + "learning_rate": 9.94458557287115e-06, + "loss": 0.6322, + "step": 607 + }, + { + "epoch": 0.08, + "learning_rate": 9.944283738334535e-06, + "loss": 0.5567, + "step": 608 + }, + { + "epoch": 0.08, + "learning_rate": 9.94398108860963e-06, + "loss": 0.609, + "step": 609 + }, + { + "epoch": 0.08, + "learning_rate": 9.94367762374634e-06, + "loss": 0.224, + "step": 610 + }, + { + "epoch": 0.08, + "learning_rate": 9.943373343794693e-06, + "loss": 0.6363, + "step": 611 + }, + { + "epoch": 0.08, + "learning_rate": 9.94306824880486e-06, + "loss": 0.6335, + "step": 612 + }, + { + "epoch": 0.08, + "learning_rate": 9.942762338827142e-06, + "loss": 0.6265, + "step": 613 + }, + { + "epoch": 0.08, + "learning_rate": 9.942455613911976e-06, + "loss": 0.6478, + "step": 614 + }, + { + "epoch": 0.08, + "learning_rate": 9.942148074109934e-06, + "loss": 0.6055, + "step": 615 + }, + { + "epoch": 0.08, + "learning_rate": 9.941839719471722e-06, + "loss": 0.5737, + "step": 616 + }, + { + "epoch": 0.08, + "learning_rate": 9.94153055004818e-06, + "loss": 0.5806, + "step": 617 + }, + { + "epoch": 0.08, + "learning_rate": 9.94122056589028e-06, + "loss": 0.5717, + "step": 618 + }, + { + "epoch": 0.08, + "learning_rate": 9.940909767049133e-06, + "loss": 0.5967, + "step": 619 + }, + { + "epoch": 0.08, + "learning_rate": 9.940598153575983e-06, + "loss": 0.676, + "step": 620 + }, + { + "epoch": 0.08, + "learning_rate": 9.940285725522203e-06, + "loss": 0.6189, + "step": 621 + }, + { + "epoch": 0.08, + "learning_rate": 9.939972482939307e-06, + "loss": 0.5996, + "step": 622 + }, + { + "epoch": 0.08, + "learning_rate": 9.939658425878943e-06, + "loss": 0.5705, + "step": 623 + }, + { + "epoch": 0.08, + "learning_rate": 9.939343554392887e-06, + "loss": 0.5623, + "step": 624 + }, + { + "epoch": 0.08, + "learning_rate": 9.939027868533056e-06, + "loss": 0.6253, + "step": 625 + }, + { + "epoch": 0.08, + "learning_rate": 9.938711368351498e-06, + "loss": 0.5422, + "step": 626 + }, + { + "epoch": 0.08, + "learning_rate": 9.938394053900396e-06, + "loss": 0.5604, + "step": 627 + }, + { + "epoch": 0.08, + "learning_rate": 9.938075925232066e-06, + "loss": 0.5581, + "step": 628 + }, + { + "epoch": 0.08, + "learning_rate": 9.937756982398963e-06, + "loss": 0.6624, + "step": 629 + }, + { + "epoch": 0.08, + "learning_rate": 9.937437225453669e-06, + "loss": 0.587, + "step": 630 + }, + { + "epoch": 0.08, + "learning_rate": 9.937116654448904e-06, + "loss": 0.5922, + "step": 631 + }, + { + "epoch": 0.08, + "learning_rate": 9.936795269437526e-06, + "loss": 0.5759, + "step": 632 + }, + { + "epoch": 0.08, + "learning_rate": 9.93647307047252e-06, + "loss": 0.5826, + "step": 633 + }, + { + "epoch": 0.08, + "learning_rate": 9.936150057607005e-06, + "loss": 0.5656, + "step": 634 + }, + { + "epoch": 0.08, + "learning_rate": 9.935826230894246e-06, + "loss": 0.6183, + "step": 635 + }, + { + "epoch": 0.08, + "learning_rate": 9.935501590387629e-06, + "loss": 0.5222, + "step": 636 + }, + { + "epoch": 0.08, + "learning_rate": 9.93517613614068e-06, + "loss": 0.5914, + "step": 637 + }, + { + "epoch": 0.08, + "learning_rate": 9.934849868207057e-06, + "loss": 0.6161, + "step": 638 + }, + { + "epoch": 0.08, + "learning_rate": 9.934522786640555e-06, + "loss": 0.4886, + "step": 639 + }, + { + "epoch": 0.08, + "learning_rate": 9.9341948914951e-06, + "loss": 0.6204, + "step": 640 + }, + { + "epoch": 0.08, + "learning_rate": 9.933866182824757e-06, + "loss": 0.6342, + "step": 641 + }, + { + "epoch": 0.08, + "learning_rate": 9.933536660683718e-06, + "loss": 0.5478, + "step": 642 + }, + { + "epoch": 0.08, + "learning_rate": 9.933206325126312e-06, + "loss": 0.607, + "step": 643 + }, + { + "epoch": 0.08, + "learning_rate": 9.932875176207008e-06, + "loss": 0.5571, + "step": 644 + }, + { + "epoch": 0.08, + "learning_rate": 9.932543213980402e-06, + "loss": 0.5783, + "step": 645 + }, + { + "epoch": 0.08, + "learning_rate": 9.932210438501224e-06, + "loss": 0.6146, + "step": 646 + }, + { + "epoch": 0.08, + "learning_rate": 9.931876849824344e-06, + "loss": 0.633, + "step": 647 + }, + { + "epoch": 0.08, + "learning_rate": 9.93154244800476e-06, + "loss": 0.6162, + "step": 648 + }, + { + "epoch": 0.08, + "learning_rate": 9.931207233097605e-06, + "loss": 0.5834, + "step": 649 + }, + { + "epoch": 0.08, + "learning_rate": 9.930871205158152e-06, + "loss": 0.6159, + "step": 650 + }, + { + "epoch": 0.08, + "learning_rate": 9.930534364241801e-06, + "loss": 0.6551, + "step": 651 + }, + { + "epoch": 0.08, + "learning_rate": 9.930196710404088e-06, + "loss": 0.5751, + "step": 652 + }, + { + "epoch": 0.08, + "learning_rate": 9.929858243700684e-06, + "loss": 0.5927, + "step": 653 + }, + { + "epoch": 0.08, + "learning_rate": 9.929518964187395e-06, + "loss": 0.5991, + "step": 654 + }, + { + "epoch": 0.08, + "learning_rate": 9.929178871920157e-06, + "loss": 0.6327, + "step": 655 + }, + { + "epoch": 0.08, + "learning_rate": 9.928837966955045e-06, + "loss": 0.5563, + "step": 656 + }, + { + "epoch": 0.08, + "learning_rate": 9.928496249348265e-06, + "loss": 0.5872, + "step": 657 + }, + { + "epoch": 0.08, + "learning_rate": 9.92815371915616e-06, + "loss": 0.5714, + "step": 658 + }, + { + "epoch": 0.08, + "learning_rate": 9.9278103764352e-06, + "loss": 0.61, + "step": 659 + }, + { + "epoch": 0.08, + "learning_rate": 9.927466221241995e-06, + "loss": 0.5732, + "step": 660 + }, + { + "epoch": 0.08, + "learning_rate": 9.92712125363329e-06, + "loss": 0.5869, + "step": 661 + }, + { + "epoch": 0.08, + "learning_rate": 9.92677547366596e-06, + "loss": 0.6422, + "step": 662 + }, + { + "epoch": 0.08, + "learning_rate": 9.926428881397015e-06, + "loss": 0.6296, + "step": 663 + }, + { + "epoch": 0.08, + "learning_rate": 9.9260814768836e-06, + "loss": 0.6006, + "step": 664 + }, + { + "epoch": 0.08, + "learning_rate": 9.925733260182994e-06, + "loss": 0.6096, + "step": 665 + }, + { + "epoch": 0.08, + "learning_rate": 9.925384231352607e-06, + "loss": 0.5377, + "step": 666 + }, + { + "epoch": 0.08, + "learning_rate": 9.925034390449987e-06, + "loss": 0.6948, + "step": 667 + }, + { + "epoch": 0.08, + "learning_rate": 9.924683737532812e-06, + "loss": 0.5595, + "step": 668 + }, + { + "epoch": 0.08, + "learning_rate": 9.9243322726589e-06, + "loss": 0.5989, + "step": 669 + }, + { + "epoch": 0.08, + "learning_rate": 9.923979995886192e-06, + "loss": 0.2234, + "step": 670 + }, + { + "epoch": 0.08, + "learning_rate": 9.923626907272777e-06, + "loss": 0.6469, + "step": 671 + }, + { + "epoch": 0.08, + "learning_rate": 9.923273006876865e-06, + "loss": 0.6013, + "step": 672 + }, + { + "epoch": 0.08, + "learning_rate": 9.922918294756807e-06, + "loss": 0.6017, + "step": 673 + }, + { + "epoch": 0.08, + "learning_rate": 9.922562770971088e-06, + "loss": 0.5973, + "step": 674 + }, + { + "epoch": 0.08, + "learning_rate": 9.922206435578324e-06, + "loss": 0.6354, + "step": 675 + }, + { + "epoch": 0.08, + "learning_rate": 9.921849288637262e-06, + "loss": 0.5759, + "step": 676 + }, + { + "epoch": 0.08, + "learning_rate": 9.921491330206792e-06, + "loss": 0.5655, + "step": 677 + }, + { + "epoch": 0.08, + "learning_rate": 9.92113256034593e-06, + "loss": 0.5758, + "step": 678 + }, + { + "epoch": 0.09, + "learning_rate": 9.920772979113827e-06, + "loss": 0.5935, + "step": 679 + }, + { + "epoch": 0.09, + "learning_rate": 9.920412586569769e-06, + "loss": 0.6096, + "step": 680 + }, + { + "epoch": 0.09, + "learning_rate": 9.920051382773179e-06, + "loss": 0.5557, + "step": 681 + }, + { + "epoch": 0.09, + "learning_rate": 9.919689367783609e-06, + "loss": 0.5438, + "step": 682 + }, + { + "epoch": 0.09, + "learning_rate": 9.919326541660744e-06, + "loss": 0.5684, + "step": 683 + }, + { + "epoch": 0.09, + "learning_rate": 9.918962904464406e-06, + "loss": 0.5847, + "step": 684 + }, + { + "epoch": 0.09, + "learning_rate": 9.918598456254551e-06, + "loss": 0.5685, + "step": 685 + }, + { + "epoch": 0.09, + "learning_rate": 9.918233197091265e-06, + "loss": 0.6343, + "step": 686 + }, + { + "epoch": 0.09, + "learning_rate": 9.917867127034773e-06, + "loss": 0.614, + "step": 687 + }, + { + "epoch": 0.09, + "learning_rate": 9.917500246145428e-06, + "loss": 0.6531, + "step": 688 + }, + { + "epoch": 0.09, + "learning_rate": 9.917132554483721e-06, + "loss": 0.613, + "step": 689 + }, + { + "epoch": 0.09, + "learning_rate": 9.916764052110274e-06, + "loss": 0.644, + "step": 690 + }, + { + "epoch": 0.09, + "learning_rate": 9.916394739085846e-06, + "loss": 0.6262, + "step": 691 + }, + { + "epoch": 0.09, + "learning_rate": 9.916024615471324e-06, + "loss": 0.6455, + "step": 692 + }, + { + "epoch": 0.09, + "learning_rate": 9.915653681327736e-06, + "loss": 0.5547, + "step": 693 + }, + { + "epoch": 0.09, + "learning_rate": 9.915281936716238e-06, + "loss": 0.5541, + "step": 694 + }, + { + "epoch": 0.09, + "learning_rate": 9.91490938169812e-06, + "loss": 0.5974, + "step": 695 + }, + { + "epoch": 0.09, + "learning_rate": 9.914536016334808e-06, + "loss": 0.5885, + "step": 696 + }, + { + "epoch": 0.09, + "learning_rate": 9.914161840687861e-06, + "loss": 0.6158, + "step": 697 + }, + { + "epoch": 0.09, + "learning_rate": 9.913786854818972e-06, + "loss": 0.5878, + "step": 698 + }, + { + "epoch": 0.09, + "learning_rate": 9.913411058789964e-06, + "loss": 0.5935, + "step": 699 + }, + { + "epoch": 0.09, + "learning_rate": 9.913034452662799e-06, + "loss": 0.2069, + "step": 700 + }, + { + "epoch": 0.09, + "learning_rate": 9.91265703649957e-06, + "loss": 0.6243, + "step": 701 + }, + { + "epoch": 0.09, + "learning_rate": 9.912278810362499e-06, + "loss": 0.5569, + "step": 702 + }, + { + "epoch": 0.09, + "learning_rate": 9.91189977431395e-06, + "loss": 0.6674, + "step": 703 + }, + { + "epoch": 0.09, + "learning_rate": 9.911519928416419e-06, + "loss": 0.5387, + "step": 704 + }, + { + "epoch": 0.09, + "learning_rate": 9.911139272732528e-06, + "loss": 0.6147, + "step": 705 + }, + { + "epoch": 0.09, + "learning_rate": 9.91075780732504e-06, + "loss": 0.6334, + "step": 706 + }, + { + "epoch": 0.09, + "learning_rate": 9.910375532256846e-06, + "loss": 0.6013, + "step": 707 + }, + { + "epoch": 0.09, + "learning_rate": 9.90999244759098e-06, + "loss": 0.6311, + "step": 708 + }, + { + "epoch": 0.09, + "learning_rate": 9.909608553390596e-06, + "loss": 0.6001, + "step": 709 + }, + { + "epoch": 0.09, + "learning_rate": 9.909223849718993e-06, + "loss": 0.6098, + "step": 710 + }, + { + "epoch": 0.09, + "learning_rate": 9.908838336639597e-06, + "loss": 0.6461, + "step": 711 + }, + { + "epoch": 0.09, + "learning_rate": 9.908452014215971e-06, + "loss": 0.6094, + "step": 712 + }, + { + "epoch": 0.09, + "learning_rate": 9.908064882511809e-06, + "loss": 0.6237, + "step": 713 + }, + { + "epoch": 0.09, + "learning_rate": 9.90767694159094e-06, + "loss": 0.6172, + "step": 714 + }, + { + "epoch": 0.09, + "learning_rate": 9.907288191517324e-06, + "loss": 0.5798, + "step": 715 + }, + { + "epoch": 0.09, + "learning_rate": 9.906898632355055e-06, + "loss": 0.6297, + "step": 716 + }, + { + "epoch": 0.09, + "learning_rate": 9.906508264168366e-06, + "loss": 0.2401, + "step": 717 + }, + { + "epoch": 0.09, + "learning_rate": 9.906117087021617e-06, + "loss": 0.6247, + "step": 718 + }, + { + "epoch": 0.09, + "learning_rate": 9.905725100979301e-06, + "loss": 0.5312, + "step": 719 + }, + { + "epoch": 0.09, + "learning_rate": 9.905332306106051e-06, + "loss": 0.5843, + "step": 720 + }, + { + "epoch": 0.09, + "learning_rate": 9.904938702466625e-06, + "loss": 0.672, + "step": 721 + }, + { + "epoch": 0.09, + "learning_rate": 9.90454429012592e-06, + "loss": 0.6128, + "step": 722 + }, + { + "epoch": 0.09, + "learning_rate": 9.904149069148962e-06, + "loss": 0.5679, + "step": 723 + }, + { + "epoch": 0.09, + "learning_rate": 9.90375303960092e-06, + "loss": 0.6327, + "step": 724 + }, + { + "epoch": 0.09, + "learning_rate": 9.903356201547082e-06, + "loss": 0.5678, + "step": 725 + }, + { + "epoch": 0.09, + "learning_rate": 9.902958555052882e-06, + "loss": 0.6306, + "step": 726 + }, + { + "epoch": 0.09, + "learning_rate": 9.902560100183879e-06, + "loss": 0.2201, + "step": 727 + }, + { + "epoch": 0.09, + "learning_rate": 9.902160837005767e-06, + "loss": 0.5416, + "step": 728 + }, + { + "epoch": 0.09, + "learning_rate": 9.901760765584376e-06, + "loss": 0.5729, + "step": 729 + }, + { + "epoch": 0.09, + "learning_rate": 9.90135988598567e-06, + "loss": 0.6231, + "step": 730 + }, + { + "epoch": 0.09, + "learning_rate": 9.90095819827574e-06, + "loss": 0.6192, + "step": 731 + }, + { + "epoch": 0.09, + "learning_rate": 9.900555702520817e-06, + "loss": 0.5715, + "step": 732 + }, + { + "epoch": 0.09, + "learning_rate": 9.900152398787261e-06, + "loss": 0.5804, + "step": 733 + }, + { + "epoch": 0.09, + "learning_rate": 9.899748287141568e-06, + "loss": 0.5328, + "step": 734 + }, + { + "epoch": 0.09, + "learning_rate": 9.899343367650364e-06, + "loss": 0.5721, + "step": 735 + }, + { + "epoch": 0.09, + "learning_rate": 9.898937640380412e-06, + "loss": 0.6342, + "step": 736 + }, + { + "epoch": 0.09, + "learning_rate": 9.898531105398605e-06, + "loss": 0.6424, + "step": 737 + }, + { + "epoch": 0.09, + "learning_rate": 9.898123762771972e-06, + "loss": 0.5949, + "step": 738 + }, + { + "epoch": 0.09, + "learning_rate": 9.897715612567671e-06, + "loss": 0.6565, + "step": 739 + }, + { + "epoch": 0.09, + "learning_rate": 9.897306654852998e-06, + "loss": 0.6249, + "step": 740 + }, + { + "epoch": 0.09, + "learning_rate": 9.896896889695377e-06, + "loss": 0.6231, + "step": 741 + }, + { + "epoch": 0.09, + "learning_rate": 9.896486317162372e-06, + "loss": 0.5453, + "step": 742 + }, + { + "epoch": 0.09, + "learning_rate": 9.896074937321674e-06, + "loss": 0.5024, + "step": 743 + }, + { + "epoch": 0.09, + "learning_rate": 9.895662750241109e-06, + "loss": 0.5524, + "step": 744 + }, + { + "epoch": 0.09, + "learning_rate": 9.895249755988636e-06, + "loss": 0.5478, + "step": 745 + }, + { + "epoch": 0.09, + "learning_rate": 9.894835954632349e-06, + "loss": 0.6237, + "step": 746 + }, + { + "epoch": 0.09, + "learning_rate": 9.894421346240472e-06, + "loss": 0.5784, + "step": 747 + }, + { + "epoch": 0.09, + "learning_rate": 9.894005930881363e-06, + "loss": 0.5953, + "step": 748 + }, + { + "epoch": 0.09, + "learning_rate": 9.893589708623516e-06, + "loss": 0.663, + "step": 749 + }, + { + "epoch": 0.09, + "learning_rate": 9.893172679535554e-06, + "loss": 0.5606, + "step": 750 + }, + { + "epoch": 0.09, + "learning_rate": 9.892754843686234e-06, + "loss": 0.5602, + "step": 751 + }, + { + "epoch": 0.09, + "learning_rate": 9.892336201144447e-06, + "loss": 0.6451, + "step": 752 + }, + { + "epoch": 0.09, + "learning_rate": 9.891916751979218e-06, + "loss": 0.6009, + "step": 753 + }, + { + "epoch": 0.09, + "learning_rate": 9.891496496259701e-06, + "loss": 0.6182, + "step": 754 + }, + { + "epoch": 0.09, + "learning_rate": 9.891075434055189e-06, + "loss": 0.5515, + "step": 755 + }, + { + "epoch": 0.09, + "learning_rate": 9.890653565435102e-06, + "loss": 0.5481, + "step": 756 + }, + { + "epoch": 0.09, + "learning_rate": 9.890230890468998e-06, + "loss": 0.602, + "step": 757 + }, + { + "epoch": 0.1, + "learning_rate": 9.889807409226563e-06, + "loss": 0.517, + "step": 758 + }, + { + "epoch": 0.1, + "learning_rate": 9.889383121777618e-06, + "loss": 0.5693, + "step": 759 + }, + { + "epoch": 0.1, + "learning_rate": 9.88895802819212e-06, + "loss": 0.6089, + "step": 760 + }, + { + "epoch": 0.1, + "learning_rate": 9.888532128540155e-06, + "loss": 0.586, + "step": 761 + }, + { + "epoch": 0.1, + "learning_rate": 9.888105422891942e-06, + "loss": 0.6161, + "step": 762 + }, + { + "epoch": 0.1, + "learning_rate": 9.887677911317837e-06, + "loss": 0.5185, + "step": 763 + }, + { + "epoch": 0.1, + "learning_rate": 9.887249593888321e-06, + "loss": 0.5982, + "step": 764 + }, + { + "epoch": 0.1, + "learning_rate": 9.88682047067402e-06, + "loss": 0.5708, + "step": 765 + }, + { + "epoch": 0.1, + "learning_rate": 9.886390541745678e-06, + "loss": 0.2191, + "step": 766 + }, + { + "epoch": 0.1, + "learning_rate": 9.885959807174185e-06, + "loss": 0.5921, + "step": 767 + }, + { + "epoch": 0.1, + "learning_rate": 9.885528267030556e-06, + "loss": 0.598, + "step": 768 + }, + { + "epoch": 0.1, + "learning_rate": 9.88509592138594e-06, + "loss": 0.58, + "step": 769 + }, + { + "epoch": 0.1, + "learning_rate": 9.884662770311625e-06, + "loss": 0.5702, + "step": 770 + }, + { + "epoch": 0.1, + "learning_rate": 9.88422881387902e-06, + "loss": 0.6054, + "step": 771 + }, + { + "epoch": 0.1, + "learning_rate": 9.88379405215968e-06, + "loss": 0.5855, + "step": 772 + }, + { + "epoch": 0.1, + "learning_rate": 9.883358485225278e-06, + "loss": 0.4872, + "step": 773 + }, + { + "epoch": 0.1, + "learning_rate": 9.882922113147637e-06, + "loss": 0.6107, + "step": 774 + }, + { + "epoch": 0.1, + "learning_rate": 9.882484935998698e-06, + "loss": 0.5996, + "step": 775 + }, + { + "epoch": 0.1, + "learning_rate": 9.882046953850543e-06, + "loss": 0.5306, + "step": 776 + }, + { + "epoch": 0.1, + "learning_rate": 9.881608166775384e-06, + "loss": 0.5787, + "step": 777 + }, + { + "epoch": 0.1, + "learning_rate": 9.881168574845566e-06, + "loss": 0.5673, + "step": 778 + }, + { + "epoch": 0.1, + "learning_rate": 9.880728178133565e-06, + "loss": 0.5453, + "step": 779 + }, + { + "epoch": 0.1, + "learning_rate": 9.880286976711992e-06, + "loss": 0.5976, + "step": 780 + }, + { + "epoch": 0.1, + "learning_rate": 9.879844970653594e-06, + "loss": 0.5884, + "step": 781 + }, + { + "epoch": 0.1, + "learning_rate": 9.87940216003124e-06, + "loss": 0.6075, + "step": 782 + }, + { + "epoch": 0.1, + "learning_rate": 9.878958544917943e-06, + "loss": 0.6671, + "step": 783 + }, + { + "epoch": 0.1, + "learning_rate": 9.878514125386843e-06, + "loss": 0.6639, + "step": 784 + }, + { + "epoch": 0.1, + "learning_rate": 9.878068901511213e-06, + "loss": 0.5539, + "step": 785 + }, + { + "epoch": 0.1, + "learning_rate": 9.877622873364461e-06, + "loss": 0.5591, + "step": 786 + }, + { + "epoch": 0.1, + "learning_rate": 9.877176041020122e-06, + "loss": 0.5356, + "step": 787 + }, + { + "epoch": 0.1, + "learning_rate": 9.876728404551872e-06, + "loss": 0.5888, + "step": 788 + }, + { + "epoch": 0.1, + "learning_rate": 9.876279964033513e-06, + "loss": 0.5756, + "step": 789 + }, + { + "epoch": 0.1, + "learning_rate": 9.87583071953898e-06, + "loss": 0.6108, + "step": 790 + }, + { + "epoch": 0.1, + "learning_rate": 9.875380671142345e-06, + "loss": 0.5993, + "step": 791 + }, + { + "epoch": 0.1, + "learning_rate": 9.874929818917806e-06, + "loss": 0.5937, + "step": 792 + }, + { + "epoch": 0.1, + "learning_rate": 9.874478162939703e-06, + "loss": 0.7102, + "step": 793 + }, + { + "epoch": 0.1, + "learning_rate": 9.874025703282497e-06, + "loss": 0.5561, + "step": 794 + }, + { + "epoch": 0.1, + "learning_rate": 9.873572440020792e-06, + "loss": 0.5898, + "step": 795 + }, + { + "epoch": 0.1, + "learning_rate": 9.873118373229313e-06, + "loss": 0.6502, + "step": 796 + }, + { + "epoch": 0.1, + "learning_rate": 9.872663502982931e-06, + "loss": 0.6323, + "step": 797 + }, + { + "epoch": 0.1, + "learning_rate": 9.872207829356641e-06, + "loss": 0.6389, + "step": 798 + }, + { + "epoch": 0.1, + "learning_rate": 9.87175135242557e-06, + "loss": 0.6274, + "step": 799 + }, + { + "epoch": 0.1, + "learning_rate": 9.87129407226498e-06, + "loss": 0.5743, + "step": 800 + }, + { + "epoch": 0.1, + "learning_rate": 9.87083598895027e-06, + "loss": 0.6138, + "step": 801 + }, + { + "epoch": 0.1, + "learning_rate": 9.870377102556959e-06, + "loss": 0.542, + "step": 802 + }, + { + "epoch": 0.1, + "learning_rate": 9.86991741316071e-06, + "loss": 0.5499, + "step": 803 + }, + { + "epoch": 0.1, + "learning_rate": 9.869456920837312e-06, + "loss": 0.5809, + "step": 804 + }, + { + "epoch": 0.1, + "learning_rate": 9.868995625662692e-06, + "loss": 0.5767, + "step": 805 + }, + { + "epoch": 0.1, + "learning_rate": 9.868533527712903e-06, + "loss": 0.6074, + "step": 806 + }, + { + "epoch": 0.1, + "learning_rate": 9.868070627064135e-06, + "loss": 0.5577, + "step": 807 + }, + { + "epoch": 0.1, + "learning_rate": 9.867606923792708e-06, + "loss": 0.5511, + "step": 808 + }, + { + "epoch": 0.1, + "learning_rate": 9.867142417975075e-06, + "loss": 0.5939, + "step": 809 + }, + { + "epoch": 0.1, + "learning_rate": 9.866677109687823e-06, + "loss": 0.5714, + "step": 810 + }, + { + "epoch": 0.1, + "learning_rate": 9.866210999007666e-06, + "loss": 0.5807, + "step": 811 + }, + { + "epoch": 0.1, + "learning_rate": 9.865744086011457e-06, + "loss": 0.5693, + "step": 812 + }, + { + "epoch": 0.1, + "learning_rate": 9.865276370776178e-06, + "loss": 0.6031, + "step": 813 + }, + { + "epoch": 0.1, + "learning_rate": 9.864807853378942e-06, + "loss": 0.6144, + "step": 814 + }, + { + "epoch": 0.1, + "learning_rate": 9.864338533896997e-06, + "loss": 0.6147, + "step": 815 + }, + { + "epoch": 0.1, + "learning_rate": 9.863868412407721e-06, + "loss": 0.2277, + "step": 816 + }, + { + "epoch": 0.1, + "learning_rate": 9.863397488988627e-06, + "loss": 0.5818, + "step": 817 + }, + { + "epoch": 0.1, + "learning_rate": 9.862925763717355e-06, + "loss": 0.5865, + "step": 818 + }, + { + "epoch": 0.1, + "learning_rate": 9.862453236671685e-06, + "loss": 0.6213, + "step": 819 + }, + { + "epoch": 0.1, + "learning_rate": 9.861979907929523e-06, + "loss": 0.1995, + "step": 820 + }, + { + "epoch": 0.1, + "learning_rate": 9.861505777568908e-06, + "loss": 0.6239, + "step": 821 + }, + { + "epoch": 0.1, + "learning_rate": 9.861030845668014e-06, + "loss": 0.6241, + "step": 822 + }, + { + "epoch": 0.1, + "learning_rate": 9.860555112305144e-06, + "loss": 0.5795, + "step": 823 + }, + { + "epoch": 0.1, + "learning_rate": 9.860078577558733e-06, + "loss": 0.6183, + "step": 824 + }, + { + "epoch": 0.1, + "learning_rate": 9.859601241507354e-06, + "loss": 0.6137, + "step": 825 + }, + { + "epoch": 0.1, + "learning_rate": 9.859123104229704e-06, + "loss": 0.5679, + "step": 826 + }, + { + "epoch": 0.1, + "learning_rate": 9.858644165804619e-06, + "loss": 0.6286, + "step": 827 + }, + { + "epoch": 0.1, + "learning_rate": 9.858164426311059e-06, + "loss": 0.5704, + "step": 828 + }, + { + "epoch": 0.1, + "learning_rate": 9.857683885828126e-06, + "loss": 0.5868, + "step": 829 + }, + { + "epoch": 0.1, + "learning_rate": 9.857202544435045e-06, + "loss": 0.6139, + "step": 830 + }, + { + "epoch": 0.1, + "learning_rate": 9.856720402211182e-06, + "loss": 0.5504, + "step": 831 + }, + { + "epoch": 0.1, + "learning_rate": 9.856237459236026e-06, + "loss": 0.5999, + "step": 832 + }, + { + "epoch": 0.1, + "learning_rate": 9.855753715589202e-06, + "loss": 0.5837, + "step": 833 + }, + { + "epoch": 0.1, + "learning_rate": 9.855269171350471e-06, + "loss": 0.5637, + "step": 834 + }, + { + "epoch": 0.1, + "learning_rate": 9.854783826599718e-06, + "loss": 0.5127, + "step": 835 + }, + { + "epoch": 0.1, + "learning_rate": 9.854297681416967e-06, + "loss": 0.5112, + "step": 836 + }, + { + "epoch": 0.1, + "learning_rate": 9.853810735882371e-06, + "loss": 0.5792, + "step": 837 + }, + { + "epoch": 0.11, + "learning_rate": 9.853322990076213e-06, + "loss": 0.5458, + "step": 838 + }, + { + "epoch": 0.11, + "learning_rate": 9.852834444078913e-06, + "loss": 0.5314, + "step": 839 + }, + { + "epoch": 0.11, + "learning_rate": 9.852345097971017e-06, + "loss": 0.5716, + "step": 840 + }, + { + "epoch": 0.11, + "learning_rate": 9.851854951833207e-06, + "loss": 0.6343, + "step": 841 + }, + { + "epoch": 0.11, + "learning_rate": 9.851364005746295e-06, + "loss": 0.5909, + "step": 842 + }, + { + "epoch": 0.11, + "learning_rate": 9.850872259791228e-06, + "loss": 0.6328, + "step": 843 + }, + { + "epoch": 0.11, + "learning_rate": 9.850379714049082e-06, + "loss": 0.54, + "step": 844 + }, + { + "epoch": 0.11, + "learning_rate": 9.849886368601063e-06, + "loss": 0.5672, + "step": 845 + }, + { + "epoch": 0.11, + "learning_rate": 9.849392223528514e-06, + "loss": 0.5366, + "step": 846 + }, + { + "epoch": 0.11, + "learning_rate": 9.848897278912906e-06, + "loss": 0.5723, + "step": 847 + }, + { + "epoch": 0.11, + "learning_rate": 9.848401534835842e-06, + "loss": 0.6475, + "step": 848 + }, + { + "epoch": 0.11, + "learning_rate": 9.847904991379061e-06, + "loss": 0.5951, + "step": 849 + }, + { + "epoch": 0.11, + "learning_rate": 9.847407648624425e-06, + "loss": 0.6213, + "step": 850 + }, + { + "epoch": 0.11, + "learning_rate": 9.846909506653938e-06, + "loss": 0.545, + "step": 851 + }, + { + "epoch": 0.11, + "learning_rate": 9.846410565549732e-06, + "loss": 0.6012, + "step": 852 + }, + { + "epoch": 0.11, + "learning_rate": 9.845910825394065e-06, + "loss": 0.5238, + "step": 853 + }, + { + "epoch": 0.11, + "learning_rate": 9.845410286269335e-06, + "loss": 0.6183, + "step": 854 + }, + { + "epoch": 0.11, + "learning_rate": 9.844908948258067e-06, + "loss": 0.5591, + "step": 855 + }, + { + "epoch": 0.11, + "learning_rate": 9.844406811442922e-06, + "loss": 0.5265, + "step": 856 + }, + { + "epoch": 0.11, + "learning_rate": 9.843903875906685e-06, + "loss": 0.4906, + "step": 857 + }, + { + "epoch": 0.11, + "learning_rate": 9.84340014173228e-06, + "loss": 0.583, + "step": 858 + }, + { + "epoch": 0.11, + "learning_rate": 9.84289560900276e-06, + "loss": 0.5695, + "step": 859 + }, + { + "epoch": 0.11, + "learning_rate": 9.84239027780131e-06, + "loss": 0.5896, + "step": 860 + }, + { + "epoch": 0.11, + "learning_rate": 9.841884148211248e-06, + "loss": 0.6301, + "step": 861 + }, + { + "epoch": 0.11, + "learning_rate": 9.841377220316018e-06, + "loss": 0.6967, + "step": 862 + }, + { + "epoch": 0.11, + "learning_rate": 9.8408694941992e-06, + "loss": 0.6311, + "step": 863 + }, + { + "epoch": 0.11, + "learning_rate": 9.840360969944511e-06, + "loss": 0.533, + "step": 864 + }, + { + "epoch": 0.11, + "learning_rate": 9.839851647635789e-06, + "loss": 0.6242, + "step": 865 + }, + { + "epoch": 0.11, + "learning_rate": 9.839341527357009e-06, + "loss": 0.6026, + "step": 866 + }, + { + "epoch": 0.11, + "learning_rate": 9.838830609192277e-06, + "loss": 0.6324, + "step": 867 + }, + { + "epoch": 0.11, + "learning_rate": 9.838318893225833e-06, + "loss": 0.6646, + "step": 868 + }, + { + "epoch": 0.11, + "learning_rate": 9.837806379542044e-06, + "loss": 0.5919, + "step": 869 + }, + { + "epoch": 0.11, + "learning_rate": 9.837293068225408e-06, + "loss": 0.528, + "step": 870 + }, + { + "epoch": 0.11, + "learning_rate": 9.836778959360563e-06, + "loss": 0.528, + "step": 871 + }, + { + "epoch": 0.11, + "learning_rate": 9.83626405303227e-06, + "loss": 0.5889, + "step": 872 + }, + { + "epoch": 0.11, + "learning_rate": 9.835748349325423e-06, + "loss": 0.5688, + "step": 873 + }, + { + "epoch": 0.11, + "learning_rate": 9.835231848325049e-06, + "loss": 0.5691, + "step": 874 + }, + { + "epoch": 0.11, + "learning_rate": 9.834714550116306e-06, + "loss": 0.5658, + "step": 875 + }, + { + "epoch": 0.11, + "learning_rate": 9.834196454784485e-06, + "loss": 0.603, + "step": 876 + }, + { + "epoch": 0.11, + "learning_rate": 9.833677562415006e-06, + "loss": 0.5971, + "step": 877 + }, + { + "epoch": 0.11, + "learning_rate": 9.83315787309342e-06, + "loss": 0.5216, + "step": 878 + }, + { + "epoch": 0.11, + "learning_rate": 9.832637386905413e-06, + "loss": 0.5748, + "step": 879 + }, + { + "epoch": 0.11, + "learning_rate": 9.8321161039368e-06, + "loss": 0.6219, + "step": 880 + }, + { + "epoch": 0.11, + "learning_rate": 9.831594024273526e-06, + "loss": 0.6129, + "step": 881 + }, + { + "epoch": 0.11, + "learning_rate": 9.831071148001668e-06, + "loss": 0.5884, + "step": 882 + }, + { + "epoch": 0.11, + "learning_rate": 9.830547475207437e-06, + "loss": 0.5665, + "step": 883 + }, + { + "epoch": 0.11, + "learning_rate": 9.830023005977175e-06, + "loss": 0.2234, + "step": 884 + }, + { + "epoch": 0.11, + "learning_rate": 9.829497740397349e-06, + "loss": 0.5621, + "step": 885 + }, + { + "epoch": 0.11, + "learning_rate": 9.828971678554566e-06, + "loss": 0.5689, + "step": 886 + }, + { + "epoch": 0.11, + "learning_rate": 9.82844482053556e-06, + "loss": 0.5671, + "step": 887 + }, + { + "epoch": 0.11, + "learning_rate": 9.827917166427196e-06, + "loss": 0.6206, + "step": 888 + }, + { + "epoch": 0.11, + "learning_rate": 9.82738871631647e-06, + "loss": 0.5505, + "step": 889 + }, + { + "epoch": 0.11, + "learning_rate": 9.826859470290512e-06, + "loss": 0.6904, + "step": 890 + }, + { + "epoch": 0.11, + "learning_rate": 9.82632942843658e-06, + "loss": 0.5573, + "step": 891 + }, + { + "epoch": 0.11, + "learning_rate": 9.825798590842065e-06, + "loss": 0.5604, + "step": 892 + }, + { + "epoch": 0.11, + "learning_rate": 9.825266957594488e-06, + "loss": 0.5641, + "step": 893 + }, + { + "epoch": 0.11, + "learning_rate": 9.824734528781506e-06, + "loss": 0.5135, + "step": 894 + }, + { + "epoch": 0.11, + "learning_rate": 9.824201304490898e-06, + "loss": 0.6004, + "step": 895 + }, + { + "epoch": 0.11, + "learning_rate": 9.82366728481058e-06, + "loss": 0.5813, + "step": 896 + }, + { + "epoch": 0.11, + "learning_rate": 9.823132469828603e-06, + "loss": 0.6597, + "step": 897 + }, + { + "epoch": 0.11, + "learning_rate": 9.822596859633139e-06, + "loss": 0.6173, + "step": 898 + }, + { + "epoch": 0.11, + "learning_rate": 9.822060454312499e-06, + "loss": 0.6364, + "step": 899 + }, + { + "epoch": 0.11, + "learning_rate": 9.821523253955123e-06, + "loss": 0.6162, + "step": 900 + }, + { + "epoch": 0.11, + "learning_rate": 9.820985258649583e-06, + "loss": 0.6032, + "step": 901 + }, + { + "epoch": 0.11, + "learning_rate": 9.820446468484578e-06, + "loss": 0.5535, + "step": 902 + }, + { + "epoch": 0.11, + "learning_rate": 9.819906883548943e-06, + "loss": 0.5834, + "step": 903 + }, + { + "epoch": 0.11, + "learning_rate": 9.819366503931642e-06, + "loss": 0.6076, + "step": 904 + }, + { + "epoch": 0.11, + "learning_rate": 9.818825329721768e-06, + "loss": 0.514, + "step": 905 + }, + { + "epoch": 0.11, + "learning_rate": 9.81828336100855e-06, + "loss": 0.5708, + "step": 906 + }, + { + "epoch": 0.11, + "learning_rate": 9.817740597881345e-06, + "loss": 0.5914, + "step": 907 + }, + { + "epoch": 0.11, + "learning_rate": 9.817197040429636e-06, + "loss": 0.6225, + "step": 908 + }, + { + "epoch": 0.11, + "learning_rate": 9.81665268874305e-06, + "loss": 0.6126, + "step": 909 + }, + { + "epoch": 0.11, + "learning_rate": 9.816107542911329e-06, + "loss": 0.6496, + "step": 910 + }, + { + "epoch": 0.11, + "learning_rate": 9.81556160302436e-06, + "loss": 0.6274, + "step": 911 + }, + { + "epoch": 0.11, + "learning_rate": 9.81501486917215e-06, + "loss": 0.6175, + "step": 912 + }, + { + "epoch": 0.11, + "learning_rate": 9.814467341444845e-06, + "loss": 0.2388, + "step": 913 + }, + { + "epoch": 0.11, + "learning_rate": 9.813919019932718e-06, + "loss": 0.6477, + "step": 914 + }, + { + "epoch": 0.11, + "learning_rate": 9.81336990472617e-06, + "loss": 0.6152, + "step": 915 + }, + { + "epoch": 0.11, + "learning_rate": 9.812819995915743e-06, + "loss": 0.6016, + "step": 916 + }, + { + "epoch": 0.11, + "learning_rate": 9.812269293592098e-06, + "loss": 0.527, + "step": 917 + }, + { + "epoch": 0.12, + "learning_rate": 9.811717797846035e-06, + "loss": 0.6536, + "step": 918 + }, + { + "epoch": 0.12, + "learning_rate": 9.811165508768477e-06, + "loss": 0.6152, + "step": 919 + }, + { + "epoch": 0.12, + "learning_rate": 9.810612426450487e-06, + "loss": 0.5085, + "step": 920 + }, + { + "epoch": 0.12, + "learning_rate": 9.810058550983255e-06, + "loss": 0.6071, + "step": 921 + }, + { + "epoch": 0.12, + "learning_rate": 9.809503882458098e-06, + "loss": 0.6125, + "step": 922 + }, + { + "epoch": 0.12, + "learning_rate": 9.808948420966469e-06, + "loss": 0.2357, + "step": 923 + }, + { + "epoch": 0.12, + "learning_rate": 9.808392166599948e-06, + "loss": 0.5546, + "step": 924 + }, + { + "epoch": 0.12, + "learning_rate": 9.807835119450248e-06, + "loss": 0.5891, + "step": 925 + }, + { + "epoch": 0.12, + "learning_rate": 9.807277279609214e-06, + "loss": 0.6278, + "step": 926 + }, + { + "epoch": 0.12, + "learning_rate": 9.806718647168818e-06, + "loss": 0.5593, + "step": 927 + }, + { + "epoch": 0.12, + "learning_rate": 9.806159222221163e-06, + "loss": 0.5563, + "step": 928 + }, + { + "epoch": 0.12, + "learning_rate": 9.805599004858488e-06, + "loss": 0.602, + "step": 929 + }, + { + "epoch": 0.12, + "learning_rate": 9.805037995173156e-06, + "loss": 0.5958, + "step": 930 + }, + { + "epoch": 0.12, + "learning_rate": 9.804476193257663e-06, + "loss": 0.5868, + "step": 931 + }, + { + "epoch": 0.12, + "learning_rate": 9.803913599204637e-06, + "loss": 0.5708, + "step": 932 + }, + { + "epoch": 0.12, + "learning_rate": 9.803350213106837e-06, + "loss": 0.6148, + "step": 933 + }, + { + "epoch": 0.12, + "learning_rate": 9.80278603505715e-06, + "loss": 0.5619, + "step": 934 + }, + { + "epoch": 0.12, + "learning_rate": 9.802221065148594e-06, + "loss": 0.6821, + "step": 935 + }, + { + "epoch": 0.12, + "learning_rate": 9.801655303474319e-06, + "loss": 0.5825, + "step": 936 + }, + { + "epoch": 0.12, + "learning_rate": 9.801088750127605e-06, + "loss": 0.5599, + "step": 937 + }, + { + "epoch": 0.12, + "learning_rate": 9.800521405201862e-06, + "loss": 0.6246, + "step": 938 + }, + { + "epoch": 0.12, + "learning_rate": 9.799953268790633e-06, + "loss": 0.5854, + "step": 939 + }, + { + "epoch": 0.12, + "learning_rate": 9.799384340987586e-06, + "loss": 0.6445, + "step": 940 + }, + { + "epoch": 0.12, + "learning_rate": 9.798814621886525e-06, + "loss": 0.5867, + "step": 941 + }, + { + "epoch": 0.12, + "learning_rate": 9.798244111581382e-06, + "loss": 0.6664, + "step": 942 + }, + { + "epoch": 0.12, + "learning_rate": 9.79767281016622e-06, + "loss": 0.5908, + "step": 943 + }, + { + "epoch": 0.12, + "learning_rate": 9.797100717735234e-06, + "loss": 0.6836, + "step": 944 + }, + { + "epoch": 0.12, + "learning_rate": 9.796527834382745e-06, + "loss": 0.5926, + "step": 945 + }, + { + "epoch": 0.12, + "learning_rate": 9.795954160203207e-06, + "loss": 0.548, + "step": 946 + }, + { + "epoch": 0.12, + "learning_rate": 9.795379695291205e-06, + "loss": 0.6578, + "step": 947 + }, + { + "epoch": 0.12, + "learning_rate": 9.794804439741455e-06, + "loss": 0.5908, + "step": 948 + }, + { + "epoch": 0.12, + "learning_rate": 9.794228393648801e-06, + "loss": 0.5796, + "step": 949 + }, + { + "epoch": 0.12, + "learning_rate": 9.79365155710822e-06, + "loss": 0.5984, + "step": 950 + }, + { + "epoch": 0.12, + "learning_rate": 9.793073930214817e-06, + "loss": 0.5935, + "step": 951 + }, + { + "epoch": 0.12, + "learning_rate": 9.792495513063827e-06, + "loss": 0.6256, + "step": 952 + }, + { + "epoch": 0.12, + "learning_rate": 9.791916305750619e-06, + "loss": 0.5566, + "step": 953 + }, + { + "epoch": 0.12, + "learning_rate": 9.791336308370687e-06, + "loss": 0.6112, + "step": 954 + }, + { + "epoch": 0.12, + "learning_rate": 9.790755521019659e-06, + "loss": 0.6395, + "step": 955 + }, + { + "epoch": 0.12, + "learning_rate": 9.790173943793293e-06, + "loss": 0.5033, + "step": 956 + }, + { + "epoch": 0.12, + "learning_rate": 9.789591576787476e-06, + "loss": 0.5516, + "step": 957 + }, + { + "epoch": 0.12, + "learning_rate": 9.789008420098226e-06, + "loss": 0.5874, + "step": 958 + }, + { + "epoch": 0.12, + "learning_rate": 9.78842447382169e-06, + "loss": 0.5839, + "step": 959 + }, + { + "epoch": 0.12, + "learning_rate": 9.787839738054147e-06, + "loss": 0.5615, + "step": 960 + }, + { + "epoch": 0.12, + "learning_rate": 9.787254212892005e-06, + "loss": 0.6318, + "step": 961 + }, + { + "epoch": 0.12, + "learning_rate": 9.786667898431801e-06, + "loss": 0.5692, + "step": 962 + }, + { + "epoch": 0.12, + "learning_rate": 9.786080794770208e-06, + "loss": 0.6741, + "step": 963 + }, + { + "epoch": 0.12, + "learning_rate": 9.78549290200402e-06, + "loss": 0.6123, + "step": 964 + }, + { + "epoch": 0.12, + "learning_rate": 9.784904220230167e-06, + "loss": 0.5776, + "step": 965 + }, + { + "epoch": 0.12, + "learning_rate": 9.784314749545707e-06, + "loss": 0.615, + "step": 966 + }, + { + "epoch": 0.12, + "learning_rate": 9.783724490047832e-06, + "loss": 0.634, + "step": 967 + }, + { + "epoch": 0.12, + "learning_rate": 9.783133441833859e-06, + "loss": 0.567, + "step": 968 + }, + { + "epoch": 0.12, + "learning_rate": 9.782541605001235e-06, + "loss": 0.5839, + "step": 969 + }, + { + "epoch": 0.12, + "learning_rate": 9.781948979647543e-06, + "loss": 0.6019, + "step": 970 + }, + { + "epoch": 0.12, + "learning_rate": 9.781355565870489e-06, + "loss": 0.6108, + "step": 971 + }, + { + "epoch": 0.12, + "learning_rate": 9.780761363767914e-06, + "loss": 0.5858, + "step": 972 + }, + { + "epoch": 0.12, + "learning_rate": 9.780166373437787e-06, + "loss": 0.2259, + "step": 973 + }, + { + "epoch": 0.12, + "learning_rate": 9.779570594978206e-06, + "loss": 0.5639, + "step": 974 + }, + { + "epoch": 0.12, + "learning_rate": 9.778974028487398e-06, + "loss": 0.6203, + "step": 975 + }, + { + "epoch": 0.12, + "learning_rate": 9.778376674063726e-06, + "loss": 0.6458, + "step": 976 + }, + { + "epoch": 0.12, + "learning_rate": 9.777778531805677e-06, + "loss": 0.6239, + "step": 977 + }, + { + "epoch": 0.12, + "learning_rate": 9.777179601811867e-06, + "loss": 0.5968, + "step": 978 + }, + { + "epoch": 0.12, + "learning_rate": 9.776579884181047e-06, + "loss": 0.4886, + "step": 979 + }, + { + "epoch": 0.12, + "learning_rate": 9.775979379012097e-06, + "loss": 0.585, + "step": 980 + }, + { + "epoch": 0.12, + "learning_rate": 9.775378086404025e-06, + "loss": 0.6263, + "step": 981 + }, + { + "epoch": 0.12, + "learning_rate": 9.774776006455964e-06, + "loss": 0.6015, + "step": 982 + }, + { + "epoch": 0.12, + "learning_rate": 9.774173139267187e-06, + "loss": 0.5606, + "step": 983 + }, + { + "epoch": 0.12, + "learning_rate": 9.77356948493709e-06, + "loss": 0.5788, + "step": 984 + }, + { + "epoch": 0.12, + "learning_rate": 9.772965043565202e-06, + "loss": 0.5556, + "step": 985 + }, + { + "epoch": 0.12, + "learning_rate": 9.772359815251176e-06, + "loss": 0.5677, + "step": 986 + }, + { + "epoch": 0.12, + "learning_rate": 9.771753800094803e-06, + "loss": 0.644, + "step": 987 + }, + { + "epoch": 0.12, + "learning_rate": 9.771146998195997e-06, + "loss": 0.5799, + "step": 988 + }, + { + "epoch": 0.12, + "learning_rate": 9.770539409654807e-06, + "loss": 0.5958, + "step": 989 + }, + { + "epoch": 0.12, + "learning_rate": 9.769931034571408e-06, + "loss": 0.2212, + "step": 990 + }, + { + "epoch": 0.12, + "learning_rate": 9.769321873046106e-06, + "loss": 0.6353, + "step": 991 + }, + { + "epoch": 0.12, + "learning_rate": 9.768711925179335e-06, + "loss": 0.5915, + "step": 992 + }, + { + "epoch": 0.12, + "learning_rate": 9.768101191071661e-06, + "loss": 0.5442, + "step": 993 + }, + { + "epoch": 0.12, + "learning_rate": 9.767489670823781e-06, + "loss": 0.5902, + "step": 994 + }, + { + "epoch": 0.12, + "learning_rate": 9.766877364536516e-06, + "loss": 0.5839, + "step": 995 + }, + { + "epoch": 0.12, + "learning_rate": 9.766264272310821e-06, + "loss": 0.5489, + "step": 996 + }, + { + "epoch": 0.12, + "learning_rate": 9.76565039424778e-06, + "loss": 0.6051, + "step": 997 + }, + { + "epoch": 0.13, + "learning_rate": 9.765035730448606e-06, + "loss": 0.5299, + "step": 998 + }, + { + "epoch": 0.13, + "learning_rate": 9.764420281014641e-06, + "loss": 0.5447, + "step": 999 + }, + { + "epoch": 0.13, + "learning_rate": 9.763804046047358e-06, + "loss": 0.6029, + "step": 1000 + }, + { + "epoch": 0.13, + "learning_rate": 9.763187025648357e-06, + "loss": 0.5906, + "step": 1001 + }, + { + "epoch": 0.13, + "learning_rate": 9.762569219919373e-06, + "loss": 0.6244, + "step": 1002 + }, + { + "epoch": 0.13, + "learning_rate": 9.761950628962262e-06, + "loss": 0.5677, + "step": 1003 + }, + { + "epoch": 0.13, + "learning_rate": 9.761331252879016e-06, + "loss": 0.544, + "step": 1004 + }, + { + "epoch": 0.13, + "learning_rate": 9.760711091771756e-06, + "loss": 0.5579, + "step": 1005 + }, + { + "epoch": 0.13, + "learning_rate": 9.76009014574273e-06, + "loss": 0.6821, + "step": 1006 + }, + { + "epoch": 0.13, + "learning_rate": 9.759468414894315e-06, + "loss": 0.5437, + "step": 1007 + }, + { + "epoch": 0.13, + "learning_rate": 9.758845899329021e-06, + "loss": 0.6028, + "step": 1008 + }, + { + "epoch": 0.13, + "learning_rate": 9.758222599149483e-06, + "loss": 0.6211, + "step": 1009 + }, + { + "epoch": 0.13, + "learning_rate": 9.75759851445847e-06, + "loss": 0.6099, + "step": 1010 + }, + { + "epoch": 0.13, + "learning_rate": 9.756973645358876e-06, + "loss": 0.6157, + "step": 1011 + }, + { + "epoch": 0.13, + "learning_rate": 9.756347991953727e-06, + "loss": 0.1836, + "step": 1012 + }, + { + "epoch": 0.13, + "learning_rate": 9.755721554346179e-06, + "loss": 0.6001, + "step": 1013 + }, + { + "epoch": 0.13, + "learning_rate": 9.755094332639513e-06, + "loss": 0.5949, + "step": 1014 + }, + { + "epoch": 0.13, + "learning_rate": 9.754466326937142e-06, + "loss": 0.6299, + "step": 1015 + }, + { + "epoch": 0.13, + "learning_rate": 9.753837537342612e-06, + "loss": 0.5668, + "step": 1016 + }, + { + "epoch": 0.13, + "learning_rate": 9.75320796395959e-06, + "loss": 0.6139, + "step": 1017 + }, + { + "epoch": 0.13, + "learning_rate": 9.752577606891882e-06, + "loss": 0.6538, + "step": 1018 + }, + { + "epoch": 0.13, + "learning_rate": 9.751946466243414e-06, + "loss": 0.6217, + "step": 1019 + }, + { + "epoch": 0.13, + "learning_rate": 9.751314542118246e-06, + "loss": 0.6091, + "step": 1020 + }, + { + "epoch": 0.13, + "learning_rate": 9.750681834620568e-06, + "loss": 0.582, + "step": 1021 + }, + { + "epoch": 0.13, + "learning_rate": 9.750048343854696e-06, + "loss": 0.6097, + "step": 1022 + }, + { + "epoch": 0.13, + "learning_rate": 9.749414069925079e-06, + "loss": 0.2357, + "step": 1023 + }, + { + "epoch": 0.13, + "learning_rate": 9.748779012936288e-06, + "loss": 0.5627, + "step": 1024 + }, + { + "epoch": 0.13, + "learning_rate": 9.748143172993034e-06, + "loss": 0.5533, + "step": 1025 + }, + { + "epoch": 0.13, + "learning_rate": 9.747506550200147e-06, + "loss": 0.5933, + "step": 1026 + }, + { + "epoch": 0.13, + "learning_rate": 9.74686914466259e-06, + "loss": 0.6606, + "step": 1027 + }, + { + "epoch": 0.13, + "learning_rate": 9.74623095648546e-06, + "loss": 0.5931, + "step": 1028 + }, + { + "epoch": 0.13, + "learning_rate": 9.745591985773972e-06, + "loss": 0.5546, + "step": 1029 + }, + { + "epoch": 0.13, + "learning_rate": 9.744952232633478e-06, + "loss": 0.6579, + "step": 1030 + }, + { + "epoch": 0.13, + "learning_rate": 9.74431169716946e-06, + "loss": 0.583, + "step": 1031 + }, + { + "epoch": 0.13, + "learning_rate": 9.743670379487522e-06, + "loss": 0.6387, + "step": 1032 + }, + { + "epoch": 0.13, + "learning_rate": 9.743028279693408e-06, + "loss": 0.5861, + "step": 1033 + }, + { + "epoch": 0.13, + "learning_rate": 9.742385397892976e-06, + "loss": 0.5721, + "step": 1034 + }, + { + "epoch": 0.13, + "learning_rate": 9.741741734192225e-06, + "loss": 0.5337, + "step": 1035 + }, + { + "epoch": 0.13, + "learning_rate": 9.74109728869728e-06, + "loss": 0.5509, + "step": 1036 + }, + { + "epoch": 0.13, + "learning_rate": 9.740452061514392e-06, + "loss": 0.6635, + "step": 1037 + }, + { + "epoch": 0.13, + "learning_rate": 9.739806052749943e-06, + "loss": 0.5746, + "step": 1038 + }, + { + "epoch": 0.13, + "learning_rate": 9.739159262510445e-06, + "loss": 0.6508, + "step": 1039 + }, + { + "epoch": 0.13, + "learning_rate": 9.738511690902535e-06, + "loss": 0.5697, + "step": 1040 + }, + { + "epoch": 0.13, + "learning_rate": 9.737863338032984e-06, + "loss": 0.6135, + "step": 1041 + }, + { + "epoch": 0.13, + "learning_rate": 9.737214204008689e-06, + "loss": 0.5965, + "step": 1042 + }, + { + "epoch": 0.13, + "learning_rate": 9.736564288936673e-06, + "loss": 0.5928, + "step": 1043 + }, + { + "epoch": 0.13, + "learning_rate": 9.735913592924093e-06, + "loss": 0.5453, + "step": 1044 + }, + { + "epoch": 0.13, + "learning_rate": 9.735262116078233e-06, + "loss": 0.5545, + "step": 1045 + }, + { + "epoch": 0.13, + "learning_rate": 9.734609858506501e-06, + "loss": 0.581, + "step": 1046 + }, + { + "epoch": 0.13, + "learning_rate": 9.733956820316444e-06, + "loss": 0.625, + "step": 1047 + }, + { + "epoch": 0.13, + "learning_rate": 9.733303001615728e-06, + "loss": 0.6149, + "step": 1048 + }, + { + "epoch": 0.13, + "learning_rate": 9.73264840251215e-06, + "loss": 0.6033, + "step": 1049 + }, + { + "epoch": 0.13, + "learning_rate": 9.731993023113642e-06, + "loss": 0.5724, + "step": 1050 + }, + { + "epoch": 0.13, + "learning_rate": 9.731336863528256e-06, + "loss": 0.5983, + "step": 1051 + }, + { + "epoch": 0.13, + "learning_rate": 9.730679923864174e-06, + "loss": 0.5895, + "step": 1052 + }, + { + "epoch": 0.13, + "learning_rate": 9.730022204229715e-06, + "loss": 0.6874, + "step": 1053 + }, + { + "epoch": 0.13, + "learning_rate": 9.729363704733315e-06, + "loss": 0.5933, + "step": 1054 + }, + { + "epoch": 0.13, + "learning_rate": 9.728704425483547e-06, + "loss": 0.5865, + "step": 1055 + }, + { + "epoch": 0.13, + "learning_rate": 9.728044366589108e-06, + "loss": 0.6233, + "step": 1056 + }, + { + "epoch": 0.13, + "learning_rate": 9.727383528158827e-06, + "loss": 0.6557, + "step": 1057 + }, + { + "epoch": 0.13, + "learning_rate": 9.726721910301657e-06, + "loss": 0.5647, + "step": 1058 + }, + { + "epoch": 0.13, + "learning_rate": 9.726059513126686e-06, + "loss": 0.514, + "step": 1059 + }, + { + "epoch": 0.13, + "learning_rate": 9.725396336743123e-06, + "loss": 0.5766, + "step": 1060 + }, + { + "epoch": 0.13, + "learning_rate": 9.724732381260308e-06, + "loss": 0.5033, + "step": 1061 + }, + { + "epoch": 0.13, + "learning_rate": 9.724067646787716e-06, + "loss": 0.5728, + "step": 1062 + }, + { + "epoch": 0.13, + "learning_rate": 9.723402133434943e-06, + "loss": 0.5771, + "step": 1063 + }, + { + "epoch": 0.13, + "learning_rate": 9.722735841311712e-06, + "loss": 0.6248, + "step": 1064 + }, + { + "epoch": 0.13, + "learning_rate": 9.722068770527883e-06, + "loss": 0.5477, + "step": 1065 + }, + { + "epoch": 0.13, + "learning_rate": 9.721400921193434e-06, + "loss": 0.6014, + "step": 1066 + }, + { + "epoch": 0.13, + "learning_rate": 9.720732293418482e-06, + "loss": 0.63, + "step": 1067 + }, + { + "epoch": 0.13, + "learning_rate": 9.720062887313262e-06, + "loss": 0.5884, + "step": 1068 + }, + { + "epoch": 0.13, + "learning_rate": 9.719392702988146e-06, + "loss": 0.5721, + "step": 1069 + }, + { + "epoch": 0.13, + "learning_rate": 9.718721740553628e-06, + "loss": 0.5699, + "step": 1070 + }, + { + "epoch": 0.13, + "learning_rate": 9.718050000120334e-06, + "loss": 0.6401, + "step": 1071 + }, + { + "epoch": 0.13, + "learning_rate": 9.717377481799017e-06, + "loss": 0.6272, + "step": 1072 + }, + { + "epoch": 0.13, + "learning_rate": 9.716704185700558e-06, + "loss": 0.6875, + "step": 1073 + }, + { + "epoch": 0.13, + "learning_rate": 9.716030111935968e-06, + "loss": 0.5007, + "step": 1074 + }, + { + "epoch": 0.13, + "learning_rate": 9.715355260616383e-06, + "loss": 0.5772, + "step": 1075 + }, + { + "epoch": 0.13, + "learning_rate": 9.714679631853071e-06, + "loss": 0.5895, + "step": 1076 + }, + { + "epoch": 0.14, + "learning_rate": 9.714003225757425e-06, + "loss": 0.5652, + "step": 1077 + }, + { + "epoch": 0.14, + "learning_rate": 9.713326042440967e-06, + "loss": 0.6175, + "step": 1078 + }, + { + "epoch": 0.14, + "learning_rate": 9.712648082015348e-06, + "loss": 0.5044, + "step": 1079 + }, + { + "epoch": 0.14, + "learning_rate": 9.711969344592347e-06, + "loss": 0.567, + "step": 1080 + }, + { + "epoch": 0.14, + "learning_rate": 9.711289830283871e-06, + "loss": 0.5898, + "step": 1081 + }, + { + "epoch": 0.14, + "learning_rate": 9.710609539201954e-06, + "loss": 0.5652, + "step": 1082 + }, + { + "epoch": 0.14, + "learning_rate": 9.709928471458759e-06, + "loss": 0.614, + "step": 1083 + }, + { + "epoch": 0.14, + "learning_rate": 9.709246627166578e-06, + "loss": 0.2334, + "step": 1084 + }, + { + "epoch": 0.14, + "learning_rate": 9.708564006437829e-06, + "loss": 0.6172, + "step": 1085 + }, + { + "epoch": 0.14, + "learning_rate": 9.707880609385058e-06, + "loss": 0.5691, + "step": 1086 + }, + { + "epoch": 0.14, + "learning_rate": 9.707196436120942e-06, + "loss": 0.6002, + "step": 1087 + }, + { + "epoch": 0.14, + "learning_rate": 9.706511486758284e-06, + "loss": 0.5382, + "step": 1088 + }, + { + "epoch": 0.14, + "learning_rate": 9.705825761410015e-06, + "loss": 0.6361, + "step": 1089 + }, + { + "epoch": 0.14, + "learning_rate": 9.705139260189192e-06, + "loss": 0.5983, + "step": 1090 + }, + { + "epoch": 0.14, + "learning_rate": 9.704451983209002e-06, + "loss": 0.6336, + "step": 1091 + }, + { + "epoch": 0.14, + "learning_rate": 9.703763930582762e-06, + "loss": 0.5381, + "step": 1092 + }, + { + "epoch": 0.14, + "learning_rate": 9.703075102423914e-06, + "loss": 0.611, + "step": 1093 + }, + { + "epoch": 0.14, + "learning_rate": 9.702385498846025e-06, + "loss": 0.6002, + "step": 1094 + }, + { + "epoch": 0.14, + "learning_rate": 9.7016951199628e-06, + "loss": 0.5997, + "step": 1095 + }, + { + "epoch": 0.14, + "learning_rate": 9.701003965888059e-06, + "loss": 0.6281, + "step": 1096 + }, + { + "epoch": 0.14, + "learning_rate": 9.700312036735757e-06, + "loss": 0.677, + "step": 1097 + }, + { + "epoch": 0.14, + "learning_rate": 9.699619332619979e-06, + "loss": 0.5239, + "step": 1098 + }, + { + "epoch": 0.14, + "learning_rate": 9.698925853654932e-06, + "loss": 0.5885, + "step": 1099 + }, + { + "epoch": 0.14, + "learning_rate": 9.698231599954954e-06, + "loss": 0.5514, + "step": 1100 + }, + { + "epoch": 0.14, + "learning_rate": 9.69753657163451e-06, + "loss": 0.6249, + "step": 1101 + }, + { + "epoch": 0.14, + "learning_rate": 9.696840768808192e-06, + "loss": 0.595, + "step": 1102 + }, + { + "epoch": 0.14, + "learning_rate": 9.69614419159072e-06, + "loss": 0.598, + "step": 1103 + }, + { + "epoch": 0.14, + "learning_rate": 9.695446840096945e-06, + "loss": 0.5982, + "step": 1104 + }, + { + "epoch": 0.14, + "learning_rate": 9.69474871444184e-06, + "loss": 0.6039, + "step": 1105 + }, + { + "epoch": 0.14, + "learning_rate": 9.694049814740509e-06, + "loss": 0.5523, + "step": 1106 + }, + { + "epoch": 0.14, + "learning_rate": 9.693350141108182e-06, + "loss": 0.5745, + "step": 1107 + }, + { + "epoch": 0.14, + "learning_rate": 9.69264969366022e-06, + "loss": 0.6449, + "step": 1108 + }, + { + "epoch": 0.14, + "learning_rate": 9.691948472512108e-06, + "loss": 0.6722, + "step": 1109 + }, + { + "epoch": 0.14, + "learning_rate": 9.69124647777946e-06, + "loss": 0.547, + "step": 1110 + }, + { + "epoch": 0.14, + "learning_rate": 9.690543709578017e-06, + "loss": 0.5359, + "step": 1111 + }, + { + "epoch": 0.14, + "learning_rate": 9.689840168023648e-06, + "loss": 0.5755, + "step": 1112 + }, + { + "epoch": 0.14, + "learning_rate": 9.68913585323235e-06, + "loss": 0.5781, + "step": 1113 + }, + { + "epoch": 0.14, + "learning_rate": 9.688430765320245e-06, + "loss": 0.5948, + "step": 1114 + }, + { + "epoch": 0.14, + "learning_rate": 9.687724904403584e-06, + "loss": 0.6372, + "step": 1115 + }, + { + "epoch": 0.14, + "learning_rate": 9.687018270598749e-06, + "loss": 0.649, + "step": 1116 + }, + { + "epoch": 0.14, + "learning_rate": 9.686310864022246e-06, + "loss": 0.5485, + "step": 1117 + }, + { + "epoch": 0.14, + "learning_rate": 9.685602684790704e-06, + "loss": 0.2323, + "step": 1118 + }, + { + "epoch": 0.14, + "learning_rate": 9.684893733020888e-06, + "loss": 0.6618, + "step": 1119 + }, + { + "epoch": 0.14, + "learning_rate": 9.684184008829684e-06, + "loss": 0.601, + "step": 1120 + }, + { + "epoch": 0.14, + "learning_rate": 9.683473512334111e-06, + "loss": 0.6056, + "step": 1121 + }, + { + "epoch": 0.14, + "learning_rate": 9.682762243651308e-06, + "loss": 0.6344, + "step": 1122 + }, + { + "epoch": 0.14, + "learning_rate": 9.68205020289855e-06, + "loss": 0.643, + "step": 1123 + }, + { + "epoch": 0.14, + "learning_rate": 9.681337390193231e-06, + "loss": 0.6621, + "step": 1124 + }, + { + "epoch": 0.14, + "learning_rate": 9.680623805652877e-06, + "loss": 0.6043, + "step": 1125 + }, + { + "epoch": 0.14, + "learning_rate": 9.67990944939514e-06, + "loss": 0.6001, + "step": 1126 + }, + { + "epoch": 0.14, + "learning_rate": 9.679194321537799e-06, + "loss": 0.5521, + "step": 1127 + }, + { + "epoch": 0.14, + "learning_rate": 9.67847842219876e-06, + "loss": 0.5539, + "step": 1128 + }, + { + "epoch": 0.14, + "learning_rate": 9.67776175149606e-06, + "loss": 0.5549, + "step": 1129 + }, + { + "epoch": 0.14, + "learning_rate": 9.677044309547858e-06, + "loss": 0.5345, + "step": 1130 + }, + { + "epoch": 0.14, + "learning_rate": 9.676326096472442e-06, + "loss": 0.5963, + "step": 1131 + }, + { + "epoch": 0.14, + "learning_rate": 9.675607112388226e-06, + "loss": 0.5288, + "step": 1132 + }, + { + "epoch": 0.14, + "learning_rate": 9.674887357413755e-06, + "loss": 0.5966, + "step": 1133 + }, + { + "epoch": 0.14, + "learning_rate": 9.674166831667697e-06, + "loss": 0.5816, + "step": 1134 + }, + { + "epoch": 0.14, + "learning_rate": 9.67344553526885e-06, + "loss": 0.591, + "step": 1135 + }, + { + "epoch": 0.14, + "learning_rate": 9.672723468336135e-06, + "loss": 0.6065, + "step": 1136 + }, + { + "epoch": 0.14, + "learning_rate": 9.672000630988607e-06, + "loss": 0.6008, + "step": 1137 + }, + { + "epoch": 0.14, + "learning_rate": 9.67127702334544e-06, + "loss": 0.5678, + "step": 1138 + }, + { + "epoch": 0.14, + "learning_rate": 9.670552645525939e-06, + "loss": 0.5574, + "step": 1139 + }, + { + "epoch": 0.14, + "learning_rate": 9.669827497649536e-06, + "loss": 0.576, + "step": 1140 + }, + { + "epoch": 0.14, + "learning_rate": 9.669101579835792e-06, + "loss": 0.6102, + "step": 1141 + }, + { + "epoch": 0.14, + "learning_rate": 9.66837489220439e-06, + "loss": 0.5629, + "step": 1142 + }, + { + "epoch": 0.14, + "learning_rate": 9.667647434875145e-06, + "loss": 0.6434, + "step": 1143 + }, + { + "epoch": 0.14, + "learning_rate": 9.666919207967992e-06, + "loss": 0.6199, + "step": 1144 + }, + { + "epoch": 0.14, + "learning_rate": 9.666190211603005e-06, + "loss": 0.6003, + "step": 1145 + }, + { + "epoch": 0.14, + "learning_rate": 9.66546044590037e-06, + "loss": 0.6506, + "step": 1146 + }, + { + "epoch": 0.14, + "learning_rate": 9.664729910980408e-06, + "loss": 0.5973, + "step": 1147 + }, + { + "epoch": 0.14, + "learning_rate": 9.663998606963568e-06, + "loss": 0.5636, + "step": 1148 + }, + { + "epoch": 0.14, + "learning_rate": 9.663266533970424e-06, + "loss": 0.5777, + "step": 1149 + }, + { + "epoch": 0.14, + "learning_rate": 9.662533692121676e-06, + "loss": 0.605, + "step": 1150 + }, + { + "epoch": 0.14, + "learning_rate": 9.661800081538149e-06, + "loss": 0.5716, + "step": 1151 + }, + { + "epoch": 0.14, + "learning_rate": 9.661065702340801e-06, + "loss": 0.5989, + "step": 1152 + }, + { + "epoch": 0.14, + "learning_rate": 9.66033055465071e-06, + "loss": 0.5976, + "step": 1153 + }, + { + "epoch": 0.14, + "learning_rate": 9.659594638589083e-06, + "loss": 0.5076, + "step": 1154 + }, + { + "epoch": 0.14, + "learning_rate": 9.658857954277255e-06, + "loss": 0.5535, + "step": 1155 + }, + { + "epoch": 0.14, + "learning_rate": 9.658120501836685e-06, + "loss": 0.5957, + "step": 1156 + }, + { + "epoch": 0.15, + "learning_rate": 9.657382281388966e-06, + "loss": 0.5535, + "step": 1157 + }, + { + "epoch": 0.15, + "learning_rate": 9.656643293055804e-06, + "loss": 0.2209, + "step": 1158 + }, + { + "epoch": 0.15, + "learning_rate": 9.655903536959046e-06, + "loss": 0.5736, + "step": 1159 + }, + { + "epoch": 0.15, + "learning_rate": 9.655163013220657e-06, + "loss": 0.6224, + "step": 1160 + }, + { + "epoch": 0.15, + "learning_rate": 9.65442172196273e-06, + "loss": 0.5955, + "step": 1161 + }, + { + "epoch": 0.15, + "learning_rate": 9.653679663307487e-06, + "loss": 0.6299, + "step": 1162 + }, + { + "epoch": 0.15, + "learning_rate": 9.652936837377272e-06, + "loss": 0.5661, + "step": 1163 + }, + { + "epoch": 0.15, + "learning_rate": 9.652193244294563e-06, + "loss": 0.5894, + "step": 1164 + }, + { + "epoch": 0.15, + "learning_rate": 9.651448884181954e-06, + "loss": 0.6168, + "step": 1165 + }, + { + "epoch": 0.15, + "learning_rate": 9.650703757162176e-06, + "loss": 0.6442, + "step": 1166 + }, + { + "epoch": 0.15, + "learning_rate": 9.64995786335808e-06, + "loss": 0.6317, + "step": 1167 + }, + { + "epoch": 0.15, + "learning_rate": 9.649211202892645e-06, + "loss": 0.525, + "step": 1168 + }, + { + "epoch": 0.15, + "learning_rate": 9.64846377588898e-06, + "loss": 0.6275, + "step": 1169 + }, + { + "epoch": 0.15, + "learning_rate": 9.647715582470309e-06, + "loss": 0.2202, + "step": 1170 + }, + { + "epoch": 0.15, + "learning_rate": 9.646966622759996e-06, + "loss": 0.6144, + "step": 1171 + }, + { + "epoch": 0.15, + "learning_rate": 9.646216896881527e-06, + "loss": 0.5787, + "step": 1172 + }, + { + "epoch": 0.15, + "learning_rate": 9.64546640495851e-06, + "loss": 0.5999, + "step": 1173 + }, + { + "epoch": 0.15, + "learning_rate": 9.644715147114683e-06, + "loss": 0.5925, + "step": 1174 + }, + { + "epoch": 0.15, + "learning_rate": 9.64396312347391e-06, + "loss": 0.5382, + "step": 1175 + }, + { + "epoch": 0.15, + "learning_rate": 9.643210334160178e-06, + "loss": 0.5639, + "step": 1176 + }, + { + "epoch": 0.15, + "learning_rate": 9.642456779297607e-06, + "loss": 0.538, + "step": 1177 + }, + { + "epoch": 0.15, + "learning_rate": 9.641702459010439e-06, + "loss": 0.5687, + "step": 1178 + }, + { + "epoch": 0.15, + "learning_rate": 9.64094737342304e-06, + "loss": 0.597, + "step": 1179 + }, + { + "epoch": 0.15, + "learning_rate": 9.640191522659906e-06, + "loss": 0.6073, + "step": 1180 + }, + { + "epoch": 0.15, + "learning_rate": 9.639434906845658e-06, + "loss": 0.5982, + "step": 1181 + }, + { + "epoch": 0.15, + "learning_rate": 9.638677526105042e-06, + "loss": 0.6213, + "step": 1182 + }, + { + "epoch": 0.15, + "learning_rate": 9.637919380562933e-06, + "loss": 0.6182, + "step": 1183 + }, + { + "epoch": 0.15, + "learning_rate": 9.63716047034433e-06, + "loss": 0.576, + "step": 1184 + }, + { + "epoch": 0.15, + "learning_rate": 9.636400795574354e-06, + "loss": 0.5786, + "step": 1185 + }, + { + "epoch": 0.15, + "learning_rate": 9.635640356378261e-06, + "loss": 0.5815, + "step": 1186 + }, + { + "epoch": 0.15, + "learning_rate": 9.634879152881426e-06, + "loss": 0.6119, + "step": 1187 + }, + { + "epoch": 0.15, + "learning_rate": 9.634117185209356e-06, + "loss": 0.6051, + "step": 1188 + }, + { + "epoch": 0.15, + "learning_rate": 9.633354453487673e-06, + "loss": 0.6524, + "step": 1189 + }, + { + "epoch": 0.15, + "learning_rate": 9.63259095784214e-06, + "loss": 0.6196, + "step": 1190 + }, + { + "epoch": 0.15, + "learning_rate": 9.631826698398634e-06, + "loss": 0.5756, + "step": 1191 + }, + { + "epoch": 0.15, + "learning_rate": 9.631061675283165e-06, + "loss": 0.602, + "step": 1192 + }, + { + "epoch": 0.15, + "learning_rate": 9.630295888621863e-06, + "loss": 0.5395, + "step": 1193 + }, + { + "epoch": 0.15, + "learning_rate": 9.629529338540987e-06, + "loss": 0.5845, + "step": 1194 + }, + { + "epoch": 0.15, + "learning_rate": 9.628762025166927e-06, + "loss": 0.5916, + "step": 1195 + }, + { + "epoch": 0.15, + "learning_rate": 9.627993948626187e-06, + "loss": 0.5639, + "step": 1196 + }, + { + "epoch": 0.15, + "learning_rate": 9.627225109045407e-06, + "loss": 0.6664, + "step": 1197 + }, + { + "epoch": 0.15, + "learning_rate": 9.62645550655135e-06, + "loss": 0.6183, + "step": 1198 + }, + { + "epoch": 0.15, + "learning_rate": 9.625685141270902e-06, + "loss": 0.6215, + "step": 1199 + }, + { + "epoch": 0.15, + "learning_rate": 9.624914013331077e-06, + "loss": 0.5576, + "step": 1200 + }, + { + "epoch": 0.15, + "learning_rate": 9.624142122859017e-06, + "loss": 0.5854, + "step": 1201 + }, + { + "epoch": 0.15, + "learning_rate": 9.623369469981986e-06, + "loss": 0.5723, + "step": 1202 + }, + { + "epoch": 0.15, + "learning_rate": 9.622596054827374e-06, + "loss": 0.6161, + "step": 1203 + }, + { + "epoch": 0.15, + "learning_rate": 9.6218218775227e-06, + "loss": 0.5832, + "step": 1204 + }, + { + "epoch": 0.15, + "learning_rate": 9.621046938195604e-06, + "loss": 0.6096, + "step": 1205 + }, + { + "epoch": 0.15, + "learning_rate": 9.620271236973856e-06, + "loss": 0.5506, + "step": 1206 + }, + { + "epoch": 0.15, + "learning_rate": 9.61949477398535e-06, + "loss": 0.5643, + "step": 1207 + }, + { + "epoch": 0.15, + "learning_rate": 9.6187175493581e-06, + "loss": 0.5967, + "step": 1208 + }, + { + "epoch": 0.15, + "learning_rate": 9.617939563220259e-06, + "loss": 0.5728, + "step": 1209 + }, + { + "epoch": 0.15, + "learning_rate": 9.617160815700091e-06, + "loss": 0.6101, + "step": 1210 + }, + { + "epoch": 0.15, + "learning_rate": 9.616381306925996e-06, + "loss": 0.5931, + "step": 1211 + }, + { + "epoch": 0.15, + "learning_rate": 9.615601037026493e-06, + "loss": 0.5843, + "step": 1212 + }, + { + "epoch": 0.15, + "learning_rate": 9.61482000613023e-06, + "loss": 0.5594, + "step": 1213 + }, + { + "epoch": 0.15, + "learning_rate": 9.614038214365977e-06, + "loss": 0.5564, + "step": 1214 + }, + { + "epoch": 0.15, + "learning_rate": 9.613255661862636e-06, + "loss": 0.5546, + "step": 1215 + }, + { + "epoch": 0.15, + "learning_rate": 9.612472348749226e-06, + "loss": 0.5477, + "step": 1216 + }, + { + "epoch": 0.15, + "learning_rate": 9.611688275154901e-06, + "loss": 0.6207, + "step": 1217 + }, + { + "epoch": 0.15, + "learning_rate": 9.61090344120893e-06, + "loss": 0.653, + "step": 1218 + }, + { + "epoch": 0.15, + "learning_rate": 9.610117847040714e-06, + "loss": 0.5868, + "step": 1219 + }, + { + "epoch": 0.15, + "learning_rate": 9.609331492779777e-06, + "loss": 0.5737, + "step": 1220 + }, + { + "epoch": 0.15, + "learning_rate": 9.608544378555772e-06, + "loss": 0.6006, + "step": 1221 + }, + { + "epoch": 0.15, + "learning_rate": 9.60775650449847e-06, + "loss": 0.6171, + "step": 1222 + }, + { + "epoch": 0.15, + "learning_rate": 9.606967870737779e-06, + "loss": 0.6366, + "step": 1223 + }, + { + "epoch": 0.15, + "learning_rate": 9.606178477403716e-06, + "loss": 0.5323, + "step": 1224 + }, + { + "epoch": 0.15, + "learning_rate": 9.605388324626438e-06, + "loss": 0.5681, + "step": 1225 + }, + { + "epoch": 0.15, + "learning_rate": 9.604597412536216e-06, + "loss": 0.6459, + "step": 1226 + }, + { + "epoch": 0.15, + "learning_rate": 9.603805741263458e-06, + "loss": 0.5842, + "step": 1227 + }, + { + "epoch": 0.15, + "learning_rate": 9.603013310938688e-06, + "loss": 0.6114, + "step": 1228 + }, + { + "epoch": 0.15, + "learning_rate": 9.602220121692558e-06, + "loss": 0.5665, + "step": 1229 + }, + { + "epoch": 0.15, + "learning_rate": 9.601426173655844e-06, + "loss": 0.5707, + "step": 1230 + }, + { + "epoch": 0.15, + "learning_rate": 9.600631466959447e-06, + "loss": 0.246, + "step": 1231 + }, + { + "epoch": 0.15, + "learning_rate": 9.599836001734397e-06, + "loss": 0.5636, + "step": 1232 + }, + { + "epoch": 0.15, + "learning_rate": 9.599039778111847e-06, + "loss": 0.6342, + "step": 1233 + }, + { + "epoch": 0.15, + "learning_rate": 9.598242796223072e-06, + "loss": 0.6215, + "step": 1234 + }, + { + "epoch": 0.15, + "learning_rate": 9.597445056199474e-06, + "loss": 0.5909, + "step": 1235 + }, + { + "epoch": 0.15, + "learning_rate": 9.59664655817258e-06, + "loss": 0.6338, + "step": 1236 + }, + { + "epoch": 0.16, + "learning_rate": 9.595847302274047e-06, + "loss": 0.6892, + "step": 1237 + }, + { + "epoch": 0.16, + "learning_rate": 9.595047288635646e-06, + "loss": 0.5701, + "step": 1238 + }, + { + "epoch": 0.16, + "learning_rate": 9.59424651738928e-06, + "loss": 0.5797, + "step": 1239 + }, + { + "epoch": 0.16, + "learning_rate": 9.59344498866698e-06, + "loss": 0.5589, + "step": 1240 + }, + { + "epoch": 0.16, + "learning_rate": 9.592642702600896e-06, + "loss": 0.5737, + "step": 1241 + }, + { + "epoch": 0.16, + "learning_rate": 9.591839659323306e-06, + "loss": 0.5822, + "step": 1242 + }, + { + "epoch": 0.16, + "learning_rate": 9.591035858966609e-06, + "loss": 0.624, + "step": 1243 + }, + { + "epoch": 0.16, + "learning_rate": 9.590231301663334e-06, + "loss": 0.5667, + "step": 1244 + }, + { + "epoch": 0.16, + "learning_rate": 9.589425987546128e-06, + "loss": 0.7111, + "step": 1245 + }, + { + "epoch": 0.16, + "learning_rate": 9.588619916747772e-06, + "loss": 0.5809, + "step": 1246 + }, + { + "epoch": 0.16, + "learning_rate": 9.587813089401165e-06, + "loss": 0.566, + "step": 1247 + }, + { + "epoch": 0.16, + "learning_rate": 9.587005505639334e-06, + "loss": 0.6091, + "step": 1248 + }, + { + "epoch": 0.16, + "learning_rate": 9.586197165595427e-06, + "loss": 0.5194, + "step": 1249 + }, + { + "epoch": 0.16, + "learning_rate": 9.58538806940272e-06, + "loss": 0.6092, + "step": 1250 + }, + { + "epoch": 0.16, + "learning_rate": 9.584578217194614e-06, + "loss": 0.5704, + "step": 1251 + }, + { + "epoch": 0.16, + "learning_rate": 9.58376760910463e-06, + "loss": 0.5224, + "step": 1252 + }, + { + "epoch": 0.16, + "learning_rate": 9.582956245266419e-06, + "loss": 0.5527, + "step": 1253 + }, + { + "epoch": 0.16, + "learning_rate": 9.582144125813756e-06, + "loss": 0.598, + "step": 1254 + }, + { + "epoch": 0.16, + "learning_rate": 9.581331250880535e-06, + "loss": 0.6015, + "step": 1255 + }, + { + "epoch": 0.16, + "learning_rate": 9.580517620600782e-06, + "loss": 0.5808, + "step": 1256 + }, + { + "epoch": 0.16, + "learning_rate": 9.579703235108644e-06, + "loss": 0.6045, + "step": 1257 + }, + { + "epoch": 0.16, + "learning_rate": 9.578888094538392e-06, + "loss": 0.5608, + "step": 1258 + }, + { + "epoch": 0.16, + "learning_rate": 9.578072199024424e-06, + "loss": 0.5663, + "step": 1259 + }, + { + "epoch": 0.16, + "learning_rate": 9.577255548701256e-06, + "loss": 0.557, + "step": 1260 + }, + { + "epoch": 0.16, + "learning_rate": 9.576438143703537e-06, + "loss": 0.5503, + "step": 1261 + }, + { + "epoch": 0.16, + "learning_rate": 9.575619984166037e-06, + "loss": 0.5756, + "step": 1262 + }, + { + "epoch": 0.16, + "learning_rate": 9.574801070223649e-06, + "loss": 0.6098, + "step": 1263 + }, + { + "epoch": 0.16, + "learning_rate": 9.573981402011388e-06, + "loss": 0.5882, + "step": 1264 + }, + { + "epoch": 0.16, + "learning_rate": 9.573160979664403e-06, + "loss": 0.6156, + "step": 1265 + }, + { + "epoch": 0.16, + "learning_rate": 9.572339803317956e-06, + "loss": 0.585, + "step": 1266 + }, + { + "epoch": 0.16, + "learning_rate": 9.571517873107442e-06, + "loss": 0.5871, + "step": 1267 + }, + { + "epoch": 0.16, + "learning_rate": 9.570695189168372e-06, + "loss": 0.6065, + "step": 1268 + }, + { + "epoch": 0.16, + "learning_rate": 9.56987175163639e-06, + "loss": 0.5625, + "step": 1269 + }, + { + "epoch": 0.16, + "learning_rate": 9.56904756064726e-06, + "loss": 0.5938, + "step": 1270 + }, + { + "epoch": 0.16, + "learning_rate": 9.56822261633687e-06, + "loss": 0.6505, + "step": 1271 + }, + { + "epoch": 0.16, + "learning_rate": 9.56739691884123e-06, + "loss": 0.5353, + "step": 1272 + }, + { + "epoch": 0.16, + "learning_rate": 9.566570468296479e-06, + "loss": 0.5633, + "step": 1273 + }, + { + "epoch": 0.16, + "learning_rate": 9.565743264838878e-06, + "loss": 0.5816, + "step": 1274 + }, + { + "epoch": 0.16, + "learning_rate": 9.564915308604813e-06, + "loss": 0.5973, + "step": 1275 + }, + { + "epoch": 0.16, + "learning_rate": 9.564086599730791e-06, + "loss": 0.661, + "step": 1276 + }, + { + "epoch": 0.16, + "learning_rate": 9.563257138353446e-06, + "loss": 0.2084, + "step": 1277 + }, + { + "epoch": 0.16, + "learning_rate": 9.562426924609535e-06, + "loss": 0.5892, + "step": 1278 + }, + { + "epoch": 0.16, + "learning_rate": 9.561595958635941e-06, + "loss": 0.6019, + "step": 1279 + }, + { + "epoch": 0.16, + "learning_rate": 9.56076424056967e-06, + "loss": 0.6136, + "step": 1280 + }, + { + "epoch": 0.16, + "learning_rate": 9.559931770547849e-06, + "loss": 0.5402, + "step": 1281 + }, + { + "epoch": 0.16, + "learning_rate": 9.55909854870773e-06, + "loss": 0.5541, + "step": 1282 + }, + { + "epoch": 0.16, + "learning_rate": 9.558264575186696e-06, + "loss": 0.5632, + "step": 1283 + }, + { + "epoch": 0.16, + "learning_rate": 9.557429850122244e-06, + "loss": 0.5862, + "step": 1284 + }, + { + "epoch": 0.16, + "learning_rate": 9.556594373652e-06, + "loss": 0.6551, + "step": 1285 + }, + { + "epoch": 0.16, + "learning_rate": 9.555758145913712e-06, + "loss": 0.5544, + "step": 1286 + }, + { + "epoch": 0.16, + "learning_rate": 9.554921167045256e-06, + "loss": 0.5859, + "step": 1287 + }, + { + "epoch": 0.16, + "learning_rate": 9.554083437184627e-06, + "loss": 0.6028, + "step": 1288 + }, + { + "epoch": 0.16, + "learning_rate": 9.553244956469945e-06, + "loss": 0.6496, + "step": 1289 + }, + { + "epoch": 0.16, + "learning_rate": 9.552405725039455e-06, + "loss": 0.6026, + "step": 1290 + }, + { + "epoch": 0.16, + "learning_rate": 9.551565743031526e-06, + "loss": 0.5968, + "step": 1291 + }, + { + "epoch": 0.16, + "learning_rate": 9.550725010584649e-06, + "loss": 0.5667, + "step": 1292 + }, + { + "epoch": 0.16, + "learning_rate": 9.549883527837441e-06, + "loss": 0.2367, + "step": 1293 + }, + { + "epoch": 0.16, + "learning_rate": 9.549041294928639e-06, + "loss": 0.6017, + "step": 1294 + }, + { + "epoch": 0.16, + "learning_rate": 9.548198311997108e-06, + "loss": 0.5771, + "step": 1295 + }, + { + "epoch": 0.16, + "learning_rate": 9.547354579181834e-06, + "loss": 0.5859, + "step": 1296 + }, + { + "epoch": 0.16, + "learning_rate": 9.546510096621927e-06, + "loss": 0.5851, + "step": 1297 + }, + { + "epoch": 0.16, + "learning_rate": 9.545664864456622e-06, + "loss": 0.542, + "step": 1298 + }, + { + "epoch": 0.16, + "learning_rate": 9.544818882825278e-06, + "loss": 0.5598, + "step": 1299 + }, + { + "epoch": 0.16, + "learning_rate": 9.543972151867372e-06, + "loss": 0.5639, + "step": 1300 + }, + { + "epoch": 0.16, + "learning_rate": 9.543124671722512e-06, + "loss": 0.6218, + "step": 1301 + }, + { + "epoch": 0.16, + "learning_rate": 9.542276442530423e-06, + "loss": 0.5281, + "step": 1302 + }, + { + "epoch": 0.16, + "learning_rate": 9.54142746443096e-06, + "loss": 0.5628, + "step": 1303 + }, + { + "epoch": 0.16, + "learning_rate": 9.540577737564098e-06, + "loss": 0.5585, + "step": 1304 + }, + { + "epoch": 0.16, + "learning_rate": 9.539727262069934e-06, + "loss": 0.6091, + "step": 1305 + }, + { + "epoch": 0.16, + "learning_rate": 9.53887603808869e-06, + "loss": 0.5747, + "step": 1306 + }, + { + "epoch": 0.16, + "learning_rate": 9.538024065760714e-06, + "loss": 0.6193, + "step": 1307 + }, + { + "epoch": 0.16, + "learning_rate": 9.53717134522647e-06, + "loss": 0.5733, + "step": 1308 + }, + { + "epoch": 0.16, + "learning_rate": 9.536317876626556e-06, + "loss": 0.5416, + "step": 1309 + }, + { + "epoch": 0.16, + "learning_rate": 9.535463660101682e-06, + "loss": 0.6018, + "step": 1310 + }, + { + "epoch": 0.16, + "learning_rate": 9.534608695792692e-06, + "loss": 0.6455, + "step": 1311 + }, + { + "epoch": 0.16, + "learning_rate": 9.533752983840546e-06, + "loss": 0.5938, + "step": 1312 + }, + { + "epoch": 0.16, + "learning_rate": 9.532896524386328e-06, + "loss": 0.5923, + "step": 1313 + }, + { + "epoch": 0.16, + "learning_rate": 9.532039317571247e-06, + "loss": 0.2178, + "step": 1314 + }, + { + "epoch": 0.16, + "learning_rate": 9.531181363536637e-06, + "loss": 0.6203, + "step": 1315 + }, + { + "epoch": 0.16, + "learning_rate": 9.530322662423952e-06, + "loss": 0.6078, + "step": 1316 + }, + { + "epoch": 0.17, + "learning_rate": 9.52946321437477e-06, + "loss": 0.6257, + "step": 1317 + }, + { + "epoch": 0.17, + "learning_rate": 9.528603019530793e-06, + "loss": 0.5668, + "step": 1318 + }, + { + "epoch": 0.17, + "learning_rate": 9.527742078033844e-06, + "loss": 0.5393, + "step": 1319 + }, + { + "epoch": 0.17, + "learning_rate": 9.526880390025872e-06, + "loss": 0.5076, + "step": 1320 + }, + { + "epoch": 0.17, + "learning_rate": 9.526017955648947e-06, + "loss": 0.6417, + "step": 1321 + }, + { + "epoch": 0.17, + "learning_rate": 9.525154775045264e-06, + "loss": 0.5655, + "step": 1322 + }, + { + "epoch": 0.17, + "learning_rate": 9.524290848357138e-06, + "loss": 0.5811, + "step": 1323 + }, + { + "epoch": 0.17, + "learning_rate": 9.523426175727011e-06, + "loss": 0.5634, + "step": 1324 + }, + { + "epoch": 0.17, + "learning_rate": 9.522560757297445e-06, + "loss": 0.5825, + "step": 1325 + }, + { + "epoch": 0.17, + "learning_rate": 9.521694593211123e-06, + "loss": 0.5704, + "step": 1326 + }, + { + "epoch": 0.17, + "learning_rate": 9.520827683610858e-06, + "loss": 0.5702, + "step": 1327 + }, + { + "epoch": 0.17, + "learning_rate": 9.519960028639579e-06, + "loss": 0.5745, + "step": 1328 + }, + { + "epoch": 0.17, + "learning_rate": 9.519091628440343e-06, + "loss": 0.5791, + "step": 1329 + }, + { + "epoch": 0.17, + "learning_rate": 9.518222483156326e-06, + "loss": 0.5586, + "step": 1330 + }, + { + "epoch": 0.17, + "learning_rate": 9.517352592930824e-06, + "loss": 0.6258, + "step": 1331 + }, + { + "epoch": 0.17, + "learning_rate": 9.516481957907267e-06, + "loss": 0.5046, + "step": 1332 + }, + { + "epoch": 0.17, + "learning_rate": 9.515610578229198e-06, + "loss": 0.5835, + "step": 1333 + }, + { + "epoch": 0.17, + "learning_rate": 9.514738454040285e-06, + "loss": 0.566, + "step": 1334 + }, + { + "epoch": 0.17, + "learning_rate": 9.51386558548432e-06, + "loss": 0.5709, + "step": 1335 + }, + { + "epoch": 0.17, + "learning_rate": 9.512991972705217e-06, + "loss": 0.6079, + "step": 1336 + }, + { + "epoch": 0.17, + "learning_rate": 9.512117615847012e-06, + "loss": 0.592, + "step": 1337 + }, + { + "epoch": 0.17, + "learning_rate": 9.511242515053867e-06, + "loss": 0.6035, + "step": 1338 + }, + { + "epoch": 0.17, + "learning_rate": 9.510366670470061e-06, + "loss": 0.597, + "step": 1339 + }, + { + "epoch": 0.17, + "learning_rate": 9.509490082240001e-06, + "loss": 0.5377, + "step": 1340 + }, + { + "epoch": 0.17, + "learning_rate": 9.508612750508212e-06, + "loss": 0.5842, + "step": 1341 + }, + { + "epoch": 0.17, + "learning_rate": 9.50773467541935e-06, + "loss": 0.6179, + "step": 1342 + }, + { + "epoch": 0.17, + "learning_rate": 9.506855857118178e-06, + "loss": 0.5879, + "step": 1343 + }, + { + "epoch": 0.17, + "learning_rate": 9.5059762957496e-06, + "loss": 0.5795, + "step": 1344 + }, + { + "epoch": 0.17, + "learning_rate": 9.505095991458629e-06, + "loss": 0.5118, + "step": 1345 + }, + { + "epoch": 0.17, + "learning_rate": 9.504214944390404e-06, + "loss": 0.5494, + "step": 1346 + }, + { + "epoch": 0.17, + "learning_rate": 9.503333154690191e-06, + "loss": 0.5195, + "step": 1347 + }, + { + "epoch": 0.17, + "learning_rate": 9.502450622503374e-06, + "loss": 0.6012, + "step": 1348 + }, + { + "epoch": 0.17, + "learning_rate": 9.501567347975458e-06, + "loss": 0.6236, + "step": 1349 + }, + { + "epoch": 0.17, + "learning_rate": 9.500683331252077e-06, + "loss": 0.6263, + "step": 1350 + }, + { + "epoch": 0.17, + "learning_rate": 9.49979857247898e-06, + "loss": 0.5288, + "step": 1351 + }, + { + "epoch": 0.17, + "learning_rate": 9.498913071802043e-06, + "loss": 0.5638, + "step": 1352 + }, + { + "epoch": 0.17, + "learning_rate": 9.498026829367261e-06, + "loss": 0.6752, + "step": 1353 + }, + { + "epoch": 0.17, + "learning_rate": 9.497139845320756e-06, + "loss": 0.5721, + "step": 1354 + }, + { + "epoch": 0.17, + "learning_rate": 9.496252119808766e-06, + "loss": 0.5671, + "step": 1355 + }, + { + "epoch": 0.17, + "learning_rate": 9.495363652977656e-06, + "loss": 0.5732, + "step": 1356 + }, + { + "epoch": 0.17, + "learning_rate": 9.494474444973913e-06, + "loss": 0.6018, + "step": 1357 + }, + { + "epoch": 0.17, + "learning_rate": 9.493584495944146e-06, + "loss": 0.6579, + "step": 1358 + }, + { + "epoch": 0.17, + "learning_rate": 9.492693806035083e-06, + "loss": 0.6168, + "step": 1359 + }, + { + "epoch": 0.17, + "learning_rate": 9.491802375393576e-06, + "loss": 0.6192, + "step": 1360 + }, + { + "epoch": 0.17, + "learning_rate": 9.4909102041666e-06, + "loss": 0.5928, + "step": 1361 + }, + { + "epoch": 0.17, + "learning_rate": 9.490017292501255e-06, + "loss": 0.5543, + "step": 1362 + }, + { + "epoch": 0.17, + "learning_rate": 9.489123640544755e-06, + "loss": 0.5395, + "step": 1363 + }, + { + "epoch": 0.17, + "learning_rate": 9.488229248444443e-06, + "loss": 0.5454, + "step": 1364 + }, + { + "epoch": 0.17, + "learning_rate": 9.487334116347781e-06, + "loss": 0.5665, + "step": 1365 + }, + { + "epoch": 0.17, + "learning_rate": 9.486438244402355e-06, + "loss": 0.5925, + "step": 1366 + }, + { + "epoch": 0.17, + "learning_rate": 9.48554163275587e-06, + "loss": 0.6278, + "step": 1367 + }, + { + "epoch": 0.17, + "learning_rate": 9.484644281556157e-06, + "loss": 0.6282, + "step": 1368 + }, + { + "epoch": 0.17, + "learning_rate": 9.483746190951167e-06, + "loss": 0.6337, + "step": 1369 + }, + { + "epoch": 0.17, + "learning_rate": 9.48284736108897e-06, + "loss": 0.5457, + "step": 1370 + }, + { + "epoch": 0.17, + "learning_rate": 9.48194779211776e-06, + "loss": 0.5231, + "step": 1371 + }, + { + "epoch": 0.17, + "learning_rate": 9.481047484185855e-06, + "loss": 0.6195, + "step": 1372 + }, + { + "epoch": 0.17, + "learning_rate": 9.480146437441695e-06, + "loss": 0.5481, + "step": 1373 + }, + { + "epoch": 0.17, + "learning_rate": 9.47924465203384e-06, + "loss": 0.6382, + "step": 1374 + }, + { + "epoch": 0.17, + "learning_rate": 9.478342128110967e-06, + "loss": 0.5852, + "step": 1375 + }, + { + "epoch": 0.17, + "learning_rate": 9.477438865821882e-06, + "loss": 0.5508, + "step": 1376 + }, + { + "epoch": 0.17, + "learning_rate": 9.476534865315513e-06, + "loss": 0.5844, + "step": 1377 + }, + { + "epoch": 0.17, + "learning_rate": 9.475630126740903e-06, + "loss": 0.6208, + "step": 1378 + }, + { + "epoch": 0.17, + "learning_rate": 9.474724650247222e-06, + "loss": 0.5061, + "step": 1379 + }, + { + "epoch": 0.17, + "learning_rate": 9.473818435983763e-06, + "loss": 0.5733, + "step": 1380 + }, + { + "epoch": 0.17, + "learning_rate": 9.472911484099935e-06, + "loss": 0.5893, + "step": 1381 + }, + { + "epoch": 0.17, + "learning_rate": 9.472003794745273e-06, + "loss": 0.6065, + "step": 1382 + }, + { + "epoch": 0.17, + "learning_rate": 9.471095368069429e-06, + "loss": 0.2379, + "step": 1383 + }, + { + "epoch": 0.17, + "learning_rate": 9.470186204222184e-06, + "loss": 0.5141, + "step": 1384 + }, + { + "epoch": 0.17, + "learning_rate": 9.469276303353436e-06, + "loss": 0.6672, + "step": 1385 + }, + { + "epoch": 0.17, + "learning_rate": 9.468365665613201e-06, + "loss": 0.4994, + "step": 1386 + }, + { + "epoch": 0.17, + "learning_rate": 9.467454291151626e-06, + "loss": 0.6148, + "step": 1387 + }, + { + "epoch": 0.17, + "learning_rate": 9.466542180118968e-06, + "loss": 0.5891, + "step": 1388 + }, + { + "epoch": 0.17, + "learning_rate": 9.465629332665613e-06, + "loss": 0.574, + "step": 1389 + }, + { + "epoch": 0.17, + "learning_rate": 9.46471574894207e-06, + "loss": 0.5718, + "step": 1390 + }, + { + "epoch": 0.17, + "learning_rate": 9.463801429098961e-06, + "loss": 0.5943, + "step": 1391 + }, + { + "epoch": 0.17, + "learning_rate": 9.46288637328704e-06, + "loss": 0.5193, + "step": 1392 + }, + { + "epoch": 0.17, + "learning_rate": 9.46197058165717e-06, + "loss": 0.5863, + "step": 1393 + }, + { + "epoch": 0.17, + "learning_rate": 9.461054054360348e-06, + "loss": 0.5558, + "step": 1394 + }, + { + "epoch": 0.17, + "learning_rate": 9.460136791547683e-06, + "loss": 0.6084, + "step": 1395 + }, + { + "epoch": 0.17, + "learning_rate": 9.45921879337041e-06, + "loss": 0.6026, + "step": 1396 + }, + { + "epoch": 0.18, + "learning_rate": 9.458300059979882e-06, + "loss": 0.5495, + "step": 1397 + }, + { + "epoch": 0.18, + "learning_rate": 9.457380591527579e-06, + "loss": 0.6072, + "step": 1398 + }, + { + "epoch": 0.18, + "learning_rate": 9.456460388165093e-06, + "loss": 0.5943, + "step": 1399 + }, + { + "epoch": 0.18, + "learning_rate": 9.455539450044147e-06, + "loss": 0.5667, + "step": 1400 + }, + { + "epoch": 0.18, + "learning_rate": 9.454617777316578e-06, + "loss": 0.5497, + "step": 1401 + }, + { + "epoch": 0.18, + "learning_rate": 9.453695370134348e-06, + "loss": 0.6111, + "step": 1402 + }, + { + "epoch": 0.18, + "learning_rate": 9.452772228649538e-06, + "loss": 0.5199, + "step": 1403 + }, + { + "epoch": 0.18, + "learning_rate": 9.45184835301435e-06, + "loss": 0.2083, + "step": 1404 + }, + { + "epoch": 0.18, + "learning_rate": 9.450923743381111e-06, + "loss": 0.5888, + "step": 1405 + }, + { + "epoch": 0.18, + "learning_rate": 9.449998399902263e-06, + "loss": 0.6167, + "step": 1406 + }, + { + "epoch": 0.18, + "learning_rate": 9.449072322730372e-06, + "loss": 0.6278, + "step": 1407 + }, + { + "epoch": 0.18, + "learning_rate": 9.448145512018128e-06, + "loss": 0.6051, + "step": 1408 + }, + { + "epoch": 0.18, + "learning_rate": 9.447217967918335e-06, + "loss": 0.6271, + "step": 1409 + }, + { + "epoch": 0.18, + "learning_rate": 9.446289690583924e-06, + "loss": 0.6733, + "step": 1410 + }, + { + "epoch": 0.18, + "learning_rate": 9.445360680167943e-06, + "loss": 0.5804, + "step": 1411 + }, + { + "epoch": 0.18, + "learning_rate": 9.444430936823564e-06, + "loss": 0.6189, + "step": 1412 + }, + { + "epoch": 0.18, + "learning_rate": 9.443500460704078e-06, + "loss": 0.5827, + "step": 1413 + }, + { + "epoch": 0.18, + "learning_rate": 9.442569251962896e-06, + "loss": 0.5906, + "step": 1414 + }, + { + "epoch": 0.18, + "learning_rate": 9.441637310753553e-06, + "loss": 0.5602, + "step": 1415 + }, + { + "epoch": 0.18, + "learning_rate": 9.440704637229702e-06, + "loss": 0.6038, + "step": 1416 + }, + { + "epoch": 0.18, + "learning_rate": 9.439771231545116e-06, + "loss": 0.6225, + "step": 1417 + }, + { + "epoch": 0.18, + "learning_rate": 9.438837093853691e-06, + "loss": 0.5307, + "step": 1418 + }, + { + "epoch": 0.18, + "learning_rate": 9.437902224309444e-06, + "loss": 0.5745, + "step": 1419 + }, + { + "epoch": 0.18, + "learning_rate": 9.436966623066511e-06, + "loss": 0.5699, + "step": 1420 + }, + { + "epoch": 0.18, + "learning_rate": 9.436030290279148e-06, + "loss": 0.5872, + "step": 1421 + }, + { + "epoch": 0.18, + "learning_rate": 9.435093226101733e-06, + "loss": 0.6075, + "step": 1422 + }, + { + "epoch": 0.18, + "learning_rate": 9.434155430688766e-06, + "loss": 0.2088, + "step": 1423 + }, + { + "epoch": 0.18, + "learning_rate": 9.433216904194864e-06, + "loss": 0.5878, + "step": 1424 + }, + { + "epoch": 0.18, + "learning_rate": 9.432277646774768e-06, + "loss": 0.5776, + "step": 1425 + }, + { + "epoch": 0.18, + "learning_rate": 9.431337658583338e-06, + "loss": 0.6157, + "step": 1426 + }, + { + "epoch": 0.18, + "learning_rate": 9.430396939775551e-06, + "loss": 0.5706, + "step": 1427 + }, + { + "epoch": 0.18, + "learning_rate": 9.429455490506512e-06, + "loss": 0.5717, + "step": 1428 + }, + { + "epoch": 0.18, + "learning_rate": 9.428513310931441e-06, + "loss": 0.5436, + "step": 1429 + }, + { + "epoch": 0.18, + "learning_rate": 9.42757040120568e-06, + "loss": 0.5731, + "step": 1430 + }, + { + "epoch": 0.18, + "learning_rate": 9.426626761484691e-06, + "loss": 0.5338, + "step": 1431 + }, + { + "epoch": 0.18, + "learning_rate": 9.425682391924058e-06, + "loss": 0.5935, + "step": 1432 + }, + { + "epoch": 0.18, + "learning_rate": 9.42473729267948e-06, + "loss": 0.5653, + "step": 1433 + }, + { + "epoch": 0.18, + "learning_rate": 9.423791463906784e-06, + "loss": 0.6284, + "step": 1434 + }, + { + "epoch": 0.18, + "learning_rate": 9.42284490576191e-06, + "loss": 0.206, + "step": 1435 + }, + { + "epoch": 0.18, + "learning_rate": 9.421897618400925e-06, + "loss": 0.5482, + "step": 1436 + }, + { + "epoch": 0.18, + "learning_rate": 9.420949601980011e-06, + "loss": 0.5853, + "step": 1437 + }, + { + "epoch": 0.18, + "learning_rate": 9.420000856655472e-06, + "loss": 0.6067, + "step": 1438 + }, + { + "epoch": 0.18, + "learning_rate": 9.419051382583732e-06, + "loss": 0.5908, + "step": 1439 + }, + { + "epoch": 0.18, + "learning_rate": 9.41810117992134e-06, + "loss": 0.5993, + "step": 1440 + }, + { + "epoch": 0.18, + "learning_rate": 9.417150248824953e-06, + "loss": 0.5789, + "step": 1441 + }, + { + "epoch": 0.18, + "learning_rate": 9.41619858945136e-06, + "loss": 0.607, + "step": 1442 + }, + { + "epoch": 0.18, + "learning_rate": 9.415246201957466e-06, + "loss": 0.5756, + "step": 1443 + }, + { + "epoch": 0.18, + "learning_rate": 9.414293086500295e-06, + "loss": 0.5875, + "step": 1444 + }, + { + "epoch": 0.18, + "learning_rate": 9.413339243236993e-06, + "loss": 0.5889, + "step": 1445 + }, + { + "epoch": 0.18, + "learning_rate": 9.412384672324822e-06, + "loss": 0.5812, + "step": 1446 + }, + { + "epoch": 0.18, + "learning_rate": 9.411429373921169e-06, + "loss": 0.5396, + "step": 1447 + }, + { + "epoch": 0.18, + "learning_rate": 9.410473348183539e-06, + "loss": 0.6726, + "step": 1448 + }, + { + "epoch": 0.18, + "learning_rate": 9.409516595269555e-06, + "loss": 0.5262, + "step": 1449 + }, + { + "epoch": 0.18, + "learning_rate": 9.40855911533696e-06, + "loss": 0.5807, + "step": 1450 + }, + { + "epoch": 0.18, + "learning_rate": 9.407600908543626e-06, + "loss": 0.6224, + "step": 1451 + }, + { + "epoch": 0.18, + "learning_rate": 9.406641975047528e-06, + "loss": 0.6171, + "step": 1452 + }, + { + "epoch": 0.18, + "learning_rate": 9.405682315006777e-06, + "loss": 0.5939, + "step": 1453 + }, + { + "epoch": 0.18, + "learning_rate": 9.404721928579591e-06, + "loss": 0.2416, + "step": 1454 + }, + { + "epoch": 0.18, + "learning_rate": 9.403760815924317e-06, + "loss": 0.6442, + "step": 1455 + }, + { + "epoch": 0.18, + "learning_rate": 9.40279897719942e-06, + "loss": 0.5655, + "step": 1456 + }, + { + "epoch": 0.18, + "learning_rate": 9.40183641256348e-06, + "loss": 0.6253, + "step": 1457 + }, + { + "epoch": 0.18, + "learning_rate": 9.4008731221752e-06, + "loss": 0.5983, + "step": 1458 + }, + { + "epoch": 0.18, + "learning_rate": 9.399909106193404e-06, + "loss": 0.5867, + "step": 1459 + }, + { + "epoch": 0.18, + "learning_rate": 9.398944364777032e-06, + "loss": 0.5721, + "step": 1460 + }, + { + "epoch": 0.18, + "learning_rate": 9.397978898085148e-06, + "loss": 0.5609, + "step": 1461 + }, + { + "epoch": 0.18, + "learning_rate": 9.397012706276932e-06, + "loss": 0.5757, + "step": 1462 + }, + { + "epoch": 0.18, + "learning_rate": 9.396045789511683e-06, + "loss": 0.6366, + "step": 1463 + }, + { + "epoch": 0.18, + "learning_rate": 9.395078147948825e-06, + "loss": 0.5101, + "step": 1464 + }, + { + "epoch": 0.18, + "learning_rate": 9.394109781747895e-06, + "loss": 0.6169, + "step": 1465 + }, + { + "epoch": 0.18, + "learning_rate": 9.393140691068555e-06, + "loss": 0.617, + "step": 1466 + }, + { + "epoch": 0.18, + "learning_rate": 9.39217087607058e-06, + "loss": 0.5926, + "step": 1467 + }, + { + "epoch": 0.18, + "learning_rate": 9.391200336913871e-06, + "loss": 0.5256, + "step": 1468 + }, + { + "epoch": 0.18, + "learning_rate": 9.390229073758443e-06, + "loss": 0.6164, + "step": 1469 + }, + { + "epoch": 0.18, + "learning_rate": 9.389257086764437e-06, + "loss": 0.6342, + "step": 1470 + }, + { + "epoch": 0.18, + "learning_rate": 9.388284376092104e-06, + "loss": 0.5032, + "step": 1471 + }, + { + "epoch": 0.18, + "learning_rate": 9.387310941901826e-06, + "loss": 0.2108, + "step": 1472 + }, + { + "epoch": 0.18, + "learning_rate": 9.386336784354092e-06, + "loss": 0.5472, + "step": 1473 + }, + { + "epoch": 0.18, + "learning_rate": 9.385361903609518e-06, + "loss": 0.6118, + "step": 1474 + }, + { + "epoch": 0.18, + "learning_rate": 9.38438629982884e-06, + "loss": 0.5215, + "step": 1475 + }, + { + "epoch": 0.19, + "learning_rate": 9.383409973172906e-06, + "loss": 0.5886, + "step": 1476 + }, + { + "epoch": 0.19, + "learning_rate": 9.382432923802692e-06, + "loss": 0.5698, + "step": 1477 + }, + { + "epoch": 0.19, + "learning_rate": 9.381455151879286e-06, + "loss": 0.5573, + "step": 1478 + }, + { + "epoch": 0.19, + "learning_rate": 9.380476657563897e-06, + "loss": 0.5356, + "step": 1479 + }, + { + "epoch": 0.19, + "learning_rate": 9.379497441017859e-06, + "loss": 0.5582, + "step": 1480 + }, + { + "epoch": 0.19, + "learning_rate": 9.378517502402617e-06, + "loss": 0.6419, + "step": 1481 + }, + { + "epoch": 0.19, + "learning_rate": 9.377536841879737e-06, + "loss": 0.6165, + "step": 1482 + }, + { + "epoch": 0.19, + "learning_rate": 9.37655545961091e-06, + "loss": 0.5735, + "step": 1483 + }, + { + "epoch": 0.19, + "learning_rate": 9.375573355757936e-06, + "loss": 0.6062, + "step": 1484 + }, + { + "epoch": 0.19, + "learning_rate": 9.374590530482741e-06, + "loss": 0.6555, + "step": 1485 + }, + { + "epoch": 0.19, + "learning_rate": 9.37360698394737e-06, + "loss": 0.57, + "step": 1486 + }, + { + "epoch": 0.19, + "learning_rate": 9.372622716313985e-06, + "loss": 0.5464, + "step": 1487 + }, + { + "epoch": 0.19, + "learning_rate": 9.371637727744865e-06, + "loss": 0.6073, + "step": 1488 + }, + { + "epoch": 0.19, + "learning_rate": 9.370652018402411e-06, + "loss": 0.5208, + "step": 1489 + }, + { + "epoch": 0.19, + "learning_rate": 9.369665588449142e-06, + "loss": 0.594, + "step": 1490 + }, + { + "epoch": 0.19, + "learning_rate": 9.368678438047694e-06, + "loss": 0.5718, + "step": 1491 + }, + { + "epoch": 0.19, + "learning_rate": 9.367690567360826e-06, + "loss": 0.6049, + "step": 1492 + }, + { + "epoch": 0.19, + "learning_rate": 9.366701976551409e-06, + "loss": 0.6048, + "step": 1493 + }, + { + "epoch": 0.19, + "learning_rate": 9.36571266578244e-06, + "loss": 0.621, + "step": 1494 + }, + { + "epoch": 0.19, + "learning_rate": 9.364722635217035e-06, + "loss": 0.552, + "step": 1495 + }, + { + "epoch": 0.19, + "learning_rate": 9.363731885018417e-06, + "loss": 0.5561, + "step": 1496 + }, + { + "epoch": 0.19, + "learning_rate": 9.36274041534994e-06, + "loss": 0.574, + "step": 1497 + }, + { + "epoch": 0.19, + "learning_rate": 9.361748226375073e-06, + "loss": 0.5907, + "step": 1498 + }, + { + "epoch": 0.19, + "learning_rate": 9.360755318257402e-06, + "loss": 0.5371, + "step": 1499 + }, + { + "epoch": 0.19, + "learning_rate": 9.359761691160635e-06, + "loss": 0.5486, + "step": 1500 + }, + { + "epoch": 0.19, + "learning_rate": 9.358767345248592e-06, + "loss": 0.587, + "step": 1501 + }, + { + "epoch": 0.19, + "learning_rate": 9.357772280685218e-06, + "loss": 0.6087, + "step": 1502 + }, + { + "epoch": 0.19, + "learning_rate": 9.356776497634575e-06, + "loss": 0.5514, + "step": 1503 + }, + { + "epoch": 0.19, + "learning_rate": 9.355779996260842e-06, + "loss": 0.616, + "step": 1504 + }, + { + "epoch": 0.19, + "learning_rate": 9.354782776728316e-06, + "loss": 0.5573, + "step": 1505 + }, + { + "epoch": 0.19, + "learning_rate": 9.353784839201416e-06, + "loss": 0.5796, + "step": 1506 + }, + { + "epoch": 0.19, + "learning_rate": 9.352786183844674e-06, + "loss": 0.5341, + "step": 1507 + }, + { + "epoch": 0.19, + "learning_rate": 9.351786810822742e-06, + "loss": 0.5749, + "step": 1508 + }, + { + "epoch": 0.19, + "learning_rate": 9.350786720300395e-06, + "loss": 0.5429, + "step": 1509 + }, + { + "epoch": 0.19, + "learning_rate": 9.349785912442524e-06, + "loss": 0.6109, + "step": 1510 + }, + { + "epoch": 0.19, + "learning_rate": 9.348784387414133e-06, + "loss": 0.5525, + "step": 1511 + }, + { + "epoch": 0.19, + "learning_rate": 9.34778214538035e-06, + "loss": 0.4908, + "step": 1512 + }, + { + "epoch": 0.19, + "learning_rate": 9.34677918650642e-06, + "loss": 0.5624, + "step": 1513 + }, + { + "epoch": 0.19, + "learning_rate": 9.345775510957705e-06, + "loss": 0.5776, + "step": 1514 + }, + { + "epoch": 0.19, + "learning_rate": 9.344771118899687e-06, + "loss": 0.6355, + "step": 1515 + }, + { + "epoch": 0.19, + "learning_rate": 9.343766010497963e-06, + "loss": 0.6017, + "step": 1516 + }, + { + "epoch": 0.19, + "learning_rate": 9.342760185918252e-06, + "loss": 0.6072, + "step": 1517 + }, + { + "epoch": 0.19, + "learning_rate": 9.34175364532639e-06, + "loss": 0.5269, + "step": 1518 + }, + { + "epoch": 0.19, + "learning_rate": 9.340746388888328e-06, + "loss": 0.6304, + "step": 1519 + }, + { + "epoch": 0.19, + "learning_rate": 9.339738416770138e-06, + "loss": 0.5749, + "step": 1520 + }, + { + "epoch": 0.19, + "learning_rate": 9.33872972913801e-06, + "loss": 0.5686, + "step": 1521 + }, + { + "epoch": 0.19, + "learning_rate": 9.337720326158251e-06, + "loss": 0.5576, + "step": 1522 + }, + { + "epoch": 0.19, + "learning_rate": 9.336710207997284e-06, + "loss": 0.6062, + "step": 1523 + }, + { + "epoch": 0.19, + "learning_rate": 9.335699374821657e-06, + "loss": 0.6242, + "step": 1524 + }, + { + "epoch": 0.19, + "learning_rate": 9.334687826798028e-06, + "loss": 0.603, + "step": 1525 + }, + { + "epoch": 0.19, + "learning_rate": 9.333675564093174e-06, + "loss": 0.6288, + "step": 1526 + }, + { + "epoch": 0.19, + "learning_rate": 9.332662586873995e-06, + "loss": 0.605, + "step": 1527 + }, + { + "epoch": 0.19, + "learning_rate": 9.331648895307505e-06, + "loss": 0.6032, + "step": 1528 + }, + { + "epoch": 0.19, + "learning_rate": 9.330634489560835e-06, + "loss": 0.6519, + "step": 1529 + }, + { + "epoch": 0.19, + "learning_rate": 9.329619369801235e-06, + "loss": 0.6088, + "step": 1530 + }, + { + "epoch": 0.19, + "learning_rate": 9.328603536196075e-06, + "loss": 0.5851, + "step": 1531 + }, + { + "epoch": 0.19, + "learning_rate": 9.327586988912838e-06, + "loss": 0.5503, + "step": 1532 + }, + { + "epoch": 0.19, + "learning_rate": 9.326569728119128e-06, + "loss": 0.6194, + "step": 1533 + }, + { + "epoch": 0.19, + "learning_rate": 9.325551753982667e-06, + "loss": 0.6128, + "step": 1534 + }, + { + "epoch": 0.19, + "learning_rate": 9.32453306667129e-06, + "loss": 0.6162, + "step": 1535 + }, + { + "epoch": 0.19, + "learning_rate": 9.323513666352956e-06, + "loss": 0.5298, + "step": 1536 + }, + { + "epoch": 0.19, + "learning_rate": 9.322493553195739e-06, + "loss": 0.6335, + "step": 1537 + }, + { + "epoch": 0.19, + "learning_rate": 9.321472727367828e-06, + "loss": 0.6265, + "step": 1538 + }, + { + "epoch": 0.19, + "learning_rate": 9.320451189037531e-06, + "loss": 0.5752, + "step": 1539 + }, + { + "epoch": 0.19, + "learning_rate": 9.319428938373278e-06, + "loss": 0.5265, + "step": 1540 + }, + { + "epoch": 0.19, + "learning_rate": 9.318405975543608e-06, + "loss": 0.5456, + "step": 1541 + }, + { + "epoch": 0.19, + "learning_rate": 9.317382300717183e-06, + "loss": 0.547, + "step": 1542 + }, + { + "epoch": 0.19, + "learning_rate": 9.316357914062783e-06, + "loss": 0.5671, + "step": 1543 + }, + { + "epoch": 0.19, + "learning_rate": 9.315332815749303e-06, + "loss": 0.561, + "step": 1544 + }, + { + "epoch": 0.19, + "learning_rate": 9.314307005945755e-06, + "loss": 0.5443, + "step": 1545 + }, + { + "epoch": 0.19, + "learning_rate": 9.31328048482127e-06, + "loss": 0.5721, + "step": 1546 + }, + { + "epoch": 0.19, + "learning_rate": 9.312253252545096e-06, + "loss": 0.6018, + "step": 1547 + }, + { + "epoch": 0.19, + "learning_rate": 9.311225309286596e-06, + "loss": 0.5928, + "step": 1548 + }, + { + "epoch": 0.19, + "learning_rate": 9.310196655215255e-06, + "loss": 0.5995, + "step": 1549 + }, + { + "epoch": 0.19, + "learning_rate": 9.30916729050067e-06, + "loss": 0.61, + "step": 1550 + }, + { + "epoch": 0.19, + "learning_rate": 9.308137215312558e-06, + "loss": 0.4869, + "step": 1551 + }, + { + "epoch": 0.19, + "learning_rate": 9.307106429820751e-06, + "loss": 0.5582, + "step": 1552 + }, + { + "epoch": 0.19, + "learning_rate": 9.306074934195203e-06, + "loss": 0.6005, + "step": 1553 + }, + { + "epoch": 0.19, + "learning_rate": 9.305042728605979e-06, + "loss": 0.5582, + "step": 1554 + }, + { + "epoch": 0.19, + "learning_rate": 9.304009813223265e-06, + "loss": 0.6156, + "step": 1555 + }, + { + "epoch": 0.2, + "learning_rate": 9.302976188217363e-06, + "loss": 0.5333, + "step": 1556 + }, + { + "epoch": 0.2, + "learning_rate": 9.30194185375869e-06, + "loss": 0.5503, + "step": 1557 + }, + { + "epoch": 0.2, + "learning_rate": 9.300906810017784e-06, + "loss": 0.6402, + "step": 1558 + }, + { + "epoch": 0.2, + "learning_rate": 9.299871057165298e-06, + "loss": 0.5831, + "step": 1559 + }, + { + "epoch": 0.2, + "learning_rate": 9.298834595371999e-06, + "loss": 0.5942, + "step": 1560 + }, + { + "epoch": 0.2, + "learning_rate": 9.297797424808776e-06, + "loss": 0.5632, + "step": 1561 + }, + { + "epoch": 0.2, + "learning_rate": 9.296759545646632e-06, + "loss": 0.5775, + "step": 1562 + }, + { + "epoch": 0.2, + "learning_rate": 9.295720958056684e-06, + "loss": 0.5573, + "step": 1563 + }, + { + "epoch": 0.2, + "learning_rate": 9.294681662210174e-06, + "loss": 0.5549, + "step": 1564 + }, + { + "epoch": 0.2, + "learning_rate": 9.293641658278452e-06, + "loss": 0.5849, + "step": 1565 + }, + { + "epoch": 0.2, + "learning_rate": 9.292600946432992e-06, + "loss": 0.52, + "step": 1566 + }, + { + "epoch": 0.2, + "learning_rate": 9.291559526845378e-06, + "loss": 0.6385, + "step": 1567 + }, + { + "epoch": 0.2, + "learning_rate": 9.290517399687316e-06, + "loss": 0.6339, + "step": 1568 + }, + { + "epoch": 0.2, + "learning_rate": 9.289474565130625e-06, + "loss": 0.6174, + "step": 1569 + }, + { + "epoch": 0.2, + "learning_rate": 9.288431023347244e-06, + "loss": 0.6132, + "step": 1570 + }, + { + "epoch": 0.2, + "learning_rate": 9.287386774509224e-06, + "loss": 0.5757, + "step": 1571 + }, + { + "epoch": 0.2, + "learning_rate": 9.28634181878874e-06, + "loss": 0.5487, + "step": 1572 + }, + { + "epoch": 0.2, + "learning_rate": 9.285296156358074e-06, + "loss": 0.591, + "step": 1573 + }, + { + "epoch": 0.2, + "learning_rate": 9.284249787389635e-06, + "loss": 0.6019, + "step": 1574 + }, + { + "epoch": 0.2, + "learning_rate": 9.283202712055936e-06, + "loss": 0.5842, + "step": 1575 + }, + { + "epoch": 0.2, + "learning_rate": 9.282154930529618e-06, + "loss": 0.5768, + "step": 1576 + }, + { + "epoch": 0.2, + "learning_rate": 9.281106442983437e-06, + "loss": 0.58, + "step": 1577 + }, + { + "epoch": 0.2, + "learning_rate": 9.280057249590254e-06, + "loss": 0.6219, + "step": 1578 + }, + { + "epoch": 0.2, + "learning_rate": 9.279007350523062e-06, + "loss": 0.5418, + "step": 1579 + }, + { + "epoch": 0.2, + "learning_rate": 9.277956745954957e-06, + "loss": 0.5855, + "step": 1580 + }, + { + "epoch": 0.2, + "learning_rate": 9.276905436059163e-06, + "loss": 0.6002, + "step": 1581 + }, + { + "epoch": 0.2, + "learning_rate": 9.27585342100901e-06, + "loss": 0.6618, + "step": 1582 + }, + { + "epoch": 0.2, + "learning_rate": 9.274800700977953e-06, + "loss": 0.5921, + "step": 1583 + }, + { + "epoch": 0.2, + "learning_rate": 9.273747276139555e-06, + "loss": 0.6409, + "step": 1584 + }, + { + "epoch": 0.2, + "learning_rate": 9.272693146667505e-06, + "loss": 0.5866, + "step": 1585 + }, + { + "epoch": 0.2, + "learning_rate": 9.271638312735594e-06, + "loss": 0.5603, + "step": 1586 + }, + { + "epoch": 0.2, + "learning_rate": 9.270582774517745e-06, + "loss": 0.593, + "step": 1587 + }, + { + "epoch": 0.2, + "learning_rate": 9.269526532187986e-06, + "loss": 0.5796, + "step": 1588 + }, + { + "epoch": 0.2, + "learning_rate": 9.268469585920467e-06, + "loss": 0.58, + "step": 1589 + }, + { + "epoch": 0.2, + "learning_rate": 9.267411935889452e-06, + "loss": 0.6288, + "step": 1590 + }, + { + "epoch": 0.2, + "learning_rate": 9.266353582269318e-06, + "loss": 0.6003, + "step": 1591 + }, + { + "epoch": 0.2, + "learning_rate": 9.265294525234564e-06, + "loss": 0.5833, + "step": 1592 + }, + { + "epoch": 0.2, + "learning_rate": 9.264234764959801e-06, + "loss": 0.6133, + "step": 1593 + }, + { + "epoch": 0.2, + "learning_rate": 9.263174301619756e-06, + "loss": 0.6544, + "step": 1594 + }, + { + "epoch": 0.2, + "learning_rate": 9.262113135389273e-06, + "loss": 0.5575, + "step": 1595 + }, + { + "epoch": 0.2, + "learning_rate": 9.261051266443314e-06, + "loss": 0.5479, + "step": 1596 + }, + { + "epoch": 0.2, + "learning_rate": 9.25998869495695e-06, + "loss": 0.5918, + "step": 1597 + }, + { + "epoch": 0.2, + "learning_rate": 9.258925421105376e-06, + "loss": 0.6773, + "step": 1598 + }, + { + "epoch": 0.2, + "learning_rate": 9.257861445063897e-06, + "loss": 0.5707, + "step": 1599 + }, + { + "epoch": 0.2, + "learning_rate": 9.256796767007939e-06, + "loss": 0.5703, + "step": 1600 + }, + { + "epoch": 0.2, + "learning_rate": 9.255731387113039e-06, + "loss": 0.6296, + "step": 1601 + }, + { + "epoch": 0.2, + "learning_rate": 9.254665305554851e-06, + "loss": 0.5492, + "step": 1602 + }, + { + "epoch": 0.2, + "learning_rate": 9.253598522509143e-06, + "loss": 0.6287, + "step": 1603 + }, + { + "epoch": 0.2, + "learning_rate": 9.252531038151807e-06, + "loss": 0.5206, + "step": 1604 + }, + { + "epoch": 0.2, + "learning_rate": 9.251462852658837e-06, + "loss": 0.2182, + "step": 1605 + }, + { + "epoch": 0.2, + "learning_rate": 9.250393966206354e-06, + "loss": 0.5739, + "step": 1606 + }, + { + "epoch": 0.2, + "learning_rate": 9.249324378970593e-06, + "loss": 0.5259, + "step": 1607 + }, + { + "epoch": 0.2, + "learning_rate": 9.248254091127896e-06, + "loss": 0.5877, + "step": 1608 + }, + { + "epoch": 0.2, + "learning_rate": 9.24718310285473e-06, + "loss": 0.6302, + "step": 1609 + }, + { + "epoch": 0.2, + "learning_rate": 9.246111414327673e-06, + "loss": 0.6058, + "step": 1610 + }, + { + "epoch": 0.2, + "learning_rate": 9.245039025723422e-06, + "loss": 0.609, + "step": 1611 + }, + { + "epoch": 0.2, + "learning_rate": 9.243965937218786e-06, + "loss": 0.5497, + "step": 1612 + }, + { + "epoch": 0.2, + "learning_rate": 9.242892148990687e-06, + "loss": 0.6344, + "step": 1613 + }, + { + "epoch": 0.2, + "learning_rate": 9.24181766121617e-06, + "loss": 0.5803, + "step": 1614 + }, + { + "epoch": 0.2, + "learning_rate": 9.240742474072388e-06, + "loss": 0.5426, + "step": 1615 + }, + { + "epoch": 0.2, + "learning_rate": 9.239666587736615e-06, + "loss": 0.5906, + "step": 1616 + }, + { + "epoch": 0.2, + "learning_rate": 9.238590002386237e-06, + "loss": 0.602, + "step": 1617 + }, + { + "epoch": 0.2, + "learning_rate": 9.237512718198754e-06, + "loss": 0.2315, + "step": 1618 + }, + { + "epoch": 0.2, + "learning_rate": 9.236434735351785e-06, + "loss": 0.5651, + "step": 1619 + }, + { + "epoch": 0.2, + "learning_rate": 9.235356054023061e-06, + "loss": 0.5524, + "step": 1620 + }, + { + "epoch": 0.2, + "learning_rate": 9.234276674390431e-06, + "loss": 0.5944, + "step": 1621 + }, + { + "epoch": 0.2, + "learning_rate": 9.233196596631857e-06, + "loss": 0.6032, + "step": 1622 + }, + { + "epoch": 0.2, + "learning_rate": 9.232115820925415e-06, + "loss": 0.6158, + "step": 1623 + }, + { + "epoch": 0.2, + "learning_rate": 9.231034347449301e-06, + "loss": 0.5302, + "step": 1624 + }, + { + "epoch": 0.2, + "learning_rate": 9.229952176381821e-06, + "loss": 0.5643, + "step": 1625 + }, + { + "epoch": 0.2, + "learning_rate": 9.228869307901398e-06, + "loss": 0.5917, + "step": 1626 + }, + { + "epoch": 0.2, + "learning_rate": 9.227785742186571e-06, + "loss": 0.2412, + "step": 1627 + }, + { + "epoch": 0.2, + "learning_rate": 9.22670147941599e-06, + "loss": 0.5934, + "step": 1628 + }, + { + "epoch": 0.2, + "learning_rate": 9.225616519768423e-06, + "loss": 0.5463, + "step": 1629 + }, + { + "epoch": 0.2, + "learning_rate": 9.224530863422755e-06, + "loss": 0.242, + "step": 1630 + }, + { + "epoch": 0.2, + "learning_rate": 9.223444510557983e-06, + "loss": 0.5898, + "step": 1631 + }, + { + "epoch": 0.2, + "learning_rate": 9.22235746135322e-06, + "loss": 0.5826, + "step": 1632 + }, + { + "epoch": 0.2, + "learning_rate": 9.22126971598769e-06, + "loss": 0.6661, + "step": 1633 + }, + { + "epoch": 0.2, + "learning_rate": 9.220181274640738e-06, + "loss": 0.6251, + "step": 1634 + }, + { + "epoch": 0.2, + "learning_rate": 9.219092137491819e-06, + "loss": 0.5469, + "step": 1635 + }, + { + "epoch": 0.21, + "learning_rate": 9.218002304720504e-06, + "loss": 0.5909, + "step": 1636 + }, + { + "epoch": 0.21, + "learning_rate": 9.21691177650648e-06, + "loss": 0.59, + "step": 1637 + }, + { + "epoch": 0.21, + "learning_rate": 9.21582055302955e-06, + "loss": 0.2334, + "step": 1638 + }, + { + "epoch": 0.21, + "learning_rate": 9.214728634469626e-06, + "loss": 0.6086, + "step": 1639 + }, + { + "epoch": 0.21, + "learning_rate": 9.213636021006738e-06, + "loss": 0.539, + "step": 1640 + }, + { + "epoch": 0.21, + "learning_rate": 9.212542712821031e-06, + "loss": 0.5913, + "step": 1641 + }, + { + "epoch": 0.21, + "learning_rate": 9.211448710092766e-06, + "loss": 0.6282, + "step": 1642 + }, + { + "epoch": 0.21, + "learning_rate": 9.210354013002313e-06, + "loss": 0.605, + "step": 1643 + }, + { + "epoch": 0.21, + "learning_rate": 9.209258621730164e-06, + "loss": 0.6433, + "step": 1644 + }, + { + "epoch": 0.21, + "learning_rate": 9.20816253645692e-06, + "loss": 0.1994, + "step": 1645 + }, + { + "epoch": 0.21, + "learning_rate": 9.207065757363298e-06, + "loss": 0.5662, + "step": 1646 + }, + { + "epoch": 0.21, + "learning_rate": 9.205968284630128e-06, + "loss": 0.6322, + "step": 1647 + }, + { + "epoch": 0.21, + "learning_rate": 9.204870118438357e-06, + "loss": 0.2121, + "step": 1648 + }, + { + "epoch": 0.21, + "learning_rate": 9.203771258969045e-06, + "loss": 0.5462, + "step": 1649 + }, + { + "epoch": 0.21, + "learning_rate": 9.202671706403365e-06, + "loss": 0.4997, + "step": 1650 + }, + { + "epoch": 0.21, + "learning_rate": 9.20157146092261e-06, + "loss": 0.6035, + "step": 1651 + }, + { + "epoch": 0.21, + "learning_rate": 9.200470522708176e-06, + "loss": 0.5536, + "step": 1652 + }, + { + "epoch": 0.21, + "learning_rate": 9.199368891941585e-06, + "loss": 0.5736, + "step": 1653 + }, + { + "epoch": 0.21, + "learning_rate": 9.198266568804467e-06, + "loss": 0.5686, + "step": 1654 + }, + { + "epoch": 0.21, + "learning_rate": 9.19716355347857e-06, + "loss": 0.6593, + "step": 1655 + }, + { + "epoch": 0.21, + "learning_rate": 9.196059846145747e-06, + "loss": 0.5974, + "step": 1656 + }, + { + "epoch": 0.21, + "learning_rate": 9.194955446987976e-06, + "loss": 0.5282, + "step": 1657 + }, + { + "epoch": 0.21, + "learning_rate": 9.193850356187346e-06, + "loss": 0.5341, + "step": 1658 + }, + { + "epoch": 0.21, + "learning_rate": 9.192744573926058e-06, + "loss": 0.5638, + "step": 1659 + }, + { + "epoch": 0.21, + "learning_rate": 9.191638100386424e-06, + "loss": 0.5772, + "step": 1660 + }, + { + "epoch": 0.21, + "learning_rate": 9.190530935750878e-06, + "loss": 0.6697, + "step": 1661 + }, + { + "epoch": 0.21, + "learning_rate": 9.189423080201962e-06, + "loss": 0.5316, + "step": 1662 + }, + { + "epoch": 0.21, + "learning_rate": 9.188314533922334e-06, + "loss": 0.6103, + "step": 1663 + }, + { + "epoch": 0.21, + "learning_rate": 9.187205297094763e-06, + "loss": 0.5499, + "step": 1664 + }, + { + "epoch": 0.21, + "learning_rate": 9.186095369902139e-06, + "loss": 0.6035, + "step": 1665 + }, + { + "epoch": 0.21, + "learning_rate": 9.184984752527457e-06, + "loss": 0.5561, + "step": 1666 + }, + { + "epoch": 0.21, + "learning_rate": 9.183873445153833e-06, + "loss": 0.6065, + "step": 1667 + }, + { + "epoch": 0.21, + "learning_rate": 9.182761447964492e-06, + "loss": 0.6053, + "step": 1668 + }, + { + "epoch": 0.21, + "learning_rate": 9.181648761142775e-06, + "loss": 0.6177, + "step": 1669 + }, + { + "epoch": 0.21, + "learning_rate": 9.180535384872134e-06, + "loss": 0.5602, + "step": 1670 + }, + { + "epoch": 0.21, + "learning_rate": 9.179421319336139e-06, + "loss": 0.6133, + "step": 1671 + }, + { + "epoch": 0.21, + "learning_rate": 9.178306564718472e-06, + "loss": 0.6344, + "step": 1672 + }, + { + "epoch": 0.21, + "learning_rate": 9.177191121202927e-06, + "loss": 0.5986, + "step": 1673 + }, + { + "epoch": 0.21, + "learning_rate": 9.176074988973413e-06, + "loss": 0.5564, + "step": 1674 + }, + { + "epoch": 0.21, + "learning_rate": 9.17495816821395e-06, + "loss": 0.6005, + "step": 1675 + }, + { + "epoch": 0.21, + "learning_rate": 9.173840659108677e-06, + "loss": 0.6133, + "step": 1676 + }, + { + "epoch": 0.21, + "learning_rate": 9.172722461841843e-06, + "loss": 0.5739, + "step": 1677 + }, + { + "epoch": 0.21, + "learning_rate": 9.171603576597808e-06, + "loss": 0.6303, + "step": 1678 + }, + { + "epoch": 0.21, + "learning_rate": 9.17048400356105e-06, + "loss": 0.5098, + "step": 1679 + }, + { + "epoch": 0.21, + "learning_rate": 9.169363742916158e-06, + "loss": 0.518, + "step": 1680 + }, + { + "epoch": 0.21, + "learning_rate": 9.168242794847835e-06, + "loss": 0.6001, + "step": 1681 + }, + { + "epoch": 0.21, + "learning_rate": 9.167121159540897e-06, + "loss": 0.6105, + "step": 1682 + }, + { + "epoch": 0.21, + "learning_rate": 9.165998837180271e-06, + "loss": 0.5736, + "step": 1683 + }, + { + "epoch": 0.21, + "learning_rate": 9.164875827951007e-06, + "loss": 0.5584, + "step": 1684 + }, + { + "epoch": 0.21, + "learning_rate": 9.163752132038254e-06, + "loss": 0.5273, + "step": 1685 + }, + { + "epoch": 0.21, + "learning_rate": 9.162627749627284e-06, + "loss": 0.5933, + "step": 1686 + }, + { + "epoch": 0.21, + "learning_rate": 9.16150268090348e-06, + "loss": 0.2295, + "step": 1687 + }, + { + "epoch": 0.21, + "learning_rate": 9.160376926052336e-06, + "loss": 0.6375, + "step": 1688 + }, + { + "epoch": 0.21, + "learning_rate": 9.159250485259462e-06, + "loss": 0.5024, + "step": 1689 + }, + { + "epoch": 0.21, + "learning_rate": 9.158123358710578e-06, + "loss": 0.6257, + "step": 1690 + }, + { + "epoch": 0.21, + "learning_rate": 9.156995546591522e-06, + "loss": 0.6587, + "step": 1691 + }, + { + "epoch": 0.21, + "learning_rate": 9.155867049088239e-06, + "loss": 0.5764, + "step": 1692 + }, + { + "epoch": 0.21, + "learning_rate": 9.154737866386791e-06, + "loss": 0.5657, + "step": 1693 + }, + { + "epoch": 0.21, + "learning_rate": 9.153607998673352e-06, + "loss": 0.6019, + "step": 1694 + }, + { + "epoch": 0.21, + "learning_rate": 9.15247744613421e-06, + "loss": 0.5697, + "step": 1695 + }, + { + "epoch": 0.21, + "learning_rate": 9.15134620895576e-06, + "loss": 0.6481, + "step": 1696 + }, + { + "epoch": 0.21, + "learning_rate": 9.150214287324522e-06, + "loss": 0.6578, + "step": 1697 + }, + { + "epoch": 0.21, + "learning_rate": 9.149081681427115e-06, + "loss": 0.5782, + "step": 1698 + }, + { + "epoch": 0.21, + "learning_rate": 9.14794839145028e-06, + "loss": 0.5745, + "step": 1699 + }, + { + "epoch": 0.21, + "learning_rate": 9.14681441758087e-06, + "loss": 0.5572, + "step": 1700 + }, + { + "epoch": 0.21, + "learning_rate": 9.145679760005843e-06, + "loss": 0.5921, + "step": 1701 + }, + { + "epoch": 0.21, + "learning_rate": 9.144544418912283e-06, + "loss": 0.543, + "step": 1702 + }, + { + "epoch": 0.21, + "learning_rate": 9.143408394487374e-06, + "loss": 0.552, + "step": 1703 + }, + { + "epoch": 0.21, + "learning_rate": 9.142271686918419e-06, + "loss": 0.4738, + "step": 1704 + }, + { + "epoch": 0.21, + "learning_rate": 9.141134296392833e-06, + "loss": 0.5758, + "step": 1705 + }, + { + "epoch": 0.21, + "learning_rate": 9.139996223098142e-06, + "loss": 0.5714, + "step": 1706 + }, + { + "epoch": 0.21, + "learning_rate": 9.138857467221989e-06, + "loss": 0.5741, + "step": 1707 + }, + { + "epoch": 0.21, + "learning_rate": 9.137718028952123e-06, + "loss": 0.5527, + "step": 1708 + }, + { + "epoch": 0.21, + "learning_rate": 9.136577908476411e-06, + "loss": 0.5733, + "step": 1709 + }, + { + "epoch": 0.21, + "learning_rate": 9.135437105982828e-06, + "loss": 0.6045, + "step": 1710 + }, + { + "epoch": 0.21, + "learning_rate": 9.134295621659465e-06, + "loss": 0.5855, + "step": 1711 + }, + { + "epoch": 0.21, + "learning_rate": 9.133153455694524e-06, + "loss": 0.5603, + "step": 1712 + }, + { + "epoch": 0.21, + "learning_rate": 9.132010608276319e-06, + "loss": 0.5987, + "step": 1713 + }, + { + "epoch": 0.21, + "learning_rate": 9.130867079593278e-06, + "loss": 0.5513, + "step": 1714 + }, + { + "epoch": 0.21, + "learning_rate": 9.12972286983394e-06, + "loss": 0.5806, + "step": 1715 + }, + { + "epoch": 0.22, + "learning_rate": 9.128577979186955e-06, + "loss": 0.623, + "step": 1716 + }, + { + "epoch": 0.22, + "learning_rate": 9.127432407841086e-06, + "loss": 0.6273, + "step": 1717 + }, + { + "epoch": 0.22, + "learning_rate": 9.126286155985213e-06, + "loss": 0.6008, + "step": 1718 + }, + { + "epoch": 0.22, + "learning_rate": 9.12513922380832e-06, + "loss": 0.5647, + "step": 1719 + }, + { + "epoch": 0.22, + "learning_rate": 9.123991611499512e-06, + "loss": 0.5617, + "step": 1720 + }, + { + "epoch": 0.22, + "learning_rate": 9.122843319247996e-06, + "loss": 0.6116, + "step": 1721 + }, + { + "epoch": 0.22, + "learning_rate": 9.1216943472431e-06, + "loss": 0.4657, + "step": 1722 + }, + { + "epoch": 0.22, + "learning_rate": 9.12054469567426e-06, + "loss": 0.5849, + "step": 1723 + }, + { + "epoch": 0.22, + "learning_rate": 9.119394364731025e-06, + "loss": 0.5773, + "step": 1724 + }, + { + "epoch": 0.22, + "learning_rate": 9.118243354603053e-06, + "loss": 0.5728, + "step": 1725 + }, + { + "epoch": 0.22, + "learning_rate": 9.11709166548012e-06, + "loss": 0.5242, + "step": 1726 + }, + { + "epoch": 0.22, + "learning_rate": 9.11593929755211e-06, + "loss": 0.5956, + "step": 1727 + }, + { + "epoch": 0.22, + "learning_rate": 9.11478625100902e-06, + "loss": 0.5189, + "step": 1728 + }, + { + "epoch": 0.22, + "learning_rate": 9.113632526040956e-06, + "loss": 0.5246, + "step": 1729 + }, + { + "epoch": 0.22, + "learning_rate": 9.11247812283814e-06, + "loss": 0.5853, + "step": 1730 + }, + { + "epoch": 0.22, + "learning_rate": 9.111323041590903e-06, + "loss": 0.5901, + "step": 1731 + }, + { + "epoch": 0.22, + "learning_rate": 9.110167282489692e-06, + "loss": 0.5483, + "step": 1732 + }, + { + "epoch": 0.22, + "learning_rate": 9.10901084572506e-06, + "loss": 0.6044, + "step": 1733 + }, + { + "epoch": 0.22, + "learning_rate": 9.107853731487673e-06, + "loss": 0.6061, + "step": 1734 + }, + { + "epoch": 0.22, + "learning_rate": 9.106695939968313e-06, + "loss": 0.5588, + "step": 1735 + }, + { + "epoch": 0.22, + "learning_rate": 9.10553747135787e-06, + "loss": 0.5657, + "step": 1736 + }, + { + "epoch": 0.22, + "learning_rate": 9.104378325847344e-06, + "loss": 0.5875, + "step": 1737 + }, + { + "epoch": 0.22, + "learning_rate": 9.103218503627852e-06, + "loss": 0.541, + "step": 1738 + }, + { + "epoch": 0.22, + "learning_rate": 9.102058004890621e-06, + "loss": 0.5607, + "step": 1739 + }, + { + "epoch": 0.22, + "learning_rate": 9.100896829826984e-06, + "loss": 0.6108, + "step": 1740 + }, + { + "epoch": 0.22, + "learning_rate": 9.099734978628392e-06, + "loss": 0.573, + "step": 1741 + }, + { + "epoch": 0.22, + "learning_rate": 9.098572451486404e-06, + "loss": 0.5305, + "step": 1742 + }, + { + "epoch": 0.22, + "learning_rate": 9.097409248592693e-06, + "loss": 0.6395, + "step": 1743 + }, + { + "epoch": 0.22, + "learning_rate": 9.09624537013904e-06, + "loss": 0.6332, + "step": 1744 + }, + { + "epoch": 0.22, + "learning_rate": 9.095080816317341e-06, + "loss": 0.5745, + "step": 1745 + }, + { + "epoch": 0.22, + "learning_rate": 9.0939155873196e-06, + "loss": 0.5369, + "step": 1746 + }, + { + "epoch": 0.22, + "learning_rate": 9.092749683337936e-06, + "loss": 0.5677, + "step": 1747 + }, + { + "epoch": 0.22, + "learning_rate": 9.091583104564575e-06, + "loss": 0.6506, + "step": 1748 + }, + { + "epoch": 0.22, + "learning_rate": 9.09041585119186e-06, + "loss": 0.5728, + "step": 1749 + }, + { + "epoch": 0.22, + "learning_rate": 9.089247923412237e-06, + "loss": 0.5454, + "step": 1750 + }, + { + "epoch": 0.22, + "learning_rate": 9.088079321418273e-06, + "loss": 0.5808, + "step": 1751 + }, + { + "epoch": 0.22, + "learning_rate": 9.08691004540264e-06, + "loss": 0.2396, + "step": 1752 + }, + { + "epoch": 0.22, + "learning_rate": 9.085740095558118e-06, + "loss": 0.5207, + "step": 1753 + }, + { + "epoch": 0.22, + "learning_rate": 9.084569472077607e-06, + "loss": 0.6213, + "step": 1754 + }, + { + "epoch": 0.22, + "learning_rate": 9.083398175154111e-06, + "loss": 0.5762, + "step": 1755 + }, + { + "epoch": 0.22, + "learning_rate": 9.082226204980749e-06, + "loss": 0.565, + "step": 1756 + }, + { + "epoch": 0.22, + "learning_rate": 9.08105356175075e-06, + "loss": 0.5213, + "step": 1757 + }, + { + "epoch": 0.22, + "learning_rate": 9.07988024565745e-06, + "loss": 0.5829, + "step": 1758 + }, + { + "epoch": 0.22, + "learning_rate": 9.078706256894303e-06, + "loss": 0.6246, + "step": 1759 + }, + { + "epoch": 0.22, + "learning_rate": 9.077531595654867e-06, + "loss": 0.6057, + "step": 1760 + }, + { + "epoch": 0.22, + "learning_rate": 9.076356262132819e-06, + "loss": 0.5661, + "step": 1761 + }, + { + "epoch": 0.22, + "learning_rate": 9.075180256521935e-06, + "loss": 0.6148, + "step": 1762 + }, + { + "epoch": 0.22, + "learning_rate": 9.074003579016117e-06, + "loss": 0.5505, + "step": 1763 + }, + { + "epoch": 0.22, + "learning_rate": 9.072826229809364e-06, + "loss": 0.5706, + "step": 1764 + }, + { + "epoch": 0.22, + "learning_rate": 9.071648209095793e-06, + "loss": 0.5718, + "step": 1765 + }, + { + "epoch": 0.22, + "learning_rate": 9.070469517069629e-06, + "loss": 0.6019, + "step": 1766 + }, + { + "epoch": 0.22, + "learning_rate": 9.06929015392521e-06, + "loss": 0.6173, + "step": 1767 + }, + { + "epoch": 0.22, + "learning_rate": 9.068110119856983e-06, + "loss": 0.6595, + "step": 1768 + }, + { + "epoch": 0.22, + "learning_rate": 9.066929415059506e-06, + "loss": 0.5142, + "step": 1769 + }, + { + "epoch": 0.22, + "learning_rate": 9.065748039727446e-06, + "loss": 0.5714, + "step": 1770 + }, + { + "epoch": 0.22, + "learning_rate": 9.064565994055587e-06, + "loss": 0.2139, + "step": 1771 + }, + { + "epoch": 0.22, + "learning_rate": 9.063383278238812e-06, + "loss": 0.5897, + "step": 1772 + }, + { + "epoch": 0.22, + "learning_rate": 9.062199892472126e-06, + "loss": 0.5918, + "step": 1773 + }, + { + "epoch": 0.22, + "learning_rate": 9.061015836950639e-06, + "loss": 0.6192, + "step": 1774 + }, + { + "epoch": 0.22, + "learning_rate": 9.05983111186957e-06, + "loss": 0.6375, + "step": 1775 + }, + { + "epoch": 0.22, + "learning_rate": 9.058645717424251e-06, + "loss": 0.6027, + "step": 1776 + }, + { + "epoch": 0.22, + "learning_rate": 9.057459653810126e-06, + "loss": 0.6574, + "step": 1777 + }, + { + "epoch": 0.22, + "learning_rate": 9.056272921222744e-06, + "loss": 0.6038, + "step": 1778 + }, + { + "epoch": 0.22, + "learning_rate": 9.055085519857769e-06, + "loss": 0.5889, + "step": 1779 + }, + { + "epoch": 0.22, + "learning_rate": 9.053897449910974e-06, + "loss": 0.5784, + "step": 1780 + }, + { + "epoch": 0.22, + "learning_rate": 9.052708711578241e-06, + "loss": 0.2366, + "step": 1781 + }, + { + "epoch": 0.22, + "learning_rate": 9.051519305055565e-06, + "loss": 0.5687, + "step": 1782 + }, + { + "epoch": 0.22, + "learning_rate": 9.050329230539046e-06, + "loss": 0.5886, + "step": 1783 + }, + { + "epoch": 0.22, + "learning_rate": 9.0491384882249e-06, + "loss": 0.2097, + "step": 1784 + }, + { + "epoch": 0.22, + "learning_rate": 9.04794707830945e-06, + "loss": 0.5342, + "step": 1785 + }, + { + "epoch": 0.22, + "learning_rate": 9.04675500098913e-06, + "loss": 0.5785, + "step": 1786 + }, + { + "epoch": 0.22, + "learning_rate": 9.045562256460482e-06, + "loss": 0.6135, + "step": 1787 + }, + { + "epoch": 0.22, + "learning_rate": 9.044368844920161e-06, + "loss": 0.5614, + "step": 1788 + }, + { + "epoch": 0.22, + "learning_rate": 9.04317476656493e-06, + "loss": 0.655, + "step": 1789 + }, + { + "epoch": 0.22, + "learning_rate": 9.041980021591664e-06, + "loss": 0.5607, + "step": 1790 + }, + { + "epoch": 0.22, + "learning_rate": 9.040784610197344e-06, + "loss": 0.2248, + "step": 1791 + }, + { + "epoch": 0.22, + "learning_rate": 9.039588532579065e-06, + "loss": 0.6087, + "step": 1792 + }, + { + "epoch": 0.22, + "learning_rate": 9.03839178893403e-06, + "loss": 0.5383, + "step": 1793 + }, + { + "epoch": 0.22, + "learning_rate": 9.037194379459551e-06, + "loss": 0.5664, + "step": 1794 + }, + { + "epoch": 0.23, + "learning_rate": 9.035996304353051e-06, + "loss": 0.5406, + "step": 1795 + }, + { + "epoch": 0.23, + "learning_rate": 9.034797563812066e-06, + "loss": 0.58, + "step": 1796 + }, + { + "epoch": 0.23, + "learning_rate": 9.033598158034233e-06, + "loss": 0.5493, + "step": 1797 + }, + { + "epoch": 0.23, + "learning_rate": 9.032398087217307e-06, + "loss": 0.6336, + "step": 1798 + }, + { + "epoch": 0.23, + "learning_rate": 9.031197351559147e-06, + "loss": 0.6922, + "step": 1799 + }, + { + "epoch": 0.23, + "learning_rate": 9.029995951257729e-06, + "loss": 0.5375, + "step": 1800 + }, + { + "epoch": 0.23, + "learning_rate": 9.028793886511127e-06, + "loss": 0.57, + "step": 1801 + }, + { + "epoch": 0.23, + "learning_rate": 9.027591157517539e-06, + "loss": 0.5888, + "step": 1802 + }, + { + "epoch": 0.23, + "learning_rate": 9.026387764475258e-06, + "loss": 0.5715, + "step": 1803 + }, + { + "epoch": 0.23, + "learning_rate": 9.025183707582696e-06, + "loss": 0.5908, + "step": 1804 + }, + { + "epoch": 0.23, + "learning_rate": 9.023978987038373e-06, + "loss": 0.5495, + "step": 1805 + }, + { + "epoch": 0.23, + "learning_rate": 9.022773603040915e-06, + "loss": 0.5558, + "step": 1806 + }, + { + "epoch": 0.23, + "learning_rate": 9.021567555789062e-06, + "loss": 0.5967, + "step": 1807 + }, + { + "epoch": 0.23, + "learning_rate": 9.020360845481659e-06, + "loss": 0.6257, + "step": 1808 + }, + { + "epoch": 0.23, + "learning_rate": 9.019153472317663e-06, + "loss": 0.5758, + "step": 1809 + }, + { + "epoch": 0.23, + "learning_rate": 9.017945436496139e-06, + "loss": 0.2102, + "step": 1810 + }, + { + "epoch": 0.23, + "learning_rate": 9.016736738216262e-06, + "loss": 0.5985, + "step": 1811 + }, + { + "epoch": 0.23, + "learning_rate": 9.015527377677318e-06, + "loss": 0.5352, + "step": 1812 + }, + { + "epoch": 0.23, + "learning_rate": 9.014317355078696e-06, + "loss": 0.606, + "step": 1813 + }, + { + "epoch": 0.23, + "learning_rate": 9.0131066706199e-06, + "loss": 0.5272, + "step": 1814 + }, + { + "epoch": 0.23, + "learning_rate": 9.011895324500544e-06, + "loss": 0.5754, + "step": 1815 + }, + { + "epoch": 0.23, + "learning_rate": 9.010683316920347e-06, + "loss": 0.2094, + "step": 1816 + }, + { + "epoch": 0.23, + "learning_rate": 9.009470648079137e-06, + "loss": 0.633, + "step": 1817 + }, + { + "epoch": 0.23, + "learning_rate": 9.008257318176855e-06, + "loss": 0.4994, + "step": 1818 + }, + { + "epoch": 0.23, + "learning_rate": 9.007043327413547e-06, + "loss": 0.225, + "step": 1819 + }, + { + "epoch": 0.23, + "learning_rate": 9.00582867598937e-06, + "loss": 0.5862, + "step": 1820 + }, + { + "epoch": 0.23, + "learning_rate": 9.004613364104589e-06, + "loss": 0.6109, + "step": 1821 + }, + { + "epoch": 0.23, + "learning_rate": 9.003397391959581e-06, + "loss": 0.6131, + "step": 1822 + }, + { + "epoch": 0.23, + "learning_rate": 9.002180759754827e-06, + "loss": 0.598, + "step": 1823 + }, + { + "epoch": 0.23, + "learning_rate": 9.00096346769092e-06, + "loss": 0.5507, + "step": 1824 + }, + { + "epoch": 0.23, + "learning_rate": 8.999745515968559e-06, + "loss": 0.5777, + "step": 1825 + }, + { + "epoch": 0.23, + "learning_rate": 8.998526904788556e-06, + "loss": 0.5528, + "step": 1826 + }, + { + "epoch": 0.23, + "learning_rate": 8.997307634351828e-06, + "loss": 0.5847, + "step": 1827 + }, + { + "epoch": 0.23, + "learning_rate": 8.996087704859403e-06, + "loss": 0.5788, + "step": 1828 + }, + { + "epoch": 0.23, + "learning_rate": 8.994867116512418e-06, + "loss": 0.5678, + "step": 1829 + }, + { + "epoch": 0.23, + "learning_rate": 8.993645869512114e-06, + "loss": 0.5883, + "step": 1830 + }, + { + "epoch": 0.23, + "learning_rate": 8.992423964059846e-06, + "loss": 0.6235, + "step": 1831 + }, + { + "epoch": 0.23, + "learning_rate": 8.991201400357076e-06, + "loss": 0.6287, + "step": 1832 + }, + { + "epoch": 0.23, + "learning_rate": 8.989978178605372e-06, + "loss": 0.5602, + "step": 1833 + }, + { + "epoch": 0.23, + "learning_rate": 8.988754299006417e-06, + "loss": 0.5791, + "step": 1834 + }, + { + "epoch": 0.23, + "learning_rate": 8.987529761761992e-06, + "loss": 0.5493, + "step": 1835 + }, + { + "epoch": 0.23, + "learning_rate": 8.986304567073996e-06, + "loss": 0.5538, + "step": 1836 + }, + { + "epoch": 0.23, + "learning_rate": 8.985078715144433e-06, + "loss": 0.6406, + "step": 1837 + }, + { + "epoch": 0.23, + "learning_rate": 8.983852206175415e-06, + "loss": 0.621, + "step": 1838 + }, + { + "epoch": 0.23, + "learning_rate": 8.982625040369161e-06, + "loss": 0.5609, + "step": 1839 + }, + { + "epoch": 0.23, + "learning_rate": 8.981397217928001e-06, + "loss": 0.5641, + "step": 1840 + }, + { + "epoch": 0.23, + "learning_rate": 8.980168739054371e-06, + "loss": 0.5577, + "step": 1841 + }, + { + "epoch": 0.23, + "learning_rate": 8.978939603950819e-06, + "loss": 0.5319, + "step": 1842 + }, + { + "epoch": 0.23, + "learning_rate": 8.977709812819997e-06, + "loss": 0.6001, + "step": 1843 + }, + { + "epoch": 0.23, + "learning_rate": 8.976479365864666e-06, + "loss": 0.6115, + "step": 1844 + }, + { + "epoch": 0.23, + "learning_rate": 8.975248263287696e-06, + "loss": 0.5866, + "step": 1845 + }, + { + "epoch": 0.23, + "learning_rate": 8.974016505292065e-06, + "loss": 0.5636, + "step": 1846 + }, + { + "epoch": 0.23, + "learning_rate": 8.97278409208086e-06, + "loss": 0.5791, + "step": 1847 + }, + { + "epoch": 0.23, + "learning_rate": 8.971551023857274e-06, + "loss": 0.2211, + "step": 1848 + }, + { + "epoch": 0.23, + "learning_rate": 8.97031730082461e-06, + "loss": 0.5794, + "step": 1849 + }, + { + "epoch": 0.23, + "learning_rate": 8.969082923186275e-06, + "loss": 0.5244, + "step": 1850 + }, + { + "epoch": 0.23, + "learning_rate": 8.96784789114579e-06, + "loss": 0.5249, + "step": 1851 + }, + { + "epoch": 0.23, + "learning_rate": 8.966612204906781e-06, + "loss": 0.5452, + "step": 1852 + }, + { + "epoch": 0.23, + "learning_rate": 8.96537586467298e-06, + "loss": 0.5935, + "step": 1853 + }, + { + "epoch": 0.23, + "learning_rate": 8.96413887064823e-06, + "loss": 0.6585, + "step": 1854 + }, + { + "epoch": 0.23, + "learning_rate": 8.962901223036478e-06, + "loss": 0.5589, + "step": 1855 + }, + { + "epoch": 0.23, + "learning_rate": 8.96166292204178e-06, + "loss": 0.6538, + "step": 1856 + }, + { + "epoch": 0.23, + "learning_rate": 8.960423967868307e-06, + "loss": 0.5493, + "step": 1857 + }, + { + "epoch": 0.23, + "learning_rate": 8.959184360720326e-06, + "loss": 0.5648, + "step": 1858 + }, + { + "epoch": 0.23, + "learning_rate": 8.957944100802218e-06, + "loss": 0.522, + "step": 1859 + }, + { + "epoch": 0.23, + "learning_rate": 8.956703188318474e-06, + "loss": 0.5953, + "step": 1860 + }, + { + "epoch": 0.23, + "learning_rate": 8.955461623473684e-06, + "loss": 0.5548, + "step": 1861 + }, + { + "epoch": 0.23, + "learning_rate": 8.954219406472553e-06, + "loss": 0.5824, + "step": 1862 + }, + { + "epoch": 0.23, + "learning_rate": 8.952976537519895e-06, + "loss": 0.5725, + "step": 1863 + }, + { + "epoch": 0.23, + "learning_rate": 8.951733016820622e-06, + "loss": 0.6126, + "step": 1864 + }, + { + "epoch": 0.23, + "learning_rate": 8.950488844579763e-06, + "loss": 0.5508, + "step": 1865 + }, + { + "epoch": 0.23, + "learning_rate": 8.94924402100245e-06, + "loss": 0.5891, + "step": 1866 + }, + { + "epoch": 0.23, + "learning_rate": 8.947998546293924e-06, + "loss": 0.5532, + "step": 1867 + }, + { + "epoch": 0.23, + "learning_rate": 8.94675242065953e-06, + "loss": 0.5518, + "step": 1868 + }, + { + "epoch": 0.23, + "learning_rate": 8.945505644304727e-06, + "loss": 0.5528, + "step": 1869 + }, + { + "epoch": 0.23, + "learning_rate": 8.94425821743507e-06, + "loss": 0.5522, + "step": 1870 + }, + { + "epoch": 0.23, + "learning_rate": 8.943010140256238e-06, + "loss": 0.6041, + "step": 1871 + }, + { + "epoch": 0.23, + "learning_rate": 8.941761412974e-06, + "loss": 0.5817, + "step": 1872 + }, + { + "epoch": 0.23, + "learning_rate": 8.940512035794242e-06, + "loss": 0.5574, + "step": 1873 + }, + { + "epoch": 0.23, + "learning_rate": 8.939262008922957e-06, + "loss": 0.6432, + "step": 1874 + }, + { + "epoch": 0.24, + "learning_rate": 8.938011332566241e-06, + "loss": 0.5765, + "step": 1875 + }, + { + "epoch": 0.24, + "learning_rate": 8.936760006930298e-06, + "loss": 0.5503, + "step": 1876 + }, + { + "epoch": 0.24, + "learning_rate": 8.935508032221443e-06, + "loss": 0.5782, + "step": 1877 + }, + { + "epoch": 0.24, + "learning_rate": 8.934255408646094e-06, + "loss": 0.5739, + "step": 1878 + }, + { + "epoch": 0.24, + "learning_rate": 8.933002136410776e-06, + "loss": 0.5755, + "step": 1879 + }, + { + "epoch": 0.24, + "learning_rate": 8.931748215722125e-06, + "loss": 0.5538, + "step": 1880 + }, + { + "epoch": 0.24, + "learning_rate": 8.930493646786877e-06, + "loss": 0.5889, + "step": 1881 + }, + { + "epoch": 0.24, + "learning_rate": 8.929238429811883e-06, + "loss": 0.5592, + "step": 1882 + }, + { + "epoch": 0.24, + "learning_rate": 8.927982565004094e-06, + "loss": 0.5486, + "step": 1883 + }, + { + "epoch": 0.24, + "learning_rate": 8.926726052570573e-06, + "loss": 0.6439, + "step": 1884 + }, + { + "epoch": 0.24, + "learning_rate": 8.925468892718484e-06, + "loss": 0.569, + "step": 1885 + }, + { + "epoch": 0.24, + "learning_rate": 8.924211085655104e-06, + "loss": 0.5626, + "step": 1886 + }, + { + "epoch": 0.24, + "learning_rate": 8.922952631587813e-06, + "loss": 0.5836, + "step": 1887 + }, + { + "epoch": 0.24, + "learning_rate": 8.921693530724098e-06, + "loss": 0.5837, + "step": 1888 + }, + { + "epoch": 0.24, + "learning_rate": 8.920433783271553e-06, + "loss": 0.5426, + "step": 1889 + }, + { + "epoch": 0.24, + "learning_rate": 8.91917338943788e-06, + "loss": 0.6255, + "step": 1890 + }, + { + "epoch": 0.24, + "learning_rate": 8.917912349430887e-06, + "loss": 0.5613, + "step": 1891 + }, + { + "epoch": 0.24, + "learning_rate": 8.916650663458484e-06, + "loss": 0.5841, + "step": 1892 + }, + { + "epoch": 0.24, + "learning_rate": 8.915388331728693e-06, + "loss": 0.5922, + "step": 1893 + }, + { + "epoch": 0.24, + "learning_rate": 8.914125354449644e-06, + "loss": 0.5948, + "step": 1894 + }, + { + "epoch": 0.24, + "learning_rate": 8.912861731829567e-06, + "loss": 0.5862, + "step": 1895 + }, + { + "epoch": 0.24, + "learning_rate": 8.911597464076802e-06, + "loss": 0.57, + "step": 1896 + }, + { + "epoch": 0.24, + "learning_rate": 8.910332551399795e-06, + "loss": 0.6035, + "step": 1897 + }, + { + "epoch": 0.24, + "learning_rate": 8.9090669940071e-06, + "loss": 0.2529, + "step": 1898 + }, + { + "epoch": 0.24, + "learning_rate": 8.907800792107373e-06, + "loss": 0.5874, + "step": 1899 + }, + { + "epoch": 0.24, + "learning_rate": 8.906533945909382e-06, + "loss": 0.2289, + "step": 1900 + }, + { + "epoch": 0.24, + "learning_rate": 8.905266455621995e-06, + "loss": 0.5953, + "step": 1901 + }, + { + "epoch": 0.24, + "learning_rate": 8.90399832145419e-06, + "loss": 0.5052, + "step": 1902 + }, + { + "epoch": 0.24, + "learning_rate": 8.902729543615054e-06, + "loss": 0.6642, + "step": 1903 + }, + { + "epoch": 0.24, + "learning_rate": 8.901460122313772e-06, + "loss": 0.5622, + "step": 1904 + }, + { + "epoch": 0.24, + "learning_rate": 8.900190057759642e-06, + "loss": 0.5665, + "step": 1905 + }, + { + "epoch": 0.24, + "learning_rate": 8.898919350162066e-06, + "loss": 0.5399, + "step": 1906 + }, + { + "epoch": 0.24, + "learning_rate": 8.89764799973055e-06, + "loss": 0.5951, + "step": 1907 + }, + { + "epoch": 0.24, + "learning_rate": 8.89637600667471e-06, + "loss": 0.5412, + "step": 1908 + }, + { + "epoch": 0.24, + "learning_rate": 8.895103371204265e-06, + "loss": 0.6425, + "step": 1909 + }, + { + "epoch": 0.24, + "learning_rate": 8.89383009352904e-06, + "loss": 0.6327, + "step": 1910 + }, + { + "epoch": 0.24, + "learning_rate": 8.892556173858965e-06, + "loss": 0.6396, + "step": 1911 + }, + { + "epoch": 0.24, + "learning_rate": 8.891281612404083e-06, + "loss": 0.6618, + "step": 1912 + }, + { + "epoch": 0.24, + "learning_rate": 8.890006409374531e-06, + "loss": 0.5333, + "step": 1913 + }, + { + "epoch": 0.24, + "learning_rate": 8.888730564980561e-06, + "loss": 0.5996, + "step": 1914 + }, + { + "epoch": 0.24, + "learning_rate": 8.887454079432527e-06, + "loss": 0.5329, + "step": 1915 + }, + { + "epoch": 0.24, + "learning_rate": 8.886176952940891e-06, + "loss": 0.5605, + "step": 1916 + }, + { + "epoch": 0.24, + "learning_rate": 8.884899185716216e-06, + "loss": 0.6047, + "step": 1917 + }, + { + "epoch": 0.24, + "learning_rate": 8.883620777969175e-06, + "loss": 0.583, + "step": 1918 + }, + { + "epoch": 0.24, + "learning_rate": 8.882341729910546e-06, + "loss": 0.5331, + "step": 1919 + }, + { + "epoch": 0.24, + "learning_rate": 8.881062041751213e-06, + "loss": 0.5798, + "step": 1920 + }, + { + "epoch": 0.24, + "learning_rate": 8.879781713702163e-06, + "loss": 0.6138, + "step": 1921 + }, + { + "epoch": 0.24, + "learning_rate": 8.878500745974488e-06, + "loss": 0.6146, + "step": 1922 + }, + { + "epoch": 0.24, + "learning_rate": 8.877219138779392e-06, + "loss": 0.5453, + "step": 1923 + }, + { + "epoch": 0.24, + "learning_rate": 8.875936892328176e-06, + "loss": 0.5985, + "step": 1924 + }, + { + "epoch": 0.24, + "learning_rate": 8.874654006832252e-06, + "loss": 0.5494, + "step": 1925 + }, + { + "epoch": 0.24, + "learning_rate": 8.873370482503135e-06, + "loss": 0.5942, + "step": 1926 + }, + { + "epoch": 0.24, + "learning_rate": 8.872086319552447e-06, + "loss": 0.5862, + "step": 1927 + }, + { + "epoch": 0.24, + "learning_rate": 8.870801518191912e-06, + "loss": 0.5888, + "step": 1928 + }, + { + "epoch": 0.24, + "learning_rate": 8.869516078633364e-06, + "loss": 0.6501, + "step": 1929 + }, + { + "epoch": 0.24, + "learning_rate": 8.86823000108874e-06, + "loss": 0.5563, + "step": 1930 + }, + { + "epoch": 0.24, + "learning_rate": 8.866943285770077e-06, + "loss": 0.5447, + "step": 1931 + }, + { + "epoch": 0.24, + "learning_rate": 8.865655932889529e-06, + "loss": 0.6215, + "step": 1932 + }, + { + "epoch": 0.24, + "learning_rate": 8.864367942659344e-06, + "loss": 0.5686, + "step": 1933 + }, + { + "epoch": 0.24, + "learning_rate": 8.86307931529188e-06, + "loss": 0.6007, + "step": 1934 + }, + { + "epoch": 0.24, + "learning_rate": 8.861790050999598e-06, + "loss": 0.5881, + "step": 1935 + }, + { + "epoch": 0.24, + "learning_rate": 8.860500149995069e-06, + "loss": 0.6144, + "step": 1936 + }, + { + "epoch": 0.24, + "learning_rate": 8.85920961249096e-06, + "loss": 0.5499, + "step": 1937 + }, + { + "epoch": 0.24, + "learning_rate": 8.857918438700054e-06, + "loss": 0.5768, + "step": 1938 + }, + { + "epoch": 0.24, + "learning_rate": 8.85662662883523e-06, + "loss": 0.5968, + "step": 1939 + }, + { + "epoch": 0.24, + "learning_rate": 8.855334183109475e-06, + "loss": 0.5543, + "step": 1940 + }, + { + "epoch": 0.24, + "learning_rate": 8.85404110173588e-06, + "loss": 0.6787, + "step": 1941 + }, + { + "epoch": 0.24, + "learning_rate": 8.852747384927644e-06, + "loss": 0.5572, + "step": 1942 + }, + { + "epoch": 0.24, + "learning_rate": 8.851453032898068e-06, + "loss": 0.5865, + "step": 1943 + }, + { + "epoch": 0.24, + "learning_rate": 8.850158045860555e-06, + "loss": 0.6097, + "step": 1944 + }, + { + "epoch": 0.24, + "learning_rate": 8.84886242402862e-06, + "loss": 0.6204, + "step": 1945 + }, + { + "epoch": 0.24, + "learning_rate": 8.847566167615879e-06, + "loss": 0.5826, + "step": 1946 + }, + { + "epoch": 0.24, + "learning_rate": 8.846269276836048e-06, + "loss": 0.54, + "step": 1947 + }, + { + "epoch": 0.24, + "learning_rate": 8.844971751902953e-06, + "loss": 0.5666, + "step": 1948 + }, + { + "epoch": 0.24, + "learning_rate": 8.843673593030527e-06, + "loss": 0.6227, + "step": 1949 + }, + { + "epoch": 0.24, + "learning_rate": 8.842374800432799e-06, + "loss": 0.5364, + "step": 1950 + }, + { + "epoch": 0.24, + "learning_rate": 8.841075374323908e-06, + "loss": 0.6221, + "step": 1951 + }, + { + "epoch": 0.24, + "learning_rate": 8.8397753149181e-06, + "loss": 0.597, + "step": 1952 + }, + { + "epoch": 0.24, + "learning_rate": 8.838474622429717e-06, + "loss": 0.5413, + "step": 1953 + }, + { + "epoch": 0.24, + "learning_rate": 8.837173297073217e-06, + "loss": 0.5462, + "step": 1954 + }, + { + "epoch": 0.25, + "learning_rate": 8.83587133906315e-06, + "loss": 0.6016, + "step": 1955 + }, + { + "epoch": 0.25, + "learning_rate": 8.834568748614183e-06, + "loss": 0.5446, + "step": 1956 + }, + { + "epoch": 0.25, + "learning_rate": 8.833265525941073e-06, + "loss": 0.6196, + "step": 1957 + }, + { + "epoch": 0.25, + "learning_rate": 8.831961671258692e-06, + "loss": 0.5634, + "step": 1958 + }, + { + "epoch": 0.25, + "learning_rate": 8.830657184782012e-06, + "loss": 0.6373, + "step": 1959 + }, + { + "epoch": 0.25, + "learning_rate": 8.829352066726114e-06, + "loss": 0.5585, + "step": 1960 + }, + { + "epoch": 0.25, + "learning_rate": 8.828046317306174e-06, + "loss": 0.5914, + "step": 1961 + }, + { + "epoch": 0.25, + "learning_rate": 8.826739936737478e-06, + "loss": 0.5024, + "step": 1962 + }, + { + "epoch": 0.25, + "learning_rate": 8.82543292523542e-06, + "loss": 0.5889, + "step": 1963 + }, + { + "epoch": 0.25, + "learning_rate": 8.824125283015487e-06, + "loss": 0.5451, + "step": 1964 + }, + { + "epoch": 0.25, + "learning_rate": 8.822817010293281e-06, + "loss": 0.587, + "step": 1965 + }, + { + "epoch": 0.25, + "learning_rate": 8.821508107284502e-06, + "loss": 0.5491, + "step": 1966 + }, + { + "epoch": 0.25, + "learning_rate": 8.820198574204955e-06, + "loss": 0.5407, + "step": 1967 + }, + { + "epoch": 0.25, + "learning_rate": 8.818888411270549e-06, + "loss": 0.5938, + "step": 1968 + }, + { + "epoch": 0.25, + "learning_rate": 8.817577618697299e-06, + "loss": 0.5793, + "step": 1969 + }, + { + "epoch": 0.25, + "learning_rate": 8.816266196701319e-06, + "loss": 0.6386, + "step": 1970 + }, + { + "epoch": 0.25, + "learning_rate": 8.81495414549883e-06, + "loss": 0.5519, + "step": 1971 + }, + { + "epoch": 0.25, + "learning_rate": 8.813641465306157e-06, + "loss": 0.5064, + "step": 1972 + }, + { + "epoch": 0.25, + "learning_rate": 8.81232815633973e-06, + "loss": 0.5962, + "step": 1973 + }, + { + "epoch": 0.25, + "learning_rate": 8.811014218816077e-06, + "loss": 0.5241, + "step": 1974 + }, + { + "epoch": 0.25, + "learning_rate": 8.809699652951834e-06, + "loss": 0.4958, + "step": 1975 + }, + { + "epoch": 0.25, + "learning_rate": 8.808384458963742e-06, + "loss": 0.5679, + "step": 1976 + }, + { + "epoch": 0.25, + "learning_rate": 8.807068637068643e-06, + "loss": 0.5916, + "step": 1977 + }, + { + "epoch": 0.25, + "learning_rate": 8.80575218748348e-06, + "loss": 0.6097, + "step": 1978 + }, + { + "epoch": 0.25, + "learning_rate": 8.804435110425308e-06, + "loss": 0.6261, + "step": 1979 + }, + { + "epoch": 0.25, + "learning_rate": 8.803117406111275e-06, + "loss": 0.6015, + "step": 1980 + }, + { + "epoch": 0.25, + "learning_rate": 8.801799074758638e-06, + "loss": 0.2169, + "step": 1981 + }, + { + "epoch": 0.25, + "learning_rate": 8.80048011658476e-06, + "loss": 0.6518, + "step": 1982 + }, + { + "epoch": 0.25, + "learning_rate": 8.799160531807101e-06, + "loss": 0.6385, + "step": 1983 + }, + { + "epoch": 0.25, + "learning_rate": 8.79784032064323e-06, + "loss": 0.5708, + "step": 1984 + }, + { + "epoch": 0.25, + "learning_rate": 8.796519483310813e-06, + "loss": 0.5633, + "step": 1985 + }, + { + "epoch": 0.25, + "learning_rate": 8.795198020027626e-06, + "loss": 0.504, + "step": 1986 + }, + { + "epoch": 0.25, + "learning_rate": 8.793875931011543e-06, + "loss": 0.6185, + "step": 1987 + }, + { + "epoch": 0.25, + "learning_rate": 8.792553216480546e-06, + "loss": 0.587, + "step": 1988 + }, + { + "epoch": 0.25, + "learning_rate": 8.791229876652716e-06, + "loss": 0.5915, + "step": 1989 + }, + { + "epoch": 0.25, + "learning_rate": 8.789905911746238e-06, + "loss": 0.5941, + "step": 1990 + }, + { + "epoch": 0.25, + "learning_rate": 8.7885813219794e-06, + "loss": 0.5466, + "step": 1991 + }, + { + "epoch": 0.25, + "learning_rate": 8.787256107570597e-06, + "loss": 0.5766, + "step": 1992 + }, + { + "epoch": 0.25, + "learning_rate": 8.785930268738318e-06, + "loss": 0.6143, + "step": 1993 + }, + { + "epoch": 0.25, + "learning_rate": 8.784603805701167e-06, + "loss": 0.5419, + "step": 1994 + }, + { + "epoch": 0.25, + "learning_rate": 8.783276718677837e-06, + "loss": 0.5817, + "step": 1995 + }, + { + "epoch": 0.25, + "learning_rate": 8.78194900788714e-06, + "loss": 0.5323, + "step": 1996 + }, + { + "epoch": 0.25, + "learning_rate": 8.780620673547975e-06, + "loss": 0.544, + "step": 1997 + }, + { + "epoch": 0.25, + "learning_rate": 8.779291715879355e-06, + "loss": 0.569, + "step": 1998 + }, + { + "epoch": 0.25, + "learning_rate": 8.77796213510039e-06, + "loss": 0.6097, + "step": 1999 + }, + { + "epoch": 0.25, + "learning_rate": 8.776631931430295e-06, + "loss": 0.5983, + "step": 2000 + }, + { + "epoch": 0.25, + "learning_rate": 8.775301105088388e-06, + "loss": 0.6278, + "step": 2001 + }, + { + "epoch": 0.25, + "learning_rate": 8.773969656294085e-06, + "loss": 0.5107, + "step": 2002 + }, + { + "epoch": 0.25, + "learning_rate": 8.772637585266914e-06, + "loss": 0.5514, + "step": 2003 + }, + { + "epoch": 0.25, + "learning_rate": 8.771304892226498e-06, + "loss": 0.5854, + "step": 2004 + }, + { + "epoch": 0.25, + "learning_rate": 8.769971577392563e-06, + "loss": 0.5695, + "step": 2005 + }, + { + "epoch": 0.25, + "learning_rate": 8.768637640984941e-06, + "loss": 0.5861, + "step": 2006 + }, + { + "epoch": 0.25, + "learning_rate": 8.767303083223564e-06, + "loss": 0.6345, + "step": 2007 + }, + { + "epoch": 0.25, + "learning_rate": 8.765967904328467e-06, + "loss": 0.5776, + "step": 2008 + }, + { + "epoch": 0.25, + "learning_rate": 8.764632104519786e-06, + "loss": 0.538, + "step": 2009 + }, + { + "epoch": 0.25, + "learning_rate": 8.763295684017764e-06, + "loss": 0.5853, + "step": 2010 + }, + { + "epoch": 0.25, + "learning_rate": 8.761958643042743e-06, + "loss": 0.578, + "step": 2011 + }, + { + "epoch": 0.25, + "learning_rate": 8.760620981815164e-06, + "loss": 0.5356, + "step": 2012 + }, + { + "epoch": 0.25, + "learning_rate": 8.759282700555575e-06, + "loss": 0.6247, + "step": 2013 + }, + { + "epoch": 0.25, + "learning_rate": 8.757943799484628e-06, + "loss": 0.6294, + "step": 2014 + }, + { + "epoch": 0.25, + "learning_rate": 8.756604278823073e-06, + "loss": 0.5029, + "step": 2015 + }, + { + "epoch": 0.25, + "learning_rate": 8.75526413879176e-06, + "loss": 0.5319, + "step": 2016 + }, + { + "epoch": 0.25, + "learning_rate": 8.753923379611648e-06, + "loss": 0.4955, + "step": 2017 + }, + { + "epoch": 0.25, + "learning_rate": 8.752582001503792e-06, + "loss": 0.5291, + "step": 2018 + }, + { + "epoch": 0.25, + "learning_rate": 8.751240004689354e-06, + "loss": 0.5562, + "step": 2019 + }, + { + "epoch": 0.25, + "learning_rate": 8.749897389389595e-06, + "loss": 0.5734, + "step": 2020 + }, + { + "epoch": 0.25, + "learning_rate": 8.748554155825877e-06, + "loss": 0.5478, + "step": 2021 + }, + { + "epoch": 0.25, + "learning_rate": 8.74721030421967e-06, + "loss": 0.5916, + "step": 2022 + }, + { + "epoch": 0.25, + "learning_rate": 8.745865834792533e-06, + "loss": 0.5388, + "step": 2023 + }, + { + "epoch": 0.25, + "learning_rate": 8.744520747766143e-06, + "loss": 0.6569, + "step": 2024 + }, + { + "epoch": 0.25, + "learning_rate": 8.743175043362269e-06, + "loss": 0.5759, + "step": 2025 + }, + { + "epoch": 0.25, + "learning_rate": 8.741828721802782e-06, + "loss": 0.6261, + "step": 2026 + }, + { + "epoch": 0.25, + "learning_rate": 8.740481783309658e-06, + "loss": 0.5687, + "step": 2027 + }, + { + "epoch": 0.25, + "learning_rate": 8.739134228104974e-06, + "loss": 0.5652, + "step": 2028 + }, + { + "epoch": 0.25, + "learning_rate": 8.737786056410905e-06, + "loss": 0.5262, + "step": 2029 + }, + { + "epoch": 0.25, + "learning_rate": 8.736437268449735e-06, + "loss": 0.637, + "step": 2030 + }, + { + "epoch": 0.25, + "learning_rate": 8.735087864443841e-06, + "loss": 0.5369, + "step": 2031 + }, + { + "epoch": 0.25, + "learning_rate": 8.73373784461571e-06, + "loss": 0.5557, + "step": 2032 + }, + { + "epoch": 0.25, + "learning_rate": 8.732387209187925e-06, + "loss": 0.6015, + "step": 2033 + }, + { + "epoch": 0.25, + "learning_rate": 8.73103595838317e-06, + "loss": 0.5585, + "step": 2034 + }, + { + "epoch": 0.26, + "learning_rate": 8.729684092424236e-06, + "loss": 0.5859, + "step": 2035 + }, + { + "epoch": 0.26, + "learning_rate": 8.728331611534007e-06, + "loss": 0.5652, + "step": 2036 + }, + { + "epoch": 0.26, + "learning_rate": 8.726978515935476e-06, + "loss": 0.5758, + "step": 2037 + }, + { + "epoch": 0.26, + "learning_rate": 8.725624805851734e-06, + "loss": 0.576, + "step": 2038 + }, + { + "epoch": 0.26, + "learning_rate": 8.724270481505974e-06, + "loss": 0.5353, + "step": 2039 + }, + { + "epoch": 0.26, + "learning_rate": 8.72291554312149e-06, + "loss": 0.5967, + "step": 2040 + }, + { + "epoch": 0.26, + "learning_rate": 8.721559990921679e-06, + "loss": 0.5917, + "step": 2041 + }, + { + "epoch": 0.26, + "learning_rate": 8.720203825130034e-06, + "loss": 0.5795, + "step": 2042 + }, + { + "epoch": 0.26, + "learning_rate": 8.718847045970156e-06, + "loss": 0.6176, + "step": 2043 + }, + { + "epoch": 0.26, + "learning_rate": 8.717489653665741e-06, + "loss": 0.6148, + "step": 2044 + }, + { + "epoch": 0.26, + "learning_rate": 8.716131648440592e-06, + "loss": 0.5899, + "step": 2045 + }, + { + "epoch": 0.26, + "learning_rate": 8.714773030518605e-06, + "loss": 0.6389, + "step": 2046 + }, + { + "epoch": 0.26, + "learning_rate": 8.713413800123787e-06, + "loss": 0.5879, + "step": 2047 + }, + { + "epoch": 0.26, + "learning_rate": 8.71205395748024e-06, + "loss": 0.5467, + "step": 2048 + }, + { + "epoch": 0.26, + "learning_rate": 8.710693502812166e-06, + "loss": 0.6047, + "step": 2049 + }, + { + "epoch": 0.26, + "learning_rate": 8.709332436343872e-06, + "loss": 0.6309, + "step": 2050 + }, + { + "epoch": 0.26, + "learning_rate": 8.707970758299763e-06, + "loss": 0.6224, + "step": 2051 + }, + { + "epoch": 0.26, + "learning_rate": 8.706608468904344e-06, + "loss": 0.6192, + "step": 2052 + }, + { + "epoch": 0.26, + "learning_rate": 8.705245568382224e-06, + "loss": 0.5801, + "step": 2053 + }, + { + "epoch": 0.26, + "learning_rate": 8.703882056958111e-06, + "loss": 0.5748, + "step": 2054 + }, + { + "epoch": 0.26, + "learning_rate": 8.702517934856813e-06, + "loss": 0.5655, + "step": 2055 + }, + { + "epoch": 0.26, + "learning_rate": 8.70115320230324e-06, + "loss": 0.5715, + "step": 2056 + }, + { + "epoch": 0.26, + "learning_rate": 8.699787859522403e-06, + "loss": 0.6035, + "step": 2057 + }, + { + "epoch": 0.26, + "learning_rate": 8.698421906739412e-06, + "loss": 0.6291, + "step": 2058 + }, + { + "epoch": 0.26, + "learning_rate": 8.697055344179477e-06, + "loss": 0.5696, + "step": 2059 + }, + { + "epoch": 0.26, + "learning_rate": 8.69568817206791e-06, + "loss": 0.6104, + "step": 2060 + }, + { + "epoch": 0.26, + "learning_rate": 8.694320390630127e-06, + "loss": 0.5395, + "step": 2061 + }, + { + "epoch": 0.26, + "learning_rate": 8.692952000091637e-06, + "loss": 0.5988, + "step": 2062 + }, + { + "epoch": 0.26, + "learning_rate": 8.691583000678055e-06, + "loss": 0.5928, + "step": 2063 + }, + { + "epoch": 0.26, + "learning_rate": 8.690213392615095e-06, + "loss": 0.5841, + "step": 2064 + }, + { + "epoch": 0.26, + "learning_rate": 8.688843176128568e-06, + "loss": 0.5865, + "step": 2065 + }, + { + "epoch": 0.26, + "learning_rate": 8.687472351444391e-06, + "loss": 0.6078, + "step": 2066 + }, + { + "epoch": 0.26, + "learning_rate": 8.686100918788577e-06, + "loss": 0.6121, + "step": 2067 + }, + { + "epoch": 0.26, + "learning_rate": 8.684728878387246e-06, + "loss": 0.5715, + "step": 2068 + }, + { + "epoch": 0.26, + "learning_rate": 8.683356230466605e-06, + "loss": 0.5576, + "step": 2069 + }, + { + "epoch": 0.26, + "learning_rate": 8.681982975252975e-06, + "loss": 0.5651, + "step": 2070 + }, + { + "epoch": 0.26, + "learning_rate": 8.680609112972766e-06, + "loss": 0.606, + "step": 2071 + }, + { + "epoch": 0.26, + "learning_rate": 8.6792346438525e-06, + "loss": 0.5281, + "step": 2072 + }, + { + "epoch": 0.26, + "learning_rate": 8.677859568118789e-06, + "loss": 0.5718, + "step": 2073 + }, + { + "epoch": 0.26, + "learning_rate": 8.676483885998347e-06, + "loss": 0.5573, + "step": 2074 + }, + { + "epoch": 0.26, + "learning_rate": 8.675107597717993e-06, + "loss": 0.5933, + "step": 2075 + }, + { + "epoch": 0.26, + "learning_rate": 8.67373070350464e-06, + "loss": 0.5816, + "step": 2076 + }, + { + "epoch": 0.26, + "learning_rate": 8.672353203585304e-06, + "loss": 0.5457, + "step": 2077 + }, + { + "epoch": 0.26, + "learning_rate": 8.6709750981871e-06, + "loss": 0.6116, + "step": 2078 + }, + { + "epoch": 0.26, + "learning_rate": 8.669596387537243e-06, + "loss": 0.5611, + "step": 2079 + }, + { + "epoch": 0.26, + "learning_rate": 8.668217071863047e-06, + "loss": 0.6057, + "step": 2080 + }, + { + "epoch": 0.26, + "learning_rate": 8.666837151391928e-06, + "loss": 0.5909, + "step": 2081 + }, + { + "epoch": 0.26, + "learning_rate": 8.665456626351398e-06, + "loss": 0.5804, + "step": 2082 + }, + { + "epoch": 0.26, + "learning_rate": 8.664075496969076e-06, + "loss": 0.5808, + "step": 2083 + }, + { + "epoch": 0.26, + "learning_rate": 8.66269376347267e-06, + "loss": 0.5859, + "step": 2084 + }, + { + "epoch": 0.26, + "learning_rate": 8.661311426089995e-06, + "loss": 0.5719, + "step": 2085 + }, + { + "epoch": 0.26, + "learning_rate": 8.659928485048963e-06, + "loss": 0.5871, + "step": 2086 + }, + { + "epoch": 0.26, + "learning_rate": 8.65854494057759e-06, + "loss": 0.5125, + "step": 2087 + }, + { + "epoch": 0.26, + "learning_rate": 8.657160792903982e-06, + "loss": 0.5504, + "step": 2088 + }, + { + "epoch": 0.26, + "learning_rate": 8.655776042256355e-06, + "loss": 0.2113, + "step": 2089 + }, + { + "epoch": 0.26, + "learning_rate": 8.654390688863016e-06, + "loss": 0.5552, + "step": 2090 + }, + { + "epoch": 0.26, + "learning_rate": 8.65300473295238e-06, + "loss": 0.6177, + "step": 2091 + }, + { + "epoch": 0.26, + "learning_rate": 8.65161817475295e-06, + "loss": 0.532, + "step": 2092 + }, + { + "epoch": 0.26, + "learning_rate": 8.65023101449334e-06, + "loss": 0.6044, + "step": 2093 + }, + { + "epoch": 0.26, + "learning_rate": 8.648843252402256e-06, + "loss": 0.5493, + "step": 2094 + }, + { + "epoch": 0.26, + "learning_rate": 8.647454888708504e-06, + "loss": 0.6337, + "step": 2095 + }, + { + "epoch": 0.26, + "learning_rate": 8.646065923640992e-06, + "loss": 0.6352, + "step": 2096 + }, + { + "epoch": 0.26, + "learning_rate": 8.644676357428723e-06, + "loss": 0.5654, + "step": 2097 + }, + { + "epoch": 0.26, + "learning_rate": 8.643286190300804e-06, + "loss": 0.5295, + "step": 2098 + }, + { + "epoch": 0.26, + "learning_rate": 8.641895422486438e-06, + "loss": 0.5349, + "step": 2099 + }, + { + "epoch": 0.26, + "learning_rate": 8.640504054214927e-06, + "loss": 0.6275, + "step": 2100 + }, + { + "epoch": 0.26, + "learning_rate": 8.639112085715675e-06, + "loss": 0.6498, + "step": 2101 + }, + { + "epoch": 0.26, + "learning_rate": 8.63771951721818e-06, + "loss": 0.5643, + "step": 2102 + }, + { + "epoch": 0.26, + "learning_rate": 8.636326348952041e-06, + "loss": 0.5784, + "step": 2103 + }, + { + "epoch": 0.26, + "learning_rate": 8.63493258114696e-06, + "loss": 0.6078, + "step": 2104 + }, + { + "epoch": 0.26, + "learning_rate": 8.633538214032732e-06, + "loss": 0.5448, + "step": 2105 + }, + { + "epoch": 0.26, + "learning_rate": 8.632143247839254e-06, + "loss": 0.5839, + "step": 2106 + }, + { + "epoch": 0.26, + "learning_rate": 8.63074768279652e-06, + "loss": 0.551, + "step": 2107 + }, + { + "epoch": 0.26, + "learning_rate": 8.629351519134625e-06, + "loss": 0.6101, + "step": 2108 + }, + { + "epoch": 0.26, + "learning_rate": 8.62795475708376e-06, + "loss": 0.5777, + "step": 2109 + }, + { + "epoch": 0.26, + "learning_rate": 8.626557396874215e-06, + "loss": 0.6238, + "step": 2110 + }, + { + "epoch": 0.26, + "learning_rate": 8.625159438736383e-06, + "loss": 0.5923, + "step": 2111 + }, + { + "epoch": 0.26, + "learning_rate": 8.62376088290075e-06, + "loss": 0.6586, + "step": 2112 + }, + { + "epoch": 0.26, + "learning_rate": 8.622361729597901e-06, + "loss": 0.5689, + "step": 2113 + }, + { + "epoch": 0.26, + "learning_rate": 8.620961979058524e-06, + "loss": 0.5308, + "step": 2114 + }, + { + "epoch": 0.27, + "learning_rate": 8.619561631513403e-06, + "loss": 0.5631, + "step": 2115 + }, + { + "epoch": 0.27, + "learning_rate": 8.61816068719342e-06, + "loss": 0.5595, + "step": 2116 + }, + { + "epoch": 0.27, + "learning_rate": 8.616759146329553e-06, + "loss": 0.6059, + "step": 2117 + }, + { + "epoch": 0.27, + "learning_rate": 8.615357009152883e-06, + "loss": 0.6187, + "step": 2118 + }, + { + "epoch": 0.27, + "learning_rate": 8.613954275894589e-06, + "loss": 0.5604, + "step": 2119 + }, + { + "epoch": 0.27, + "learning_rate": 8.612550946785943e-06, + "loss": 0.5305, + "step": 2120 + }, + { + "epoch": 0.27, + "learning_rate": 8.611147022058318e-06, + "loss": 0.5629, + "step": 2121 + }, + { + "epoch": 0.27, + "learning_rate": 8.60974250194319e-06, + "loss": 0.5716, + "step": 2122 + }, + { + "epoch": 0.27, + "learning_rate": 8.608337386672128e-06, + "loss": 0.529, + "step": 2123 + }, + { + "epoch": 0.27, + "learning_rate": 8.606931676476797e-06, + "loss": 0.6461, + "step": 2124 + }, + { + "epoch": 0.27, + "learning_rate": 8.605525371588967e-06, + "loss": 0.5791, + "step": 2125 + }, + { + "epoch": 0.27, + "learning_rate": 8.604118472240503e-06, + "loss": 0.5587, + "step": 2126 + }, + { + "epoch": 0.27, + "learning_rate": 8.602710978663364e-06, + "loss": 0.629, + "step": 2127 + }, + { + "epoch": 0.27, + "learning_rate": 8.601302891089612e-06, + "loss": 0.5447, + "step": 2128 + }, + { + "epoch": 0.27, + "learning_rate": 8.599894209751405e-06, + "loss": 0.4897, + "step": 2129 + }, + { + "epoch": 0.27, + "learning_rate": 8.598484934880998e-06, + "loss": 0.6073, + "step": 2130 + }, + { + "epoch": 0.27, + "learning_rate": 8.597075066710749e-06, + "loss": 0.5138, + "step": 2131 + }, + { + "epoch": 0.27, + "learning_rate": 8.595664605473106e-06, + "loss": 0.5552, + "step": 2132 + }, + { + "epoch": 0.27, + "learning_rate": 8.59425355140062e-06, + "loss": 0.5801, + "step": 2133 + }, + { + "epoch": 0.27, + "learning_rate": 8.592841904725937e-06, + "loss": 0.6358, + "step": 2134 + }, + { + "epoch": 0.27, + "learning_rate": 8.591429665681806e-06, + "loss": 0.601, + "step": 2135 + }, + { + "epoch": 0.27, + "learning_rate": 8.590016834501066e-06, + "loss": 0.2193, + "step": 2136 + }, + { + "epoch": 0.27, + "learning_rate": 8.58860341141666e-06, + "loss": 0.599, + "step": 2137 + }, + { + "epoch": 0.27, + "learning_rate": 8.587189396661622e-06, + "loss": 0.575, + "step": 2138 + }, + { + "epoch": 0.27, + "learning_rate": 8.585774790469092e-06, + "loss": 0.544, + "step": 2139 + }, + { + "epoch": 0.27, + "learning_rate": 8.584359593072302e-06, + "loss": 0.5474, + "step": 2140 + }, + { + "epoch": 0.27, + "learning_rate": 8.58294380470458e-06, + "loss": 0.5349, + "step": 2141 + }, + { + "epoch": 0.27, + "learning_rate": 8.581527425599356e-06, + "loss": 0.4684, + "step": 2142 + }, + { + "epoch": 0.27, + "learning_rate": 8.580110455990157e-06, + "loss": 0.5111, + "step": 2143 + }, + { + "epoch": 0.27, + "learning_rate": 8.578692896110602e-06, + "loss": 0.5656, + "step": 2144 + }, + { + "epoch": 0.27, + "learning_rate": 8.577274746194412e-06, + "loss": 0.5007, + "step": 2145 + }, + { + "epoch": 0.27, + "learning_rate": 8.575856006475408e-06, + "loss": 0.5155, + "step": 2146 + }, + { + "epoch": 0.27, + "learning_rate": 8.5744366771875e-06, + "loss": 0.5343, + "step": 2147 + }, + { + "epoch": 0.27, + "learning_rate": 8.5730167585647e-06, + "loss": 0.5592, + "step": 2148 + }, + { + "epoch": 0.27, + "learning_rate": 8.571596250841121e-06, + "loss": 0.599, + "step": 2149 + }, + { + "epoch": 0.27, + "learning_rate": 8.570175154250966e-06, + "loss": 0.5559, + "step": 2150 + }, + { + "epoch": 0.27, + "learning_rate": 8.56875346902854e-06, + "loss": 0.567, + "step": 2151 + }, + { + "epoch": 0.27, + "learning_rate": 8.56733119540824e-06, + "loss": 0.6202, + "step": 2152 + }, + { + "epoch": 0.27, + "learning_rate": 8.565908333624564e-06, + "loss": 0.5922, + "step": 2153 + }, + { + "epoch": 0.27, + "learning_rate": 8.56448488391211e-06, + "loss": 0.2136, + "step": 2154 + }, + { + "epoch": 0.27, + "learning_rate": 8.563060846505564e-06, + "loss": 0.6298, + "step": 2155 + }, + { + "epoch": 0.27, + "learning_rate": 8.56163622163972e-06, + "loss": 0.6028, + "step": 2156 + }, + { + "epoch": 0.27, + "learning_rate": 8.560211009549454e-06, + "loss": 0.5404, + "step": 2157 + }, + { + "epoch": 0.27, + "learning_rate": 8.558785210469758e-06, + "loss": 0.5877, + "step": 2158 + }, + { + "epoch": 0.27, + "learning_rate": 8.557358824635703e-06, + "loss": 0.5904, + "step": 2159 + }, + { + "epoch": 0.27, + "learning_rate": 8.555931852282468e-06, + "loss": 0.5908, + "step": 2160 + }, + { + "epoch": 0.27, + "learning_rate": 8.554504293645323e-06, + "loss": 0.5317, + "step": 2161 + }, + { + "epoch": 0.27, + "learning_rate": 8.553076148959638e-06, + "loss": 0.545, + "step": 2162 + }, + { + "epoch": 0.27, + "learning_rate": 8.551647418460876e-06, + "loss": 0.6273, + "step": 2163 + }, + { + "epoch": 0.27, + "learning_rate": 8.550218102384602e-06, + "loss": 0.5707, + "step": 2164 + }, + { + "epoch": 0.27, + "learning_rate": 8.548788200966472e-06, + "loss": 0.625, + "step": 2165 + }, + { + "epoch": 0.27, + "learning_rate": 8.547357714442241e-06, + "loss": 0.5703, + "step": 2166 + }, + { + "epoch": 0.27, + "learning_rate": 8.545926643047762e-06, + "loss": 0.5763, + "step": 2167 + }, + { + "epoch": 0.27, + "learning_rate": 8.54449498701898e-06, + "loss": 0.5232, + "step": 2168 + }, + { + "epoch": 0.27, + "learning_rate": 8.543062746591942e-06, + "loss": 0.6593, + "step": 2169 + }, + { + "epoch": 0.27, + "learning_rate": 8.541629922002785e-06, + "loss": 0.5766, + "step": 2170 + }, + { + "epoch": 0.27, + "learning_rate": 8.54019651348775e-06, + "loss": 0.5151, + "step": 2171 + }, + { + "epoch": 0.27, + "learning_rate": 8.538762521283168e-06, + "loss": 0.54, + "step": 2172 + }, + { + "epoch": 0.27, + "learning_rate": 8.537327945625469e-06, + "loss": 0.6169, + "step": 2173 + }, + { + "epoch": 0.27, + "learning_rate": 8.535892786751177e-06, + "loss": 0.5629, + "step": 2174 + }, + { + "epoch": 0.27, + "learning_rate": 8.534457044896914e-06, + "loss": 0.5761, + "step": 2175 + }, + { + "epoch": 0.27, + "learning_rate": 8.5330207202994e-06, + "loss": 0.5752, + "step": 2176 + }, + { + "epoch": 0.27, + "learning_rate": 8.531583813195446e-06, + "loss": 0.5757, + "step": 2177 + }, + { + "epoch": 0.27, + "learning_rate": 8.530146323821964e-06, + "loss": 0.5439, + "step": 2178 + }, + { + "epoch": 0.27, + "learning_rate": 8.52870825241596e-06, + "loss": 0.5333, + "step": 2179 + }, + { + "epoch": 0.27, + "learning_rate": 8.527269599214533e-06, + "loss": 0.5906, + "step": 2180 + }, + { + "epoch": 0.27, + "learning_rate": 8.525830364454883e-06, + "loss": 0.6061, + "step": 2181 + }, + { + "epoch": 0.27, + "learning_rate": 8.524390548374304e-06, + "loss": 0.5066, + "step": 2182 + }, + { + "epoch": 0.27, + "learning_rate": 8.522950151210185e-06, + "loss": 0.5835, + "step": 2183 + }, + { + "epoch": 0.27, + "learning_rate": 8.521509173200012e-06, + "loss": 0.632, + "step": 2184 + }, + { + "epoch": 0.27, + "learning_rate": 8.520067614581365e-06, + "loss": 0.5821, + "step": 2185 + }, + { + "epoch": 0.27, + "learning_rate": 8.518625475591922e-06, + "loss": 0.5862, + "step": 2186 + }, + { + "epoch": 0.27, + "learning_rate": 8.517182756469453e-06, + "loss": 0.5875, + "step": 2187 + }, + { + "epoch": 0.27, + "learning_rate": 8.515739457451831e-06, + "loss": 0.588, + "step": 2188 + }, + { + "epoch": 0.27, + "learning_rate": 8.514295578777014e-06, + "loss": 0.576, + "step": 2189 + }, + { + "epoch": 0.27, + "learning_rate": 8.512851120683064e-06, + "loss": 0.5638, + "step": 2190 + }, + { + "epoch": 0.27, + "learning_rate": 8.511406083408138e-06, + "loss": 0.6222, + "step": 2191 + }, + { + "epoch": 0.27, + "learning_rate": 8.509960467190484e-06, + "loss": 0.6328, + "step": 2192 + }, + { + "epoch": 0.27, + "learning_rate": 8.508514272268446e-06, + "loss": 0.6392, + "step": 2193 + }, + { + "epoch": 0.28, + "learning_rate": 8.50706749888047e-06, + "loss": 0.5491, + "step": 2194 + }, + { + "epoch": 0.28, + "learning_rate": 8.505620147265089e-06, + "loss": 0.5559, + "step": 2195 + }, + { + "epoch": 0.28, + "learning_rate": 8.504172217660936e-06, + "loss": 0.551, + "step": 2196 + }, + { + "epoch": 0.28, + "learning_rate": 8.50272371030674e-06, + "loss": 0.236, + "step": 2197 + }, + { + "epoch": 0.28, + "learning_rate": 8.501274625441319e-06, + "loss": 0.5541, + "step": 2198 + }, + { + "epoch": 0.28, + "learning_rate": 8.499824963303596e-06, + "loss": 0.6056, + "step": 2199 + }, + { + "epoch": 0.28, + "learning_rate": 8.49837472413258e-06, + "loss": 0.539, + "step": 2200 + }, + { + "epoch": 0.28, + "learning_rate": 8.49692390816738e-06, + "loss": 0.5607, + "step": 2201 + }, + { + "epoch": 0.28, + "learning_rate": 8.4954725156472e-06, + "loss": 0.5651, + "step": 2202 + }, + { + "epoch": 0.28, + "learning_rate": 8.494020546811338e-06, + "loss": 0.5328, + "step": 2203 + }, + { + "epoch": 0.28, + "learning_rate": 8.492568001899188e-06, + "loss": 0.5418, + "step": 2204 + }, + { + "epoch": 0.28, + "learning_rate": 8.491114881150236e-06, + "loss": 0.5598, + "step": 2205 + }, + { + "epoch": 0.28, + "learning_rate": 8.489661184804067e-06, + "loss": 0.5637, + "step": 2206 + }, + { + "epoch": 0.28, + "learning_rate": 8.488206913100358e-06, + "loss": 0.598, + "step": 2207 + }, + { + "epoch": 0.28, + "learning_rate": 8.486752066278883e-06, + "loss": 0.6492, + "step": 2208 + }, + { + "epoch": 0.28, + "learning_rate": 8.485296644579507e-06, + "loss": 0.5776, + "step": 2209 + }, + { + "epoch": 0.28, + "learning_rate": 8.483840648242196e-06, + "loss": 0.5404, + "step": 2210 + }, + { + "epoch": 0.28, + "learning_rate": 8.482384077507006e-06, + "loss": 0.6008, + "step": 2211 + }, + { + "epoch": 0.28, + "learning_rate": 8.480926932614088e-06, + "loss": 0.5806, + "step": 2212 + }, + { + "epoch": 0.28, + "learning_rate": 8.479469213803688e-06, + "loss": 0.5423, + "step": 2213 + }, + { + "epoch": 0.28, + "learning_rate": 8.478010921316151e-06, + "loss": 0.5837, + "step": 2214 + }, + { + "epoch": 0.28, + "learning_rate": 8.47655205539191e-06, + "loss": 0.5303, + "step": 2215 + }, + { + "epoch": 0.28, + "learning_rate": 8.475092616271497e-06, + "loss": 0.5642, + "step": 2216 + }, + { + "epoch": 0.28, + "learning_rate": 8.473632604195532e-06, + "loss": 0.2116, + "step": 2217 + }, + { + "epoch": 0.28, + "learning_rate": 8.47217201940474e-06, + "loss": 0.5536, + "step": 2218 + }, + { + "epoch": 0.28, + "learning_rate": 8.470710862139933e-06, + "loss": 0.658, + "step": 2219 + }, + { + "epoch": 0.28, + "learning_rate": 8.46924913264202e-06, + "loss": 0.5949, + "step": 2220 + }, + { + "epoch": 0.28, + "learning_rate": 8.467786831152e-06, + "loss": 0.561, + "step": 2221 + }, + { + "epoch": 0.28, + "learning_rate": 8.466323957910973e-06, + "loss": 0.6344, + "step": 2222 + }, + { + "epoch": 0.28, + "learning_rate": 8.464860513160132e-06, + "loss": 0.2121, + "step": 2223 + }, + { + "epoch": 0.28, + "learning_rate": 8.463396497140757e-06, + "loss": 0.5463, + "step": 2224 + }, + { + "epoch": 0.28, + "learning_rate": 8.46193191009423e-06, + "loss": 0.565, + "step": 2225 + }, + { + "epoch": 0.28, + "learning_rate": 8.460466752262024e-06, + "loss": 0.6338, + "step": 2226 + }, + { + "epoch": 0.28, + "learning_rate": 8.45900102388571e-06, + "loss": 0.5843, + "step": 2227 + }, + { + "epoch": 0.28, + "learning_rate": 8.457534725206946e-06, + "loss": 0.5753, + "step": 2228 + }, + { + "epoch": 0.28, + "learning_rate": 8.45606785646749e-06, + "loss": 0.575, + "step": 2229 + }, + { + "epoch": 0.28, + "learning_rate": 8.45460041790919e-06, + "loss": 0.5688, + "step": 2230 + }, + { + "epoch": 0.28, + "learning_rate": 8.453132409773992e-06, + "loss": 0.6077, + "step": 2231 + }, + { + "epoch": 0.28, + "learning_rate": 8.451663832303932e-06, + "loss": 0.5475, + "step": 2232 + }, + { + "epoch": 0.28, + "learning_rate": 8.450194685741143e-06, + "loss": 0.5442, + "step": 2233 + }, + { + "epoch": 0.28, + "learning_rate": 8.448724970327846e-06, + "loss": 0.5805, + "step": 2234 + }, + { + "epoch": 0.28, + "learning_rate": 8.447254686306367e-06, + "loss": 0.6121, + "step": 2235 + }, + { + "epoch": 0.28, + "learning_rate": 8.445783833919115e-06, + "loss": 0.5545, + "step": 2236 + }, + { + "epoch": 0.28, + "learning_rate": 8.444312413408597e-06, + "loss": 0.5248, + "step": 2237 + }, + { + "epoch": 0.28, + "learning_rate": 8.442840425017414e-06, + "loss": 0.576, + "step": 2238 + }, + { + "epoch": 0.28, + "learning_rate": 8.441367868988258e-06, + "loss": 0.2285, + "step": 2239 + }, + { + "epoch": 0.28, + "learning_rate": 8.439894745563918e-06, + "loss": 0.5796, + "step": 2240 + }, + { + "epoch": 0.28, + "learning_rate": 8.438421054987276e-06, + "loss": 0.5193, + "step": 2241 + }, + { + "epoch": 0.28, + "learning_rate": 8.436946797501306e-06, + "loss": 0.6018, + "step": 2242 + }, + { + "epoch": 0.28, + "learning_rate": 8.435471973349077e-06, + "loss": 0.6121, + "step": 2243 + }, + { + "epoch": 0.28, + "learning_rate": 8.433996582773747e-06, + "loss": 0.2436, + "step": 2244 + }, + { + "epoch": 0.28, + "learning_rate": 8.432520626018572e-06, + "loss": 0.5727, + "step": 2245 + }, + { + "epoch": 0.28, + "learning_rate": 8.431044103326905e-06, + "loss": 0.5407, + "step": 2246 + }, + { + "epoch": 0.28, + "learning_rate": 8.42956701494218e-06, + "loss": 0.5978, + "step": 2247 + }, + { + "epoch": 0.28, + "learning_rate": 8.428089361107938e-06, + "loss": 0.5987, + "step": 2248 + }, + { + "epoch": 0.28, + "learning_rate": 8.426611142067805e-06, + "loss": 0.6053, + "step": 2249 + }, + { + "epoch": 0.28, + "learning_rate": 8.4251323580655e-06, + "loss": 0.5926, + "step": 2250 + }, + { + "epoch": 0.28, + "learning_rate": 8.42365300934484e-06, + "loss": 0.5522, + "step": 2251 + }, + { + "epoch": 0.28, + "learning_rate": 8.422173096149732e-06, + "loss": 0.6184, + "step": 2252 + }, + { + "epoch": 0.28, + "learning_rate": 8.420692618724175e-06, + "loss": 0.5827, + "step": 2253 + }, + { + "epoch": 0.28, + "learning_rate": 8.419211577312267e-06, + "loss": 0.6028, + "step": 2254 + }, + { + "epoch": 0.28, + "learning_rate": 8.417729972158189e-06, + "loss": 0.5745, + "step": 2255 + }, + { + "epoch": 0.28, + "learning_rate": 8.416247803506224e-06, + "loss": 0.5662, + "step": 2256 + }, + { + "epoch": 0.28, + "learning_rate": 8.414765071600742e-06, + "loss": 0.6284, + "step": 2257 + }, + { + "epoch": 0.28, + "learning_rate": 8.413281776686211e-06, + "loss": 0.5909, + "step": 2258 + }, + { + "epoch": 0.28, + "learning_rate": 8.411797919007186e-06, + "loss": 0.5961, + "step": 2259 + }, + { + "epoch": 0.28, + "learning_rate": 8.41031349880832e-06, + "loss": 0.5836, + "step": 2260 + }, + { + "epoch": 0.28, + "learning_rate": 8.408828516334357e-06, + "loss": 0.5655, + "step": 2261 + }, + { + "epoch": 0.28, + "learning_rate": 8.407342971830133e-06, + "loss": 0.5804, + "step": 2262 + }, + { + "epoch": 0.28, + "learning_rate": 8.405856865540576e-06, + "loss": 0.6166, + "step": 2263 + }, + { + "epoch": 0.28, + "learning_rate": 8.404370197710707e-06, + "loss": 0.6155, + "step": 2264 + }, + { + "epoch": 0.28, + "learning_rate": 8.402882968585642e-06, + "loss": 0.6337, + "step": 2265 + }, + { + "epoch": 0.28, + "learning_rate": 8.401395178410587e-06, + "loss": 0.5628, + "step": 2266 + }, + { + "epoch": 0.28, + "learning_rate": 8.399906827430843e-06, + "loss": 0.5815, + "step": 2267 + }, + { + "epoch": 0.28, + "learning_rate": 8.398417915891798e-06, + "loss": 0.6015, + "step": 2268 + }, + { + "epoch": 0.28, + "learning_rate": 8.39692844403894e-06, + "loss": 0.5673, + "step": 2269 + }, + { + "epoch": 0.28, + "learning_rate": 8.395438412117843e-06, + "loss": 0.5792, + "step": 2270 + }, + { + "epoch": 0.28, + "learning_rate": 8.393947820374176e-06, + "loss": 0.6145, + "step": 2271 + }, + { + "epoch": 0.28, + "learning_rate": 8.392456669053701e-06, + "loss": 0.6119, + "step": 2272 + }, + { + "epoch": 0.28, + "learning_rate": 8.39096495840227e-06, + "loss": 0.5714, + "step": 2273 + }, + { + "epoch": 0.29, + "learning_rate": 8.389472688665831e-06, + "loss": 0.6098, + "step": 2274 + }, + { + "epoch": 0.29, + "learning_rate": 8.387979860090424e-06, + "loss": 0.5143, + "step": 2275 + }, + { + "epoch": 0.29, + "learning_rate": 8.38648647292217e-06, + "loss": 0.6068, + "step": 2276 + }, + { + "epoch": 0.29, + "learning_rate": 8.384992527407299e-06, + "loss": 0.594, + "step": 2277 + }, + { + "epoch": 0.29, + "learning_rate": 8.383498023792125e-06, + "loss": 0.5844, + "step": 2278 + }, + { + "epoch": 0.29, + "learning_rate": 8.38200296232305e-06, + "loss": 0.5316, + "step": 2279 + }, + { + "epoch": 0.29, + "learning_rate": 8.380507343246573e-06, + "loss": 0.6245, + "step": 2280 + }, + { + "epoch": 0.29, + "learning_rate": 8.379011166809287e-06, + "loss": 0.6217, + "step": 2281 + }, + { + "epoch": 0.29, + "learning_rate": 8.37751443325787e-06, + "loss": 0.5582, + "step": 2282 + }, + { + "epoch": 0.29, + "learning_rate": 8.3760171428391e-06, + "loss": 0.6043, + "step": 2283 + }, + { + "epoch": 0.29, + "learning_rate": 8.374519295799839e-06, + "loss": 0.5436, + "step": 2284 + }, + { + "epoch": 0.29, + "learning_rate": 8.373020892387048e-06, + "loss": 0.5343, + "step": 2285 + }, + { + "epoch": 0.29, + "learning_rate": 8.371521932847773e-06, + "loss": 0.6034, + "step": 2286 + }, + { + "epoch": 0.29, + "learning_rate": 8.370022417429154e-06, + "loss": 0.5491, + "step": 2287 + }, + { + "epoch": 0.29, + "learning_rate": 8.368522346378426e-06, + "loss": 0.5531, + "step": 2288 + }, + { + "epoch": 0.29, + "learning_rate": 8.367021719942913e-06, + "loss": 0.5723, + "step": 2289 + }, + { + "epoch": 0.29, + "learning_rate": 8.36552053837003e-06, + "loss": 0.5588, + "step": 2290 + }, + { + "epoch": 0.29, + "learning_rate": 8.364018801907283e-06, + "loss": 0.5393, + "step": 2291 + }, + { + "epoch": 0.29, + "learning_rate": 8.362516510802272e-06, + "loss": 0.5875, + "step": 2292 + }, + { + "epoch": 0.29, + "learning_rate": 8.361013665302688e-06, + "loss": 0.5849, + "step": 2293 + }, + { + "epoch": 0.29, + "learning_rate": 8.35951026565631e-06, + "loss": 0.5515, + "step": 2294 + }, + { + "epoch": 0.29, + "learning_rate": 8.358006312111014e-06, + "loss": 0.5969, + "step": 2295 + }, + { + "epoch": 0.29, + "learning_rate": 8.356501804914759e-06, + "loss": 0.5468, + "step": 2296 + }, + { + "epoch": 0.29, + "learning_rate": 8.354996744315606e-06, + "loss": 0.593, + "step": 2297 + }, + { + "epoch": 0.29, + "learning_rate": 8.3534911305617e-06, + "loss": 0.5837, + "step": 2298 + }, + { + "epoch": 0.29, + "learning_rate": 8.351984963901278e-06, + "loss": 0.5386, + "step": 2299 + }, + { + "epoch": 0.29, + "learning_rate": 8.35047824458267e-06, + "loss": 0.5874, + "step": 2300 + }, + { + "epoch": 0.29, + "learning_rate": 8.348970972854295e-06, + "loss": 0.5908, + "step": 2301 + }, + { + "epoch": 0.29, + "learning_rate": 8.347463148964665e-06, + "loss": 0.5976, + "step": 2302 + }, + { + "epoch": 0.29, + "learning_rate": 8.345954773162384e-06, + "loss": 0.5395, + "step": 2303 + }, + { + "epoch": 0.29, + "learning_rate": 8.34444584569614e-06, + "loss": 0.553, + "step": 2304 + }, + { + "epoch": 0.29, + "learning_rate": 8.342936366814724e-06, + "loss": 0.5858, + "step": 2305 + }, + { + "epoch": 0.29, + "learning_rate": 8.341426336767007e-06, + "loss": 0.5999, + "step": 2306 + }, + { + "epoch": 0.29, + "learning_rate": 8.339915755801956e-06, + "loss": 0.5668, + "step": 2307 + }, + { + "epoch": 0.29, + "learning_rate": 8.33840462416863e-06, + "loss": 0.5558, + "step": 2308 + }, + { + "epoch": 0.29, + "learning_rate": 8.336892942116173e-06, + "loss": 0.5851, + "step": 2309 + }, + { + "epoch": 0.29, + "learning_rate": 8.335380709893826e-06, + "loss": 0.6266, + "step": 2310 + }, + { + "epoch": 0.29, + "learning_rate": 8.333867927750916e-06, + "loss": 0.5648, + "step": 2311 + }, + { + "epoch": 0.29, + "learning_rate": 8.332354595936865e-06, + "loss": 0.6225, + "step": 2312 + }, + { + "epoch": 0.29, + "learning_rate": 8.330840714701182e-06, + "loss": 0.5641, + "step": 2313 + }, + { + "epoch": 0.29, + "learning_rate": 8.329326284293469e-06, + "loss": 0.513, + "step": 2314 + }, + { + "epoch": 0.29, + "learning_rate": 8.327811304963418e-06, + "loss": 0.6336, + "step": 2315 + }, + { + "epoch": 0.29, + "learning_rate": 8.326295776960808e-06, + "loss": 0.5569, + "step": 2316 + }, + { + "epoch": 0.29, + "learning_rate": 8.324779700535515e-06, + "loss": 0.6049, + "step": 2317 + }, + { + "epoch": 0.29, + "learning_rate": 8.323263075937502e-06, + "loss": 0.5506, + "step": 2318 + }, + { + "epoch": 0.29, + "learning_rate": 8.321745903416819e-06, + "loss": 0.5636, + "step": 2319 + }, + { + "epoch": 0.29, + "learning_rate": 8.320228183223611e-06, + "loss": 0.5348, + "step": 2320 + }, + { + "epoch": 0.29, + "learning_rate": 8.318709915608115e-06, + "loss": 0.6193, + "step": 2321 + }, + { + "epoch": 0.29, + "learning_rate": 8.317191100820652e-06, + "loss": 0.5688, + "step": 2322 + }, + { + "epoch": 0.29, + "learning_rate": 8.315671739111635e-06, + "loss": 0.5838, + "step": 2323 + }, + { + "epoch": 0.29, + "learning_rate": 8.314151830731575e-06, + "loss": 0.6086, + "step": 2324 + }, + { + "epoch": 0.29, + "learning_rate": 8.31263137593106e-06, + "loss": 0.6064, + "step": 2325 + }, + { + "epoch": 0.29, + "learning_rate": 8.311110374960777e-06, + "loss": 0.5595, + "step": 2326 + }, + { + "epoch": 0.29, + "learning_rate": 8.309588828071503e-06, + "loss": 0.6591, + "step": 2327 + }, + { + "epoch": 0.29, + "learning_rate": 8.3080667355141e-06, + "loss": 0.6303, + "step": 2328 + }, + { + "epoch": 0.29, + "learning_rate": 8.306544097539525e-06, + "loss": 0.566, + "step": 2329 + }, + { + "epoch": 0.29, + "learning_rate": 8.30502091439882e-06, + "loss": 0.5771, + "step": 2330 + }, + { + "epoch": 0.29, + "learning_rate": 8.303497186343123e-06, + "loss": 0.5779, + "step": 2331 + }, + { + "epoch": 0.29, + "learning_rate": 8.301972913623657e-06, + "loss": 0.5458, + "step": 2332 + }, + { + "epoch": 0.29, + "learning_rate": 8.300448096491736e-06, + "loss": 0.6054, + "step": 2333 + }, + { + "epoch": 0.29, + "learning_rate": 8.298922735198765e-06, + "loss": 0.5441, + "step": 2334 + }, + { + "epoch": 0.29, + "learning_rate": 8.297396829996237e-06, + "loss": 0.2237, + "step": 2335 + }, + { + "epoch": 0.29, + "learning_rate": 8.295870381135735e-06, + "loss": 0.5378, + "step": 2336 + }, + { + "epoch": 0.29, + "learning_rate": 8.294343388868934e-06, + "loss": 0.598, + "step": 2337 + }, + { + "epoch": 0.29, + "learning_rate": 8.292815853447593e-06, + "loss": 0.5522, + "step": 2338 + }, + { + "epoch": 0.29, + "learning_rate": 8.29128777512357e-06, + "loss": 0.5688, + "step": 2339 + }, + { + "epoch": 0.29, + "learning_rate": 8.289759154148801e-06, + "loss": 0.2109, + "step": 2340 + }, + { + "epoch": 0.29, + "learning_rate": 8.288229990775322e-06, + "loss": 0.5278, + "step": 2341 + }, + { + "epoch": 0.29, + "learning_rate": 8.28670028525525e-06, + "loss": 0.5555, + "step": 2342 + }, + { + "epoch": 0.29, + "learning_rate": 8.285170037840797e-06, + "loss": 0.5233, + "step": 2343 + }, + { + "epoch": 0.29, + "learning_rate": 8.283639248784261e-06, + "loss": 0.5762, + "step": 2344 + }, + { + "epoch": 0.29, + "learning_rate": 8.282107918338033e-06, + "loss": 0.5499, + "step": 2345 + }, + { + "epoch": 0.29, + "learning_rate": 8.280576046754587e-06, + "loss": 0.5598, + "step": 2346 + }, + { + "epoch": 0.29, + "learning_rate": 8.279043634286495e-06, + "loss": 0.5858, + "step": 2347 + }, + { + "epoch": 0.29, + "learning_rate": 8.27751068118641e-06, + "loss": 0.5596, + "step": 2348 + }, + { + "epoch": 0.29, + "learning_rate": 8.275977187707079e-06, + "loss": 0.2277, + "step": 2349 + }, + { + "epoch": 0.29, + "learning_rate": 8.274443154101335e-06, + "loss": 0.5539, + "step": 2350 + }, + { + "epoch": 0.29, + "learning_rate": 8.272908580622104e-06, + "loss": 0.6419, + "step": 2351 + }, + { + "epoch": 0.29, + "learning_rate": 8.271373467522396e-06, + "loss": 0.5566, + "step": 2352 + }, + { + "epoch": 0.29, + "learning_rate": 8.269837815055317e-06, + "loss": 0.5763, + "step": 2353 + }, + { + "epoch": 0.3, + "learning_rate": 8.26830162347405e-06, + "loss": 0.561, + "step": 2354 + }, + { + "epoch": 0.3, + "learning_rate": 8.266764893031883e-06, + "loss": 0.5557, + "step": 2355 + }, + { + "epoch": 0.3, + "learning_rate": 8.265227623982178e-06, + "loss": 0.6006, + "step": 2356 + }, + { + "epoch": 0.3, + "learning_rate": 8.263689816578394e-06, + "loss": 0.5693, + "step": 2357 + }, + { + "epoch": 0.3, + "learning_rate": 8.26215147107408e-06, + "loss": 0.5522, + "step": 2358 + }, + { + "epoch": 0.3, + "learning_rate": 8.260612587722865e-06, + "loss": 0.5529, + "step": 2359 + }, + { + "epoch": 0.3, + "learning_rate": 8.259073166778475e-06, + "loss": 0.5833, + "step": 2360 + }, + { + "epoch": 0.3, + "learning_rate": 8.257533208494724e-06, + "loss": 0.6001, + "step": 2361 + }, + { + "epoch": 0.3, + "learning_rate": 8.255992713125507e-06, + "loss": 0.5853, + "step": 2362 + }, + { + "epoch": 0.3, + "learning_rate": 8.254451680924819e-06, + "loss": 0.5428, + "step": 2363 + }, + { + "epoch": 0.3, + "learning_rate": 8.252910112146735e-06, + "loss": 0.5318, + "step": 2364 + }, + { + "epoch": 0.3, + "learning_rate": 8.25136800704542e-06, + "loss": 0.5734, + "step": 2365 + }, + { + "epoch": 0.3, + "learning_rate": 8.249825365875128e-06, + "loss": 0.5594, + "step": 2366 + }, + { + "epoch": 0.3, + "learning_rate": 8.248282188890205e-06, + "loss": 0.5668, + "step": 2367 + }, + { + "epoch": 0.3, + "learning_rate": 8.246738476345079e-06, + "loss": 0.6107, + "step": 2368 + }, + { + "epoch": 0.3, + "learning_rate": 8.245194228494272e-06, + "loss": 0.5856, + "step": 2369 + }, + { + "epoch": 0.3, + "learning_rate": 8.24364944559239e-06, + "loss": 0.594, + "step": 2370 + }, + { + "epoch": 0.3, + "learning_rate": 8.242104127894128e-06, + "loss": 0.5918, + "step": 2371 + }, + { + "epoch": 0.3, + "learning_rate": 8.240558275654273e-06, + "loss": 0.5491, + "step": 2372 + }, + { + "epoch": 0.3, + "learning_rate": 8.239011889127695e-06, + "loss": 0.5333, + "step": 2373 + }, + { + "epoch": 0.3, + "learning_rate": 8.237464968569355e-06, + "loss": 0.6268, + "step": 2374 + }, + { + "epoch": 0.3, + "learning_rate": 8.235917514234302e-06, + "loss": 0.5412, + "step": 2375 + }, + { + "epoch": 0.3, + "learning_rate": 8.23436952637767e-06, + "loss": 0.5796, + "step": 2376 + }, + { + "epoch": 0.3, + "learning_rate": 8.232821005254687e-06, + "loss": 0.5664, + "step": 2377 + }, + { + "epoch": 0.3, + "learning_rate": 8.231271951120663e-06, + "loss": 0.5259, + "step": 2378 + }, + { + "epoch": 0.3, + "learning_rate": 8.229722364230996e-06, + "loss": 0.5368, + "step": 2379 + }, + { + "epoch": 0.3, + "learning_rate": 8.22817224484118e-06, + "loss": 0.544, + "step": 2380 + }, + { + "epoch": 0.3, + "learning_rate": 8.226621593206786e-06, + "loss": 0.5215, + "step": 2381 + }, + { + "epoch": 0.3, + "learning_rate": 8.22507040958348e-06, + "loss": 0.5708, + "step": 2382 + }, + { + "epoch": 0.3, + "learning_rate": 8.22351869422701e-06, + "loss": 0.5386, + "step": 2383 + }, + { + "epoch": 0.3, + "learning_rate": 8.221966447393217e-06, + "loss": 0.59, + "step": 2384 + }, + { + "epoch": 0.3, + "learning_rate": 8.220413669338028e-06, + "loss": 0.5894, + "step": 2385 + }, + { + "epoch": 0.3, + "learning_rate": 8.218860360317457e-06, + "loss": 0.5772, + "step": 2386 + }, + { + "epoch": 0.3, + "learning_rate": 8.217306520587605e-06, + "loss": 0.5519, + "step": 2387 + }, + { + "epoch": 0.3, + "learning_rate": 8.215752150404661e-06, + "loss": 0.5703, + "step": 2388 + }, + { + "epoch": 0.3, + "learning_rate": 8.214197250024904e-06, + "loss": 0.6019, + "step": 2389 + }, + { + "epoch": 0.3, + "learning_rate": 8.212641819704693e-06, + "loss": 0.5406, + "step": 2390 + }, + { + "epoch": 0.3, + "learning_rate": 8.211085859700485e-06, + "loss": 0.5408, + "step": 2391 + }, + { + "epoch": 0.3, + "learning_rate": 8.209529370268816e-06, + "loss": 0.5876, + "step": 2392 + }, + { + "epoch": 0.3, + "learning_rate": 8.207972351666313e-06, + "loss": 0.5985, + "step": 2393 + }, + { + "epoch": 0.3, + "learning_rate": 8.206414804149687e-06, + "loss": 0.5768, + "step": 2394 + }, + { + "epoch": 0.3, + "learning_rate": 8.20485672797574e-06, + "loss": 0.5484, + "step": 2395 + }, + { + "epoch": 0.3, + "learning_rate": 8.203298123401361e-06, + "loss": 0.6033, + "step": 2396 + }, + { + "epoch": 0.3, + "learning_rate": 8.201738990683523e-06, + "loss": 0.5908, + "step": 2397 + }, + { + "epoch": 0.3, + "learning_rate": 8.200179330079289e-06, + "loss": 0.5756, + "step": 2398 + }, + { + "epoch": 0.3, + "learning_rate": 8.198619141845808e-06, + "loss": 0.6296, + "step": 2399 + }, + { + "epoch": 0.3, + "learning_rate": 8.197058426240311e-06, + "loss": 0.5619, + "step": 2400 + }, + { + "epoch": 0.3, + "learning_rate": 8.195497183520128e-06, + "loss": 0.6222, + "step": 2401 + }, + { + "epoch": 0.3, + "learning_rate": 8.193935413942667e-06, + "loss": 0.6255, + "step": 2402 + }, + { + "epoch": 0.3, + "learning_rate": 8.192373117765422e-06, + "loss": 0.5333, + "step": 2403 + }, + { + "epoch": 0.3, + "learning_rate": 8.190810295245979e-06, + "loss": 0.5468, + "step": 2404 + }, + { + "epoch": 0.3, + "learning_rate": 8.189246946642005e-06, + "loss": 0.2566, + "step": 2405 + }, + { + "epoch": 0.3, + "learning_rate": 8.18768307221126e-06, + "loss": 0.5907, + "step": 2406 + }, + { + "epoch": 0.3, + "learning_rate": 8.186118672211587e-06, + "loss": 0.6513, + "step": 2407 + }, + { + "epoch": 0.3, + "learning_rate": 8.184553746900914e-06, + "loss": 0.5363, + "step": 2408 + }, + { + "epoch": 0.3, + "learning_rate": 8.182988296537261e-06, + "loss": 0.6219, + "step": 2409 + }, + { + "epoch": 0.3, + "learning_rate": 8.18142232137873e-06, + "loss": 0.562, + "step": 2410 + }, + { + "epoch": 0.3, + "learning_rate": 8.17985582168351e-06, + "loss": 0.5234, + "step": 2411 + }, + { + "epoch": 0.3, + "learning_rate": 8.178288797709881e-06, + "loss": 0.6268, + "step": 2412 + }, + { + "epoch": 0.3, + "learning_rate": 8.176721249716202e-06, + "loss": 0.5635, + "step": 2413 + }, + { + "epoch": 0.3, + "learning_rate": 8.175153177960927e-06, + "loss": 0.5901, + "step": 2414 + }, + { + "epoch": 0.3, + "learning_rate": 8.173584582702584e-06, + "loss": 0.5749, + "step": 2415 + }, + { + "epoch": 0.3, + "learning_rate": 8.172015464199803e-06, + "loss": 0.5843, + "step": 2416 + }, + { + "epoch": 0.3, + "learning_rate": 8.170445822711288e-06, + "loss": 0.5579, + "step": 2417 + }, + { + "epoch": 0.3, + "learning_rate": 8.168875658495835e-06, + "loss": 0.5891, + "step": 2418 + }, + { + "epoch": 0.3, + "learning_rate": 8.167304971812322e-06, + "loss": 0.5917, + "step": 2419 + }, + { + "epoch": 0.3, + "learning_rate": 8.165733762919718e-06, + "loss": 0.5913, + "step": 2420 + }, + { + "epoch": 0.3, + "learning_rate": 8.164162032077077e-06, + "loss": 0.6065, + "step": 2421 + }, + { + "epoch": 0.3, + "learning_rate": 8.162589779543536e-06, + "loss": 0.5224, + "step": 2422 + }, + { + "epoch": 0.3, + "learning_rate": 8.161017005578322e-06, + "loss": 0.5449, + "step": 2423 + }, + { + "epoch": 0.3, + "learning_rate": 8.159443710440742e-06, + "loss": 0.5265, + "step": 2424 + }, + { + "epoch": 0.3, + "learning_rate": 8.157869894390199e-06, + "loss": 0.56, + "step": 2425 + }, + { + "epoch": 0.3, + "learning_rate": 8.15629555768617e-06, + "loss": 0.5699, + "step": 2426 + }, + { + "epoch": 0.3, + "learning_rate": 8.154720700588226e-06, + "loss": 0.5343, + "step": 2427 + }, + { + "epoch": 0.3, + "learning_rate": 8.153145323356022e-06, + "loss": 0.5968, + "step": 2428 + }, + { + "epoch": 0.3, + "learning_rate": 8.151569426249297e-06, + "loss": 0.5926, + "step": 2429 + }, + { + "epoch": 0.3, + "learning_rate": 8.149993009527876e-06, + "loss": 0.6152, + "step": 2430 + }, + { + "epoch": 0.3, + "learning_rate": 8.148416073451675e-06, + "loss": 0.6489, + "step": 2431 + }, + { + "epoch": 0.3, + "learning_rate": 8.146838618280685e-06, + "loss": 0.561, + "step": 2432 + }, + { + "epoch": 0.3, + "learning_rate": 8.145260644274995e-06, + "loss": 0.5627, + "step": 2433 + }, + { + "epoch": 0.31, + "learning_rate": 8.143682151694768e-06, + "loss": 0.5953, + "step": 2434 + }, + { + "epoch": 0.31, + "learning_rate": 8.14210314080026e-06, + "loss": 0.5157, + "step": 2435 + }, + { + "epoch": 0.31, + "learning_rate": 8.140523611851812e-06, + "loss": 0.5705, + "step": 2436 + }, + { + "epoch": 0.31, + "learning_rate": 8.138943565109845e-06, + "loss": 0.5259, + "step": 2437 + }, + { + "epoch": 0.31, + "learning_rate": 8.137363000834871e-06, + "loss": 0.5703, + "step": 2438 + }, + { + "epoch": 0.31, + "learning_rate": 8.135781919287484e-06, + "loss": 0.5799, + "step": 2439 + }, + { + "epoch": 0.31, + "learning_rate": 8.134200320728369e-06, + "loss": 0.5646, + "step": 2440 + }, + { + "epoch": 0.31, + "learning_rate": 8.132618205418288e-06, + "loss": 0.6049, + "step": 2441 + }, + { + "epoch": 0.31, + "learning_rate": 8.131035573618091e-06, + "loss": 0.5693, + "step": 2442 + }, + { + "epoch": 0.31, + "learning_rate": 8.129452425588717e-06, + "loss": 0.5489, + "step": 2443 + }, + { + "epoch": 0.31, + "learning_rate": 8.127868761591189e-06, + "loss": 0.5659, + "step": 2444 + }, + { + "epoch": 0.31, + "learning_rate": 8.126284581886608e-06, + "loss": 0.5665, + "step": 2445 + }, + { + "epoch": 0.31, + "learning_rate": 8.124699886736171e-06, + "loss": 0.6053, + "step": 2446 + }, + { + "epoch": 0.31, + "learning_rate": 8.12311467640115e-06, + "loss": 0.5627, + "step": 2447 + }, + { + "epoch": 0.31, + "learning_rate": 8.12152895114291e-06, + "loss": 0.4959, + "step": 2448 + }, + { + "epoch": 0.31, + "learning_rate": 8.119942711222895e-06, + "loss": 0.5797, + "step": 2449 + }, + { + "epoch": 0.31, + "learning_rate": 8.118355956902636e-06, + "loss": 0.528, + "step": 2450 + }, + { + "epoch": 0.31, + "learning_rate": 8.11676868844375e-06, + "loss": 0.5133, + "step": 2451 + }, + { + "epoch": 0.31, + "learning_rate": 8.115180906107939e-06, + "loss": 0.5964, + "step": 2452 + }, + { + "epoch": 0.31, + "learning_rate": 8.113592610156986e-06, + "loss": 0.6562, + "step": 2453 + }, + { + "epoch": 0.31, + "learning_rate": 8.112003800852761e-06, + "loss": 0.2415, + "step": 2454 + }, + { + "epoch": 0.31, + "learning_rate": 8.110414478457222e-06, + "loss": 0.5532, + "step": 2455 + }, + { + "epoch": 0.31, + "learning_rate": 8.108824643232405e-06, + "loss": 0.5633, + "step": 2456 + }, + { + "epoch": 0.31, + "learning_rate": 8.107234295440436e-06, + "loss": 0.5527, + "step": 2457 + }, + { + "epoch": 0.31, + "learning_rate": 8.105643435343522e-06, + "loss": 0.4604, + "step": 2458 + }, + { + "epoch": 0.31, + "learning_rate": 8.104052063203955e-06, + "loss": 0.576, + "step": 2459 + }, + { + "epoch": 0.31, + "learning_rate": 8.102460179284116e-06, + "loss": 0.5808, + "step": 2460 + }, + { + "epoch": 0.31, + "learning_rate": 8.100867783846465e-06, + "loss": 0.5346, + "step": 2461 + }, + { + "epoch": 0.31, + "learning_rate": 8.099274877153545e-06, + "loss": 0.5198, + "step": 2462 + }, + { + "epoch": 0.31, + "learning_rate": 8.09768145946799e-06, + "loss": 0.5592, + "step": 2463 + }, + { + "epoch": 0.31, + "learning_rate": 8.096087531052515e-06, + "loss": 0.6097, + "step": 2464 + }, + { + "epoch": 0.31, + "learning_rate": 8.094493092169915e-06, + "loss": 0.5789, + "step": 2465 + }, + { + "epoch": 0.31, + "learning_rate": 8.092898143083077e-06, + "loss": 0.5697, + "step": 2466 + }, + { + "epoch": 0.31, + "learning_rate": 8.091302684054964e-06, + "loss": 0.5185, + "step": 2467 + }, + { + "epoch": 0.31, + "learning_rate": 8.089706715348633e-06, + "loss": 0.5262, + "step": 2468 + }, + { + "epoch": 0.31, + "learning_rate": 8.088110237227212e-06, + "loss": 0.5903, + "step": 2469 + }, + { + "epoch": 0.31, + "learning_rate": 8.086513249953926e-06, + "loss": 0.5593, + "step": 2470 + }, + { + "epoch": 0.31, + "learning_rate": 8.084915753792076e-06, + "loss": 0.5545, + "step": 2471 + }, + { + "epoch": 0.31, + "learning_rate": 8.083317749005046e-06, + "loss": 0.5085, + "step": 2472 + }, + { + "epoch": 0.31, + "learning_rate": 8.081719235856314e-06, + "loss": 0.5417, + "step": 2473 + }, + { + "epoch": 0.31, + "learning_rate": 8.080120214609428e-06, + "loss": 0.6096, + "step": 2474 + }, + { + "epoch": 0.31, + "learning_rate": 8.078520685528029e-06, + "loss": 0.5818, + "step": 2475 + }, + { + "epoch": 0.31, + "learning_rate": 8.07692064887584e-06, + "loss": 0.5589, + "step": 2476 + }, + { + "epoch": 0.31, + "learning_rate": 8.075320104916665e-06, + "loss": 0.5359, + "step": 2477 + }, + { + "epoch": 0.31, + "learning_rate": 8.073719053914395e-06, + "loss": 0.605, + "step": 2478 + }, + { + "epoch": 0.31, + "learning_rate": 8.072117496133005e-06, + "loss": 0.6111, + "step": 2479 + }, + { + "epoch": 0.31, + "learning_rate": 8.070515431836546e-06, + "loss": 0.5276, + "step": 2480 + }, + { + "epoch": 0.31, + "learning_rate": 8.068912861289163e-06, + "loss": 0.5486, + "step": 2481 + }, + { + "epoch": 0.31, + "learning_rate": 8.067309784755078e-06, + "loss": 0.654, + "step": 2482 + }, + { + "epoch": 0.31, + "learning_rate": 8.065706202498597e-06, + "loss": 0.5558, + "step": 2483 + }, + { + "epoch": 0.31, + "learning_rate": 8.064102114784116e-06, + "loss": 0.5535, + "step": 2484 + }, + { + "epoch": 0.31, + "learning_rate": 8.0624975218761e-06, + "loss": 0.5941, + "step": 2485 + }, + { + "epoch": 0.31, + "learning_rate": 8.060892424039112e-06, + "loss": 0.6133, + "step": 2486 + }, + { + "epoch": 0.31, + "learning_rate": 8.059286821537794e-06, + "loss": 0.6246, + "step": 2487 + }, + { + "epoch": 0.31, + "learning_rate": 8.057680714636862e-06, + "loss": 0.5425, + "step": 2488 + }, + { + "epoch": 0.31, + "learning_rate": 8.05607410360113e-06, + "loss": 0.5634, + "step": 2489 + }, + { + "epoch": 0.31, + "learning_rate": 8.054466988695487e-06, + "loss": 0.224, + "step": 2490 + }, + { + "epoch": 0.31, + "learning_rate": 8.0528593701849e-06, + "loss": 0.5944, + "step": 2491 + }, + { + "epoch": 0.31, + "learning_rate": 8.05125124833443e-06, + "loss": 0.5971, + "step": 2492 + }, + { + "epoch": 0.31, + "learning_rate": 8.049642623409217e-06, + "loss": 0.5886, + "step": 2493 + }, + { + "epoch": 0.31, + "learning_rate": 8.048033495674479e-06, + "loss": 0.5563, + "step": 2494 + }, + { + "epoch": 0.31, + "learning_rate": 8.046423865395521e-06, + "loss": 0.5237, + "step": 2495 + }, + { + "epoch": 0.31, + "learning_rate": 8.044813732837736e-06, + "loss": 0.5525, + "step": 2496 + }, + { + "epoch": 0.31, + "learning_rate": 8.043203098266587e-06, + "loss": 0.2248, + "step": 2497 + }, + { + "epoch": 0.31, + "learning_rate": 8.041591961947632e-06, + "loss": 0.5286, + "step": 2498 + }, + { + "epoch": 0.31, + "learning_rate": 8.039980324146508e-06, + "loss": 0.5787, + "step": 2499 + }, + { + "epoch": 0.31, + "learning_rate": 8.038368185128929e-06, + "loss": 0.5537, + "step": 2500 + }, + { + "epoch": 0.31, + "learning_rate": 8.036755545160699e-06, + "loss": 0.5219, + "step": 2501 + }, + { + "epoch": 0.31, + "learning_rate": 8.035142404507703e-06, + "loss": 0.5965, + "step": 2502 + }, + { + "epoch": 0.31, + "learning_rate": 8.033528763435905e-06, + "loss": 0.5752, + "step": 2503 + }, + { + "epoch": 0.31, + "learning_rate": 8.031914622211353e-06, + "loss": 0.5497, + "step": 2504 + }, + { + "epoch": 0.31, + "learning_rate": 8.030299981100183e-06, + "loss": 0.5751, + "step": 2505 + }, + { + "epoch": 0.31, + "learning_rate": 8.028684840368605e-06, + "loss": 0.6162, + "step": 2506 + }, + { + "epoch": 0.31, + "learning_rate": 8.027069200282916e-06, + "loss": 0.6163, + "step": 2507 + }, + { + "epoch": 0.31, + "learning_rate": 8.025453061109495e-06, + "loss": 0.5993, + "step": 2508 + }, + { + "epoch": 0.31, + "learning_rate": 8.023836423114802e-06, + "loss": 0.6128, + "step": 2509 + }, + { + "epoch": 0.31, + "learning_rate": 8.02221928656538e-06, + "loss": 0.5832, + "step": 2510 + }, + { + "epoch": 0.31, + "learning_rate": 8.020601651727855e-06, + "loss": 0.5722, + "step": 2511 + }, + { + "epoch": 0.31, + "learning_rate": 8.018983518868935e-06, + "loss": 0.5912, + "step": 2512 + }, + { + "epoch": 0.32, + "learning_rate": 8.017364888255409e-06, + "loss": 0.6002, + "step": 2513 + }, + { + "epoch": 0.32, + "learning_rate": 8.015745760154145e-06, + "loss": 0.5543, + "step": 2514 + }, + { + "epoch": 0.32, + "learning_rate": 8.014126134832103e-06, + "loss": 0.5544, + "step": 2515 + }, + { + "epoch": 0.32, + "learning_rate": 8.012506012556314e-06, + "loss": 0.5982, + "step": 2516 + }, + { + "epoch": 0.32, + "learning_rate": 8.010885393593897e-06, + "loss": 0.5047, + "step": 2517 + }, + { + "epoch": 0.32, + "learning_rate": 8.00926427821205e-06, + "loss": 0.6154, + "step": 2518 + }, + { + "epoch": 0.32, + "learning_rate": 8.007642666678058e-06, + "loss": 0.5805, + "step": 2519 + }, + { + "epoch": 0.32, + "learning_rate": 8.006020559259279e-06, + "loss": 0.4928, + "step": 2520 + }, + { + "epoch": 0.32, + "learning_rate": 8.004397956223162e-06, + "loss": 0.5309, + "step": 2521 + }, + { + "epoch": 0.32, + "learning_rate": 8.002774857837232e-06, + "loss": 0.5808, + "step": 2522 + }, + { + "epoch": 0.32, + "learning_rate": 8.001151264369095e-06, + "loss": 0.5762, + "step": 2523 + }, + { + "epoch": 0.32, + "learning_rate": 7.999527176086443e-06, + "loss": 0.5081, + "step": 2524 + }, + { + "epoch": 0.32, + "learning_rate": 7.997902593257049e-06, + "loss": 0.5984, + "step": 2525 + }, + { + "epoch": 0.32, + "learning_rate": 7.996277516148763e-06, + "loss": 0.5794, + "step": 2526 + }, + { + "epoch": 0.32, + "learning_rate": 7.99465194502952e-06, + "loss": 0.5809, + "step": 2527 + }, + { + "epoch": 0.32, + "learning_rate": 7.993025880167336e-06, + "loss": 0.5133, + "step": 2528 + }, + { + "epoch": 0.32, + "learning_rate": 7.99139932183031e-06, + "loss": 0.5274, + "step": 2529 + }, + { + "epoch": 0.32, + "learning_rate": 7.98977227028662e-06, + "loss": 0.5476, + "step": 2530 + }, + { + "epoch": 0.32, + "learning_rate": 7.988144725804522e-06, + "loss": 0.5502, + "step": 2531 + }, + { + "epoch": 0.32, + "learning_rate": 7.986516688652365e-06, + "loss": 0.5761, + "step": 2532 + }, + { + "epoch": 0.32, + "learning_rate": 7.984888159098564e-06, + "loss": 0.5777, + "step": 2533 + }, + { + "epoch": 0.32, + "learning_rate": 7.983259137411626e-06, + "loss": 0.5754, + "step": 2534 + }, + { + "epoch": 0.32, + "learning_rate": 7.981629623860136e-06, + "loss": 0.5954, + "step": 2535 + }, + { + "epoch": 0.32, + "learning_rate": 7.979999618712759e-06, + "loss": 0.5228, + "step": 2536 + }, + { + "epoch": 0.32, + "learning_rate": 7.978369122238242e-06, + "loss": 0.6983, + "step": 2537 + }, + { + "epoch": 0.32, + "learning_rate": 7.976738134705413e-06, + "loss": 0.5701, + "step": 2538 + }, + { + "epoch": 0.32, + "learning_rate": 7.975106656383182e-06, + "loss": 0.532, + "step": 2539 + }, + { + "epoch": 0.32, + "learning_rate": 7.973474687540538e-06, + "loss": 0.2603, + "step": 2540 + }, + { + "epoch": 0.32, + "learning_rate": 7.971842228446551e-06, + "loss": 0.2235, + "step": 2541 + }, + { + "epoch": 0.32, + "learning_rate": 7.970209279370372e-06, + "loss": 0.5963, + "step": 2542 + }, + { + "epoch": 0.32, + "learning_rate": 7.968575840581236e-06, + "loss": 0.6097, + "step": 2543 + }, + { + "epoch": 0.32, + "learning_rate": 7.966941912348454e-06, + "loss": 0.6059, + "step": 2544 + }, + { + "epoch": 0.32, + "learning_rate": 7.965307494941421e-06, + "loss": 0.5603, + "step": 2545 + }, + { + "epoch": 0.32, + "learning_rate": 7.96367258862961e-06, + "loss": 0.6231, + "step": 2546 + }, + { + "epoch": 0.32, + "learning_rate": 7.962037193682576e-06, + "loss": 0.5616, + "step": 2547 + }, + { + "epoch": 0.32, + "learning_rate": 7.960401310369957e-06, + "loss": 0.2254, + "step": 2548 + }, + { + "epoch": 0.32, + "learning_rate": 7.958764938961466e-06, + "loss": 0.6352, + "step": 2549 + }, + { + "epoch": 0.32, + "learning_rate": 7.957128079726902e-06, + "loss": 0.6208, + "step": 2550 + }, + { + "epoch": 0.32, + "learning_rate": 7.95549073293614e-06, + "loss": 0.5673, + "step": 2551 + }, + { + "epoch": 0.32, + "learning_rate": 7.953852898859138e-06, + "loss": 0.5679, + "step": 2552 + }, + { + "epoch": 0.32, + "learning_rate": 7.952214577765933e-06, + "loss": 0.5191, + "step": 2553 + }, + { + "epoch": 0.32, + "learning_rate": 7.950575769926647e-06, + "loss": 0.589, + "step": 2554 + }, + { + "epoch": 0.32, + "learning_rate": 7.948936475611473e-06, + "loss": 0.5735, + "step": 2555 + }, + { + "epoch": 0.32, + "learning_rate": 7.947296695090693e-06, + "loss": 0.5388, + "step": 2556 + }, + { + "epoch": 0.32, + "learning_rate": 7.945656428634666e-06, + "loss": 0.6195, + "step": 2557 + }, + { + "epoch": 0.32, + "learning_rate": 7.944015676513827e-06, + "loss": 0.5704, + "step": 2558 + }, + { + "epoch": 0.32, + "learning_rate": 7.942374438998699e-06, + "loss": 0.5766, + "step": 2559 + }, + { + "epoch": 0.32, + "learning_rate": 7.940732716359876e-06, + "loss": 0.6312, + "step": 2560 + }, + { + "epoch": 0.32, + "learning_rate": 7.939090508868041e-06, + "loss": 0.564, + "step": 2561 + }, + { + "epoch": 0.32, + "learning_rate": 7.937447816793952e-06, + "loss": 0.5484, + "step": 2562 + }, + { + "epoch": 0.32, + "learning_rate": 7.935804640408449e-06, + "loss": 0.5602, + "step": 2563 + }, + { + "epoch": 0.32, + "learning_rate": 7.934160979982447e-06, + "loss": 0.6039, + "step": 2564 + }, + { + "epoch": 0.32, + "learning_rate": 7.932516835786948e-06, + "loss": 0.5858, + "step": 2565 + }, + { + "epoch": 0.32, + "learning_rate": 7.930872208093026e-06, + "loss": 0.5482, + "step": 2566 + }, + { + "epoch": 0.32, + "learning_rate": 7.929227097171842e-06, + "loss": 0.5672, + "step": 2567 + }, + { + "epoch": 0.32, + "learning_rate": 7.927581503294632e-06, + "loss": 0.5395, + "step": 2568 + }, + { + "epoch": 0.32, + "learning_rate": 7.925935426732714e-06, + "loss": 0.5932, + "step": 2569 + }, + { + "epoch": 0.32, + "learning_rate": 7.924288867757487e-06, + "loss": 0.5611, + "step": 2570 + }, + { + "epoch": 0.32, + "learning_rate": 7.922641826640422e-06, + "loss": 0.5777, + "step": 2571 + }, + { + "epoch": 0.32, + "learning_rate": 7.920994303653078e-06, + "loss": 0.5253, + "step": 2572 + }, + { + "epoch": 0.32, + "learning_rate": 7.91934629906709e-06, + "loss": 0.5441, + "step": 2573 + }, + { + "epoch": 0.32, + "learning_rate": 7.917697813154173e-06, + "loss": 0.5715, + "step": 2574 + }, + { + "epoch": 0.32, + "learning_rate": 7.916048846186119e-06, + "loss": 0.6007, + "step": 2575 + }, + { + "epoch": 0.32, + "learning_rate": 7.914399398434804e-06, + "loss": 0.6144, + "step": 2576 + }, + { + "epoch": 0.32, + "learning_rate": 7.912749470172177e-06, + "loss": 0.5032, + "step": 2577 + }, + { + "epoch": 0.32, + "learning_rate": 7.911099061670274e-06, + "loss": 0.507, + "step": 2578 + }, + { + "epoch": 0.32, + "learning_rate": 7.909448173201201e-06, + "loss": 0.5033, + "step": 2579 + }, + { + "epoch": 0.32, + "learning_rate": 7.907796805037152e-06, + "loss": 0.2415, + "step": 2580 + }, + { + "epoch": 0.32, + "learning_rate": 7.906144957450394e-06, + "loss": 0.5249, + "step": 2581 + }, + { + "epoch": 0.32, + "learning_rate": 7.904492630713279e-06, + "loss": 0.51, + "step": 2582 + }, + { + "epoch": 0.32, + "learning_rate": 7.902839825098228e-06, + "loss": 0.5354, + "step": 2583 + }, + { + "epoch": 0.32, + "learning_rate": 7.901186540877752e-06, + "loss": 0.234, + "step": 2584 + }, + { + "epoch": 0.32, + "learning_rate": 7.899532778324435e-06, + "loss": 0.4917, + "step": 2585 + }, + { + "epoch": 0.32, + "learning_rate": 7.89787853771094e-06, + "loss": 0.5593, + "step": 2586 + }, + { + "epoch": 0.32, + "learning_rate": 7.89622381931001e-06, + "loss": 0.5806, + "step": 2587 + }, + { + "epoch": 0.32, + "learning_rate": 7.894568623394467e-06, + "loss": 0.5615, + "step": 2588 + }, + { + "epoch": 0.32, + "learning_rate": 7.892912950237213e-06, + "loss": 0.5515, + "step": 2589 + }, + { + "epoch": 0.32, + "learning_rate": 7.891256800111222e-06, + "loss": 0.5905, + "step": 2590 + }, + { + "epoch": 0.32, + "learning_rate": 7.889600173289556e-06, + "loss": 0.6038, + "step": 2591 + }, + { + "epoch": 0.32, + "learning_rate": 7.88794307004535e-06, + "loss": 0.578, + "step": 2592 + }, + { + "epoch": 0.33, + "learning_rate": 7.886285490651817e-06, + "loss": 0.5655, + "step": 2593 + }, + { + "epoch": 0.33, + "learning_rate": 7.884627435382254e-06, + "loss": 0.6123, + "step": 2594 + }, + { + "epoch": 0.33, + "learning_rate": 7.88296890451003e-06, + "loss": 0.6206, + "step": 2595 + }, + { + "epoch": 0.33, + "learning_rate": 7.881309898308593e-06, + "loss": 0.5771, + "step": 2596 + }, + { + "epoch": 0.33, + "learning_rate": 7.879650417051476e-06, + "loss": 0.5933, + "step": 2597 + }, + { + "epoch": 0.33, + "learning_rate": 7.877990461012283e-06, + "loss": 0.5975, + "step": 2598 + }, + { + "epoch": 0.33, + "learning_rate": 7.876330030464704e-06, + "loss": 0.2191, + "step": 2599 + }, + { + "epoch": 0.33, + "learning_rate": 7.874669125682492e-06, + "loss": 0.5629, + "step": 2600 + }, + { + "epoch": 0.33, + "learning_rate": 7.873007746939499e-06, + "loss": 0.563, + "step": 2601 + }, + { + "epoch": 0.33, + "learning_rate": 7.871345894509639e-06, + "loss": 0.5746, + "step": 2602 + }, + { + "epoch": 0.33, + "learning_rate": 7.86968356866691e-06, + "loss": 0.5316, + "step": 2603 + }, + { + "epoch": 0.33, + "learning_rate": 7.868020769685391e-06, + "loss": 0.647, + "step": 2604 + }, + { + "epoch": 0.33, + "learning_rate": 7.866357497839233e-06, + "loss": 0.5831, + "step": 2605 + }, + { + "epoch": 0.33, + "learning_rate": 7.864693753402668e-06, + "loss": 0.5595, + "step": 2606 + }, + { + "epoch": 0.33, + "learning_rate": 7.863029536650008e-06, + "loss": 0.6073, + "step": 2607 + }, + { + "epoch": 0.33, + "learning_rate": 7.861364847855637e-06, + "loss": 0.5401, + "step": 2608 + }, + { + "epoch": 0.33, + "learning_rate": 7.859699687294022e-06, + "loss": 0.5395, + "step": 2609 + }, + { + "epoch": 0.33, + "learning_rate": 7.858034055239709e-06, + "loss": 0.5914, + "step": 2610 + }, + { + "epoch": 0.33, + "learning_rate": 7.856367951967313e-06, + "loss": 0.5458, + "step": 2611 + }, + { + "epoch": 0.33, + "learning_rate": 7.854701377751538e-06, + "loss": 0.6005, + "step": 2612 + }, + { + "epoch": 0.33, + "learning_rate": 7.853034332867157e-06, + "loss": 0.5611, + "step": 2613 + }, + { + "epoch": 0.33, + "learning_rate": 7.851366817589026e-06, + "loss": 0.5403, + "step": 2614 + }, + { + "epoch": 0.33, + "learning_rate": 7.849698832192076e-06, + "loss": 0.6277, + "step": 2615 + }, + { + "epoch": 0.33, + "learning_rate": 7.848030376951314e-06, + "loss": 0.2205, + "step": 2616 + }, + { + "epoch": 0.33, + "learning_rate": 7.846361452141828e-06, + "loss": 0.5579, + "step": 2617 + }, + { + "epoch": 0.33, + "learning_rate": 7.844692058038785e-06, + "loss": 0.5712, + "step": 2618 + }, + { + "epoch": 0.33, + "learning_rate": 7.843022194917419e-06, + "loss": 0.5775, + "step": 2619 + }, + { + "epoch": 0.33, + "learning_rate": 7.841351863053053e-06, + "loss": 0.5537, + "step": 2620 + }, + { + "epoch": 0.33, + "learning_rate": 7.839681062721082e-06, + "loss": 0.4929, + "step": 2621 + }, + { + "epoch": 0.33, + "learning_rate": 7.83800979419698e-06, + "loss": 0.513, + "step": 2622 + }, + { + "epoch": 0.33, + "learning_rate": 7.836338057756297e-06, + "loss": 0.5214, + "step": 2623 + }, + { + "epoch": 0.33, + "learning_rate": 7.834665853674659e-06, + "loss": 0.6042, + "step": 2624 + }, + { + "epoch": 0.33, + "learning_rate": 7.83299318222777e-06, + "loss": 0.5658, + "step": 2625 + }, + { + "epoch": 0.33, + "learning_rate": 7.831320043691413e-06, + "loss": 0.5988, + "step": 2626 + }, + { + "epoch": 0.33, + "learning_rate": 7.829646438341447e-06, + "loss": 0.5662, + "step": 2627 + }, + { + "epoch": 0.33, + "learning_rate": 7.827972366453806e-06, + "loss": 0.5707, + "step": 2628 + }, + { + "epoch": 0.33, + "learning_rate": 7.826297828304505e-06, + "loss": 0.5764, + "step": 2629 + }, + { + "epoch": 0.33, + "learning_rate": 7.82462282416963e-06, + "loss": 0.5454, + "step": 2630 + }, + { + "epoch": 0.33, + "learning_rate": 7.82294735432535e-06, + "loss": 0.5199, + "step": 2631 + }, + { + "epoch": 0.33, + "learning_rate": 7.821271419047907e-06, + "loss": 0.5468, + "step": 2632 + }, + { + "epoch": 0.33, + "learning_rate": 7.819595018613619e-06, + "loss": 0.5482, + "step": 2633 + }, + { + "epoch": 0.33, + "learning_rate": 7.817918153298885e-06, + "loss": 0.5441, + "step": 2634 + }, + { + "epoch": 0.33, + "learning_rate": 7.816240823380175e-06, + "loss": 0.5177, + "step": 2635 + }, + { + "epoch": 0.33, + "learning_rate": 7.81456302913404e-06, + "loss": 0.5267, + "step": 2636 + }, + { + "epoch": 0.33, + "learning_rate": 7.812884770837108e-06, + "loss": 0.6136, + "step": 2637 + }, + { + "epoch": 0.33, + "learning_rate": 7.811206048766081e-06, + "loss": 0.6353, + "step": 2638 + }, + { + "epoch": 0.33, + "learning_rate": 7.809526863197734e-06, + "loss": 0.5469, + "step": 2639 + }, + { + "epoch": 0.33, + "learning_rate": 7.807847214408928e-06, + "loss": 0.5459, + "step": 2640 + }, + { + "epoch": 0.33, + "learning_rate": 7.806167102676591e-06, + "loss": 0.576, + "step": 2641 + }, + { + "epoch": 0.33, + "learning_rate": 7.804486528277737e-06, + "loss": 0.5513, + "step": 2642 + }, + { + "epoch": 0.33, + "learning_rate": 7.802805491489443e-06, + "loss": 0.5447, + "step": 2643 + }, + { + "epoch": 0.33, + "learning_rate": 7.801123992588873e-06, + "loss": 0.5396, + "step": 2644 + }, + { + "epoch": 0.33, + "learning_rate": 7.799442031853265e-06, + "loss": 0.5691, + "step": 2645 + }, + { + "epoch": 0.33, + "learning_rate": 7.797759609559932e-06, + "loss": 0.6398, + "step": 2646 + }, + { + "epoch": 0.33, + "learning_rate": 7.796076725986261e-06, + "loss": 0.5065, + "step": 2647 + }, + { + "epoch": 0.33, + "learning_rate": 7.794393381409722e-06, + "loss": 0.6103, + "step": 2648 + }, + { + "epoch": 0.33, + "learning_rate": 7.79270957610785e-06, + "loss": 0.5533, + "step": 2649 + }, + { + "epoch": 0.33, + "learning_rate": 7.791025310358266e-06, + "loss": 0.5698, + "step": 2650 + }, + { + "epoch": 0.33, + "learning_rate": 7.789340584438664e-06, + "loss": 0.5686, + "step": 2651 + }, + { + "epoch": 0.33, + "learning_rate": 7.78765539862681e-06, + "loss": 0.6068, + "step": 2652 + }, + { + "epoch": 0.33, + "learning_rate": 7.785969753200552e-06, + "loss": 0.5727, + "step": 2653 + }, + { + "epoch": 0.33, + "learning_rate": 7.784283648437807e-06, + "loss": 0.5459, + "step": 2654 + }, + { + "epoch": 0.33, + "learning_rate": 7.782597084616573e-06, + "loss": 0.5866, + "step": 2655 + }, + { + "epoch": 0.33, + "learning_rate": 7.780910062014924e-06, + "loss": 0.5661, + "step": 2656 + }, + { + "epoch": 0.33, + "learning_rate": 7.779222580911004e-06, + "loss": 0.5184, + "step": 2657 + }, + { + "epoch": 0.33, + "learning_rate": 7.777534641583042e-06, + "loss": 0.6193, + "step": 2658 + }, + { + "epoch": 0.33, + "learning_rate": 7.775846244309329e-06, + "loss": 0.6126, + "step": 2659 + }, + { + "epoch": 0.33, + "learning_rate": 7.774157389368244e-06, + "loss": 0.6067, + "step": 2660 + }, + { + "epoch": 0.33, + "learning_rate": 7.772468077038239e-06, + "loss": 0.6155, + "step": 2661 + }, + { + "epoch": 0.33, + "learning_rate": 7.770778307597835e-06, + "loss": 0.5468, + "step": 2662 + }, + { + "epoch": 0.33, + "learning_rate": 7.769088081325633e-06, + "loss": 0.6287, + "step": 2663 + }, + { + "epoch": 0.33, + "learning_rate": 7.767397398500312e-06, + "loss": 0.2188, + "step": 2664 + }, + { + "epoch": 0.33, + "learning_rate": 7.765706259400619e-06, + "loss": 0.5469, + "step": 2665 + }, + { + "epoch": 0.33, + "learning_rate": 7.764014664305385e-06, + "loss": 0.5253, + "step": 2666 + }, + { + "epoch": 0.33, + "learning_rate": 7.762322613493504e-06, + "loss": 0.542, + "step": 2667 + }, + { + "epoch": 0.33, + "learning_rate": 7.760630107243963e-06, + "loss": 0.5822, + "step": 2668 + }, + { + "epoch": 0.33, + "learning_rate": 7.758937145835806e-06, + "loss": 0.6177, + "step": 2669 + }, + { + "epoch": 0.33, + "learning_rate": 7.757243729548163e-06, + "loss": 0.505, + "step": 2670 + }, + { + "epoch": 0.33, + "learning_rate": 7.755549858660236e-06, + "loss": 0.2599, + "step": 2671 + }, + { + "epoch": 0.33, + "learning_rate": 7.7538555334513e-06, + "loss": 0.5505, + "step": 2672 + }, + { + "epoch": 0.34, + "learning_rate": 7.752160754200708e-06, + "loss": 0.5365, + "step": 2673 + }, + { + "epoch": 0.34, + "learning_rate": 7.750465521187886e-06, + "loss": 0.5009, + "step": 2674 + }, + { + "epoch": 0.34, + "learning_rate": 7.748769834692338e-06, + "loss": 0.6295, + "step": 2675 + }, + { + "epoch": 0.34, + "learning_rate": 7.747073694993634e-06, + "loss": 0.5599, + "step": 2676 + }, + { + "epoch": 0.34, + "learning_rate": 7.74537710237143e-06, + "loss": 0.5422, + "step": 2677 + }, + { + "epoch": 0.34, + "learning_rate": 7.743680057105452e-06, + "loss": 0.5408, + "step": 2678 + }, + { + "epoch": 0.34, + "learning_rate": 7.741982559475496e-06, + "loss": 0.514, + "step": 2679 + }, + { + "epoch": 0.34, + "learning_rate": 7.740284609761442e-06, + "loss": 0.5689, + "step": 2680 + }, + { + "epoch": 0.34, + "learning_rate": 7.738586208243232e-06, + "loss": 0.5599, + "step": 2681 + }, + { + "epoch": 0.34, + "learning_rate": 7.736887355200898e-06, + "loss": 0.5937, + "step": 2682 + }, + { + "epoch": 0.34, + "learning_rate": 7.735188050914533e-06, + "loss": 0.5588, + "step": 2683 + }, + { + "epoch": 0.34, + "learning_rate": 7.733488295664311e-06, + "loss": 0.5473, + "step": 2684 + }, + { + "epoch": 0.34, + "learning_rate": 7.73178808973048e-06, + "loss": 0.5231, + "step": 2685 + }, + { + "epoch": 0.34, + "learning_rate": 7.730087433393359e-06, + "loss": 0.5412, + "step": 2686 + }, + { + "epoch": 0.34, + "learning_rate": 7.728386326933344e-06, + "loss": 0.5937, + "step": 2687 + }, + { + "epoch": 0.34, + "learning_rate": 7.726684770630907e-06, + "loss": 0.5592, + "step": 2688 + }, + { + "epoch": 0.34, + "learning_rate": 7.72498276476659e-06, + "loss": 0.5263, + "step": 2689 + }, + { + "epoch": 0.34, + "learning_rate": 7.723280309621015e-06, + "loss": 0.5289, + "step": 2690 + }, + { + "epoch": 0.34, + "learning_rate": 7.721577405474868e-06, + "loss": 0.5358, + "step": 2691 + }, + { + "epoch": 0.34, + "learning_rate": 7.719874052608916e-06, + "loss": 0.547, + "step": 2692 + }, + { + "epoch": 0.34, + "learning_rate": 7.718170251304002e-06, + "loss": 0.6013, + "step": 2693 + }, + { + "epoch": 0.34, + "learning_rate": 7.716466001841041e-06, + "loss": 0.612, + "step": 2694 + }, + { + "epoch": 0.34, + "learning_rate": 7.714761304501017e-06, + "loss": 0.5313, + "step": 2695 + }, + { + "epoch": 0.34, + "learning_rate": 7.713056159564994e-06, + "loss": 0.5242, + "step": 2696 + }, + { + "epoch": 0.34, + "learning_rate": 7.711350567314108e-06, + "loss": 0.5558, + "step": 2697 + }, + { + "epoch": 0.34, + "learning_rate": 7.709644528029569e-06, + "loss": 0.5065, + "step": 2698 + }, + { + "epoch": 0.34, + "learning_rate": 7.707938041992656e-06, + "loss": 0.5476, + "step": 2699 + }, + { + "epoch": 0.34, + "learning_rate": 7.70623110948473e-06, + "loss": 0.5605, + "step": 2700 + }, + { + "epoch": 0.34, + "learning_rate": 7.704523730787221e-06, + "loss": 0.6001, + "step": 2701 + }, + { + "epoch": 0.34, + "learning_rate": 7.70281590618163e-06, + "loss": 0.5361, + "step": 2702 + }, + { + "epoch": 0.34, + "learning_rate": 7.701107635949535e-06, + "loss": 0.5146, + "step": 2703 + }, + { + "epoch": 0.34, + "learning_rate": 7.699398920372591e-06, + "loss": 0.5882, + "step": 2704 + }, + { + "epoch": 0.34, + "learning_rate": 7.69768975973252e-06, + "loss": 0.5305, + "step": 2705 + }, + { + "epoch": 0.34, + "learning_rate": 7.695980154311116e-06, + "loss": 0.5932, + "step": 2706 + }, + { + "epoch": 0.34, + "learning_rate": 7.694270104390256e-06, + "loss": 0.5188, + "step": 2707 + }, + { + "epoch": 0.34, + "learning_rate": 7.69255961025188e-06, + "loss": 0.6259, + "step": 2708 + }, + { + "epoch": 0.34, + "learning_rate": 7.690848672178007e-06, + "loss": 0.5444, + "step": 2709 + }, + { + "epoch": 0.34, + "learning_rate": 7.689137290450725e-06, + "loss": 0.5871, + "step": 2710 + }, + { + "epoch": 0.34, + "learning_rate": 7.687425465352204e-06, + "loss": 0.5286, + "step": 2711 + }, + { + "epoch": 0.34, + "learning_rate": 7.685713197164676e-06, + "loss": 0.5633, + "step": 2712 + }, + { + "epoch": 0.34, + "learning_rate": 7.684000486170453e-06, + "loss": 0.5561, + "step": 2713 + }, + { + "epoch": 0.34, + "learning_rate": 7.68228733265192e-06, + "loss": 0.2216, + "step": 2714 + }, + { + "epoch": 0.34, + "learning_rate": 7.680573736891529e-06, + "loss": 0.5676, + "step": 2715 + }, + { + "epoch": 0.34, + "learning_rate": 7.67885969917181e-06, + "loss": 0.5051, + "step": 2716 + }, + { + "epoch": 0.34, + "learning_rate": 7.677145219775364e-06, + "loss": 0.5268, + "step": 2717 + }, + { + "epoch": 0.34, + "learning_rate": 7.67543029898487e-06, + "loss": 0.5607, + "step": 2718 + }, + { + "epoch": 0.34, + "learning_rate": 7.67371493708307e-06, + "loss": 0.5669, + "step": 2719 + }, + { + "epoch": 0.34, + "learning_rate": 7.671999134352788e-06, + "loss": 0.5756, + "step": 2720 + }, + { + "epoch": 0.34, + "learning_rate": 7.670282891076915e-06, + "loss": 0.5668, + "step": 2721 + }, + { + "epoch": 0.34, + "learning_rate": 7.668566207538417e-06, + "loss": 0.5951, + "step": 2722 + }, + { + "epoch": 0.34, + "learning_rate": 7.666849084020332e-06, + "loss": 0.6103, + "step": 2723 + }, + { + "epoch": 0.34, + "learning_rate": 7.66513152080577e-06, + "loss": 0.5754, + "step": 2724 + }, + { + "epoch": 0.34, + "learning_rate": 7.663413518177915e-06, + "loss": 0.4823, + "step": 2725 + }, + { + "epoch": 0.34, + "learning_rate": 7.66169507642002e-06, + "loss": 0.5859, + "step": 2726 + }, + { + "epoch": 0.34, + "learning_rate": 7.659976195815417e-06, + "loss": 0.5533, + "step": 2727 + }, + { + "epoch": 0.34, + "learning_rate": 7.658256876647503e-06, + "loss": 0.5585, + "step": 2728 + }, + { + "epoch": 0.34, + "learning_rate": 7.656537119199751e-06, + "loss": 0.6102, + "step": 2729 + }, + { + "epoch": 0.34, + "learning_rate": 7.65481692375571e-06, + "loss": 0.5035, + "step": 2730 + }, + { + "epoch": 0.34, + "learning_rate": 7.653096290598992e-06, + "loss": 0.6101, + "step": 2731 + }, + { + "epoch": 0.34, + "learning_rate": 7.651375220013287e-06, + "loss": 0.5337, + "step": 2732 + }, + { + "epoch": 0.34, + "learning_rate": 7.649653712282355e-06, + "loss": 0.6181, + "step": 2733 + }, + { + "epoch": 0.34, + "learning_rate": 7.647931767690036e-06, + "loss": 0.5547, + "step": 2734 + }, + { + "epoch": 0.34, + "learning_rate": 7.646209386520229e-06, + "loss": 0.2167, + "step": 2735 + }, + { + "epoch": 0.34, + "learning_rate": 7.644486569056913e-06, + "loss": 0.6053, + "step": 2736 + }, + { + "epoch": 0.34, + "learning_rate": 7.642763315584137e-06, + "loss": 0.5654, + "step": 2737 + }, + { + "epoch": 0.34, + "learning_rate": 7.641039626386025e-06, + "loss": 0.5677, + "step": 2738 + }, + { + "epoch": 0.34, + "learning_rate": 7.639315501746767e-06, + "loss": 0.5795, + "step": 2739 + }, + { + "epoch": 0.34, + "learning_rate": 7.637590941950628e-06, + "loss": 0.509, + "step": 2740 + }, + { + "epoch": 0.34, + "learning_rate": 7.635865947281948e-06, + "loss": 0.5879, + "step": 2741 + }, + { + "epoch": 0.34, + "learning_rate": 7.634140518025133e-06, + "loss": 0.5662, + "step": 2742 + }, + { + "epoch": 0.34, + "learning_rate": 7.632414654464662e-06, + "loss": 0.6278, + "step": 2743 + }, + { + "epoch": 0.34, + "learning_rate": 7.630688356885088e-06, + "loss": 0.5031, + "step": 2744 + }, + { + "epoch": 0.34, + "learning_rate": 7.628961625571032e-06, + "loss": 0.5712, + "step": 2745 + }, + { + "epoch": 0.34, + "learning_rate": 7.6272344608071916e-06, + "loss": 0.5578, + "step": 2746 + }, + { + "epoch": 0.34, + "learning_rate": 7.6255068628783314e-06, + "loss": 0.5623, + "step": 2747 + }, + { + "epoch": 0.34, + "learning_rate": 7.62377883206929e-06, + "loss": 0.5305, + "step": 2748 + }, + { + "epoch": 0.34, + "learning_rate": 7.622050368664975e-06, + "loss": 0.5978, + "step": 2749 + }, + { + "epoch": 0.34, + "learning_rate": 7.620321472950367e-06, + "loss": 0.6061, + "step": 2750 + }, + { + "epoch": 0.34, + "learning_rate": 7.618592145210517e-06, + "loss": 0.606, + "step": 2751 + }, + { + "epoch": 0.34, + "learning_rate": 7.61686238573055e-06, + "loss": 0.6301, + "step": 2752 + }, + { + "epoch": 0.35, + "learning_rate": 7.615132194795656e-06, + "loss": 0.2178, + "step": 2753 + }, + { + "epoch": 0.35, + "learning_rate": 7.6134015726911035e-06, + "loss": 0.597, + "step": 2754 + }, + { + "epoch": 0.35, + "learning_rate": 7.611670519702228e-06, + "loss": 0.6077, + "step": 2755 + }, + { + "epoch": 0.35, + "learning_rate": 7.609939036114435e-06, + "loss": 0.6016, + "step": 2756 + }, + { + "epoch": 0.35, + "learning_rate": 7.608207122213206e-06, + "loss": 0.6745, + "step": 2757 + }, + { + "epoch": 0.35, + "learning_rate": 7.606474778284085e-06, + "loss": 0.6, + "step": 2758 + }, + { + "epoch": 0.35, + "learning_rate": 7.604742004612698e-06, + "loss": 0.6055, + "step": 2759 + }, + { + "epoch": 0.35, + "learning_rate": 7.6030088014847315e-06, + "loss": 0.627, + "step": 2760 + }, + { + "epoch": 0.35, + "learning_rate": 7.601275169185949e-06, + "loss": 0.5732, + "step": 2761 + }, + { + "epoch": 0.35, + "learning_rate": 7.5995411080021806e-06, + "loss": 0.5725, + "step": 2762 + }, + { + "epoch": 0.35, + "learning_rate": 7.5978066182193345e-06, + "loss": 0.5433, + "step": 2763 + }, + { + "epoch": 0.35, + "learning_rate": 7.59607170012338e-06, + "loss": 0.5741, + "step": 2764 + }, + { + "epoch": 0.35, + "learning_rate": 7.594336354000362e-06, + "loss": 0.5845, + "step": 2765 + }, + { + "epoch": 0.35, + "learning_rate": 7.592600580136398e-06, + "loss": 0.5327, + "step": 2766 + }, + { + "epoch": 0.35, + "learning_rate": 7.59086437881767e-06, + "loss": 0.5598, + "step": 2767 + }, + { + "epoch": 0.35, + "learning_rate": 7.5891277503304385e-06, + "loss": 0.5519, + "step": 2768 + }, + { + "epoch": 0.35, + "learning_rate": 7.587390694961027e-06, + "loss": 0.5588, + "step": 2769 + }, + { + "epoch": 0.35, + "learning_rate": 7.585653212995831e-06, + "loss": 0.5764, + "step": 2770 + }, + { + "epoch": 0.35, + "learning_rate": 7.583915304721323e-06, + "loss": 0.4933, + "step": 2771 + }, + { + "epoch": 0.35, + "learning_rate": 7.582176970424034e-06, + "loss": 0.5495, + "step": 2772 + }, + { + "epoch": 0.35, + "learning_rate": 7.580438210390575e-06, + "loss": 0.5267, + "step": 2773 + }, + { + "epoch": 0.35, + "learning_rate": 7.578699024907625e-06, + "loss": 0.6276, + "step": 2774 + }, + { + "epoch": 0.35, + "learning_rate": 7.57695941426193e-06, + "loss": 0.6541, + "step": 2775 + }, + { + "epoch": 0.35, + "learning_rate": 7.575219378740308e-06, + "loss": 0.5288, + "step": 2776 + }, + { + "epoch": 0.35, + "learning_rate": 7.573478918629648e-06, + "loss": 0.5621, + "step": 2777 + }, + { + "epoch": 0.35, + "learning_rate": 7.571738034216909e-06, + "loss": 0.5658, + "step": 2778 + }, + { + "epoch": 0.35, + "learning_rate": 7.569996725789118e-06, + "loss": 0.498, + "step": 2779 + }, + { + "epoch": 0.35, + "learning_rate": 7.568254993633373e-06, + "loss": 0.5574, + "step": 2780 + }, + { + "epoch": 0.35, + "learning_rate": 7.566512838036841e-06, + "loss": 0.5611, + "step": 2781 + }, + { + "epoch": 0.35, + "learning_rate": 7.5647702592867645e-06, + "loss": 0.4994, + "step": 2782 + }, + { + "epoch": 0.35, + "learning_rate": 7.563027257670443e-06, + "loss": 0.568, + "step": 2783 + }, + { + "epoch": 0.35, + "learning_rate": 7.561283833475262e-06, + "loss": 0.5574, + "step": 2784 + }, + { + "epoch": 0.35, + "learning_rate": 7.559539986988662e-06, + "loss": 0.5551, + "step": 2785 + }, + { + "epoch": 0.35, + "learning_rate": 7.557795718498164e-06, + "loss": 0.5258, + "step": 2786 + }, + { + "epoch": 0.35, + "learning_rate": 7.5560510282913516e-06, + "loss": 0.5373, + "step": 2787 + }, + { + "epoch": 0.35, + "learning_rate": 7.554305916655879e-06, + "loss": 0.6127, + "step": 2788 + }, + { + "epoch": 0.35, + "learning_rate": 7.552560383879475e-06, + "loss": 0.5522, + "step": 2789 + }, + { + "epoch": 0.35, + "learning_rate": 7.550814430249931e-06, + "loss": 0.5819, + "step": 2790 + }, + { + "epoch": 0.35, + "learning_rate": 7.5490680560551135e-06, + "loss": 0.5587, + "step": 2791 + }, + { + "epoch": 0.35, + "learning_rate": 7.547321261582954e-06, + "loss": 0.6022, + "step": 2792 + }, + { + "epoch": 0.35, + "learning_rate": 7.545574047121457e-06, + "loss": 0.2251, + "step": 2793 + }, + { + "epoch": 0.35, + "learning_rate": 7.543826412958692e-06, + "loss": 0.586, + "step": 2794 + }, + { + "epoch": 0.35, + "learning_rate": 7.542078359382801e-06, + "loss": 0.6417, + "step": 2795 + }, + { + "epoch": 0.35, + "learning_rate": 7.540329886681994e-06, + "loss": 0.5781, + "step": 2796 + }, + { + "epoch": 0.35, + "learning_rate": 7.538580995144551e-06, + "loss": 0.6027, + "step": 2797 + }, + { + "epoch": 0.35, + "learning_rate": 7.53683168505882e-06, + "loss": 0.6447, + "step": 2798 + }, + { + "epoch": 0.35, + "learning_rate": 7.535081956713217e-06, + "loss": 0.6069, + "step": 2799 + }, + { + "epoch": 0.35, + "learning_rate": 7.533331810396231e-06, + "loss": 0.581, + "step": 2800 + }, + { + "epoch": 0.35, + "learning_rate": 7.531581246396413e-06, + "loss": 0.5318, + "step": 2801 + }, + { + "epoch": 0.35, + "learning_rate": 7.5298302650023915e-06, + "loss": 0.5771, + "step": 2802 + }, + { + "epoch": 0.35, + "learning_rate": 7.528078866502858e-06, + "loss": 0.562, + "step": 2803 + }, + { + "epoch": 0.35, + "learning_rate": 7.526327051186573e-06, + "loss": 0.5782, + "step": 2804 + }, + { + "epoch": 0.35, + "learning_rate": 7.5245748193423675e-06, + "loss": 0.5102, + "step": 2805 + }, + { + "epoch": 0.35, + "learning_rate": 7.522822171259141e-06, + "loss": 0.5795, + "step": 2806 + }, + { + "epoch": 0.35, + "learning_rate": 7.521069107225859e-06, + "loss": 0.5478, + "step": 2807 + }, + { + "epoch": 0.35, + "learning_rate": 7.519315627531562e-06, + "loss": 0.5546, + "step": 2808 + }, + { + "epoch": 0.35, + "learning_rate": 7.51756173246535e-06, + "loss": 0.5094, + "step": 2809 + }, + { + "epoch": 0.35, + "learning_rate": 7.515807422316399e-06, + "loss": 0.515, + "step": 2810 + }, + { + "epoch": 0.35, + "learning_rate": 7.514052697373953e-06, + "loss": 0.5296, + "step": 2811 + }, + { + "epoch": 0.35, + "learning_rate": 7.512297557927315e-06, + "loss": 0.5179, + "step": 2812 + }, + { + "epoch": 0.35, + "learning_rate": 7.510542004265871e-06, + "loss": 0.538, + "step": 2813 + }, + { + "epoch": 0.35, + "learning_rate": 7.5087860366790635e-06, + "loss": 0.5899, + "step": 2814 + }, + { + "epoch": 0.35, + "learning_rate": 7.5070296554564084e-06, + "loss": 0.4956, + "step": 2815 + }, + { + "epoch": 0.35, + "learning_rate": 7.50527286088749e-06, + "loss": 0.5976, + "step": 2816 + }, + { + "epoch": 0.35, + "learning_rate": 7.503515653261958e-06, + "loss": 0.5042, + "step": 2817 + }, + { + "epoch": 0.35, + "learning_rate": 7.501758032869533e-06, + "loss": 0.5784, + "step": 2818 + }, + { + "epoch": 0.35, + "learning_rate": 7.500000000000001e-06, + "loss": 0.574, + "step": 2819 + }, + { + "epoch": 0.35, + "learning_rate": 7.498241554943219e-06, + "loss": 0.6436, + "step": 2820 + }, + { + "epoch": 0.35, + "learning_rate": 7.4964826979891095e-06, + "loss": 0.5153, + "step": 2821 + }, + { + "epoch": 0.35, + "learning_rate": 7.494723429427665e-06, + "loss": 0.5478, + "step": 2822 + }, + { + "epoch": 0.35, + "learning_rate": 7.492963749548942e-06, + "loss": 0.5475, + "step": 2823 + }, + { + "epoch": 0.35, + "learning_rate": 7.491203658643073e-06, + "loss": 0.5912, + "step": 2824 + }, + { + "epoch": 0.35, + "learning_rate": 7.489443157000247e-06, + "loss": 0.5442, + "step": 2825 + }, + { + "epoch": 0.35, + "learning_rate": 7.48768224491073e-06, + "loss": 0.5418, + "step": 2826 + }, + { + "epoch": 0.35, + "learning_rate": 7.485920922664851e-06, + "loss": 0.6031, + "step": 2827 + }, + { + "epoch": 0.35, + "learning_rate": 7.4841591905530065e-06, + "loss": 0.5967, + "step": 2828 + }, + { + "epoch": 0.35, + "learning_rate": 7.4823970488656635e-06, + "loss": 0.5234, + "step": 2829 + }, + { + "epoch": 0.35, + "learning_rate": 7.480634497893357e-06, + "loss": 0.5981, + "step": 2830 + }, + { + "epoch": 0.35, + "learning_rate": 7.478871537926684e-06, + "loss": 0.541, + "step": 2831 + }, + { + "epoch": 0.35, + "learning_rate": 7.477108169256314e-06, + "loss": 0.597, + "step": 2832 + }, + { + "epoch": 0.36, + "learning_rate": 7.4753443921729806e-06, + "loss": 0.567, + "step": 2833 + }, + { + "epoch": 0.36, + "learning_rate": 7.473580206967488e-06, + "loss": 0.5919, + "step": 2834 + }, + { + "epoch": 0.36, + "learning_rate": 7.471815613930706e-06, + "loss": 0.5627, + "step": 2835 + }, + { + "epoch": 0.36, + "learning_rate": 7.470050613353571e-06, + "loss": 0.5637, + "step": 2836 + }, + { + "epoch": 0.36, + "learning_rate": 7.468285205527087e-06, + "loss": 0.5217, + "step": 2837 + }, + { + "epoch": 0.36, + "learning_rate": 7.466519390742328e-06, + "loss": 0.6298, + "step": 2838 + }, + { + "epoch": 0.36, + "learning_rate": 7.464753169290429e-06, + "loss": 0.5363, + "step": 2839 + }, + { + "epoch": 0.36, + "learning_rate": 7.462986541462598e-06, + "loss": 0.5267, + "step": 2840 + }, + { + "epoch": 0.36, + "learning_rate": 7.461219507550106e-06, + "loss": 0.5971, + "step": 2841 + }, + { + "epoch": 0.36, + "learning_rate": 7.459452067844293e-06, + "loss": 0.5317, + "step": 2842 + }, + { + "epoch": 0.36, + "learning_rate": 7.457684222636565e-06, + "loss": 0.5633, + "step": 2843 + }, + { + "epoch": 0.36, + "learning_rate": 7.455915972218398e-06, + "loss": 0.5314, + "step": 2844 + }, + { + "epoch": 0.36, + "learning_rate": 7.45414731688133e-06, + "loss": 0.5155, + "step": 2845 + }, + { + "epoch": 0.36, + "learning_rate": 7.4523782569169664e-06, + "loss": 0.5542, + "step": 2846 + }, + { + "epoch": 0.36, + "learning_rate": 7.450608792616985e-06, + "loss": 0.5275, + "step": 2847 + }, + { + "epoch": 0.36, + "learning_rate": 7.448838924273121e-06, + "loss": 0.5624, + "step": 2848 + }, + { + "epoch": 0.36, + "learning_rate": 7.447068652177184e-06, + "loss": 0.5976, + "step": 2849 + }, + { + "epoch": 0.36, + "learning_rate": 7.445297976621048e-06, + "loss": 0.51, + "step": 2850 + }, + { + "epoch": 0.36, + "learning_rate": 7.443526897896653e-06, + "loss": 0.5437, + "step": 2851 + }, + { + "epoch": 0.36, + "learning_rate": 7.441755416296001e-06, + "loss": 0.5431, + "step": 2852 + }, + { + "epoch": 0.36, + "learning_rate": 7.439983532111171e-06, + "loss": 0.5639, + "step": 2853 + }, + { + "epoch": 0.36, + "learning_rate": 7.4382112456343e-06, + "loss": 0.6175, + "step": 2854 + }, + { + "epoch": 0.36, + "learning_rate": 7.436438557157593e-06, + "loss": 0.547, + "step": 2855 + }, + { + "epoch": 0.36, + "learning_rate": 7.4346654669733205e-06, + "loss": 0.5557, + "step": 2856 + }, + { + "epoch": 0.36, + "learning_rate": 7.432891975373825e-06, + "loss": 0.5381, + "step": 2857 + }, + { + "epoch": 0.36, + "learning_rate": 7.431118082651505e-06, + "loss": 0.6345, + "step": 2858 + }, + { + "epoch": 0.36, + "learning_rate": 7.429343789098835e-06, + "loss": 0.5974, + "step": 2859 + }, + { + "epoch": 0.36, + "learning_rate": 7.42756909500835e-06, + "loss": 0.5199, + "step": 2860 + }, + { + "epoch": 0.36, + "learning_rate": 7.425794000672653e-06, + "loss": 0.5357, + "step": 2861 + }, + { + "epoch": 0.36, + "learning_rate": 7.424018506384412e-06, + "loss": 0.5661, + "step": 2862 + }, + { + "epoch": 0.36, + "learning_rate": 7.422242612436361e-06, + "loss": 0.5589, + "step": 2863 + }, + { + "epoch": 0.36, + "learning_rate": 7.420466319121301e-06, + "loss": 0.5313, + "step": 2864 + }, + { + "epoch": 0.36, + "learning_rate": 7.4186896267321e-06, + "loss": 0.5657, + "step": 2865 + }, + { + "epoch": 0.36, + "learning_rate": 7.4169125355616865e-06, + "loss": 0.5302, + "step": 2866 + }, + { + "epoch": 0.36, + "learning_rate": 7.41513504590306e-06, + "loss": 0.5429, + "step": 2867 + }, + { + "epoch": 0.36, + "learning_rate": 7.413357158049284e-06, + "loss": 0.5772, + "step": 2868 + }, + { + "epoch": 0.36, + "learning_rate": 7.4115788722934875e-06, + "loss": 0.5181, + "step": 2869 + }, + { + "epoch": 0.36, + "learning_rate": 7.409800188928865e-06, + "loss": 0.5722, + "step": 2870 + }, + { + "epoch": 0.36, + "learning_rate": 7.408021108248678e-06, + "loss": 0.5824, + "step": 2871 + }, + { + "epoch": 0.36, + "learning_rate": 7.406241630546248e-06, + "loss": 0.6426, + "step": 2872 + }, + { + "epoch": 0.36, + "learning_rate": 7.404461756114973e-06, + "loss": 0.2446, + "step": 2873 + }, + { + "epoch": 0.36, + "learning_rate": 7.402681485248303e-06, + "loss": 0.5534, + "step": 2874 + }, + { + "epoch": 0.36, + "learning_rate": 7.4009008182397645e-06, + "loss": 0.507, + "step": 2875 + }, + { + "epoch": 0.36, + "learning_rate": 7.399119755382942e-06, + "loss": 0.531, + "step": 2876 + }, + { + "epoch": 0.36, + "learning_rate": 7.3973382969714895e-06, + "loss": 0.5035, + "step": 2877 + }, + { + "epoch": 0.36, + "learning_rate": 7.395556443299126e-06, + "loss": 0.5571, + "step": 2878 + }, + { + "epoch": 0.36, + "learning_rate": 7.393774194659631e-06, + "loss": 0.5583, + "step": 2879 + }, + { + "epoch": 0.36, + "learning_rate": 7.391991551346856e-06, + "loss": 0.5274, + "step": 2880 + }, + { + "epoch": 0.36, + "learning_rate": 7.3902085136547106e-06, + "loss": 0.5711, + "step": 2881 + }, + { + "epoch": 0.36, + "learning_rate": 7.388425081877177e-06, + "loss": 0.4873, + "step": 2882 + }, + { + "epoch": 0.36, + "learning_rate": 7.386641256308296e-06, + "loss": 0.605, + "step": 2883 + }, + { + "epoch": 0.36, + "learning_rate": 7.384857037242175e-06, + "loss": 0.5776, + "step": 2884 + }, + { + "epoch": 0.36, + "learning_rate": 7.383072424972989e-06, + "loss": 0.5542, + "step": 2885 + }, + { + "epoch": 0.36, + "learning_rate": 7.381287419794976e-06, + "loss": 0.5147, + "step": 2886 + }, + { + "epoch": 0.36, + "learning_rate": 7.3795020220024345e-06, + "loss": 0.2096, + "step": 2887 + }, + { + "epoch": 0.36, + "learning_rate": 7.377716231889736e-06, + "loss": 0.5289, + "step": 2888 + }, + { + "epoch": 0.36, + "learning_rate": 7.375930049751311e-06, + "loss": 0.6053, + "step": 2889 + }, + { + "epoch": 0.36, + "learning_rate": 7.374143475881655e-06, + "loss": 0.5948, + "step": 2890 + }, + { + "epoch": 0.36, + "learning_rate": 7.372356510575333e-06, + "loss": 0.5711, + "step": 2891 + }, + { + "epoch": 0.36, + "learning_rate": 7.370569154126965e-06, + "loss": 0.586, + "step": 2892 + }, + { + "epoch": 0.36, + "learning_rate": 7.368781406831246e-06, + "loss": 0.5572, + "step": 2893 + }, + { + "epoch": 0.36, + "learning_rate": 7.36699326898293e-06, + "loss": 0.5511, + "step": 2894 + }, + { + "epoch": 0.36, + "learning_rate": 7.3652047408768325e-06, + "loss": 0.5324, + "step": 2895 + }, + { + "epoch": 0.36, + "learning_rate": 7.36341582280784e-06, + "loss": 0.548, + "step": 2896 + }, + { + "epoch": 0.36, + "learning_rate": 7.361626515070901e-06, + "loss": 0.5245, + "step": 2897 + }, + { + "epoch": 0.36, + "learning_rate": 7.3598368179610235e-06, + "loss": 0.5517, + "step": 2898 + }, + { + "epoch": 0.36, + "learning_rate": 7.358046731773288e-06, + "loss": 0.2026, + "step": 2899 + }, + { + "epoch": 0.36, + "learning_rate": 7.356256256802833e-06, + "loss": 0.6091, + "step": 2900 + }, + { + "epoch": 0.36, + "learning_rate": 7.354465393344862e-06, + "loss": 0.5742, + "step": 2901 + }, + { + "epoch": 0.36, + "learning_rate": 7.352674141694643e-06, + "loss": 0.6101, + "step": 2902 + }, + { + "epoch": 0.36, + "learning_rate": 7.350882502147513e-06, + "loss": 0.5446, + "step": 2903 + }, + { + "epoch": 0.36, + "learning_rate": 7.349090474998862e-06, + "loss": 0.6329, + "step": 2904 + }, + { + "epoch": 0.36, + "learning_rate": 7.347298060544154e-06, + "loss": 0.5409, + "step": 2905 + }, + { + "epoch": 0.36, + "learning_rate": 7.345505259078913e-06, + "loss": 0.5628, + "step": 2906 + }, + { + "epoch": 0.36, + "learning_rate": 7.343712070898728e-06, + "loss": 0.5097, + "step": 2907 + }, + { + "epoch": 0.36, + "learning_rate": 7.341918496299246e-06, + "loss": 0.628, + "step": 2908 + }, + { + "epoch": 0.36, + "learning_rate": 7.340124535576187e-06, + "loss": 0.5676, + "step": 2909 + }, + { + "epoch": 0.36, + "learning_rate": 7.33833018902533e-06, + "loss": 0.5221, + "step": 2910 + }, + { + "epoch": 0.36, + "learning_rate": 7.3365354569425154e-06, + "loss": 0.5805, + "step": 2911 + }, + { + "epoch": 0.37, + "learning_rate": 7.334740339623649e-06, + "loss": 0.5331, + "step": 2912 + }, + { + "epoch": 0.37, + "learning_rate": 7.332944837364705e-06, + "loss": 0.2378, + "step": 2913 + }, + { + "epoch": 0.37, + "learning_rate": 7.331148950461712e-06, + "loss": 0.2201, + "step": 2914 + }, + { + "epoch": 0.37, + "learning_rate": 7.329352679210769e-06, + "loss": 0.5323, + "step": 2915 + }, + { + "epoch": 0.37, + "learning_rate": 7.327556023908036e-06, + "loss": 0.5365, + "step": 2916 + }, + { + "epoch": 0.37, + "learning_rate": 7.325758984849734e-06, + "loss": 0.6246, + "step": 2917 + }, + { + "epoch": 0.37, + "learning_rate": 7.323961562332153e-06, + "loss": 0.5857, + "step": 2918 + }, + { + "epoch": 0.37, + "learning_rate": 7.3221637566516404e-06, + "loss": 0.5824, + "step": 2919 + }, + { + "epoch": 0.37, + "learning_rate": 7.32036556810461e-06, + "loss": 0.5284, + "step": 2920 + }, + { + "epoch": 0.37, + "learning_rate": 7.3185669969875394e-06, + "loss": 0.5521, + "step": 2921 + }, + { + "epoch": 0.37, + "learning_rate": 7.3167680435969645e-06, + "loss": 0.5317, + "step": 2922 + }, + { + "epoch": 0.37, + "learning_rate": 7.314968708229491e-06, + "loss": 0.5759, + "step": 2923 + }, + { + "epoch": 0.37, + "learning_rate": 7.313168991181783e-06, + "loss": 0.5907, + "step": 2924 + }, + { + "epoch": 0.37, + "learning_rate": 7.311368892750568e-06, + "loss": 0.5229, + "step": 2925 + }, + { + "epoch": 0.37, + "learning_rate": 7.309568413232638e-06, + "loss": 0.5635, + "step": 2926 + }, + { + "epoch": 0.37, + "learning_rate": 7.3077675529248445e-06, + "loss": 0.55, + "step": 2927 + }, + { + "epoch": 0.37, + "learning_rate": 7.305966312124108e-06, + "loss": 0.5834, + "step": 2928 + }, + { + "epoch": 0.37, + "learning_rate": 7.304164691127406e-06, + "loss": 0.6116, + "step": 2929 + }, + { + "epoch": 0.37, + "learning_rate": 7.302362690231779e-06, + "loss": 0.5173, + "step": 2930 + }, + { + "epoch": 0.37, + "learning_rate": 7.3005603097343335e-06, + "loss": 0.5448, + "step": 2931 + }, + { + "epoch": 0.37, + "learning_rate": 7.298757549932237e-06, + "loss": 0.5391, + "step": 2932 + }, + { + "epoch": 0.37, + "learning_rate": 7.296954411122719e-06, + "loss": 0.5627, + "step": 2933 + }, + { + "epoch": 0.37, + "learning_rate": 7.295150893603071e-06, + "loss": 0.5507, + "step": 2934 + }, + { + "epoch": 0.37, + "learning_rate": 7.29334699767065e-06, + "loss": 0.5375, + "step": 2935 + }, + { + "epoch": 0.37, + "learning_rate": 7.291542723622869e-06, + "loss": 0.55, + "step": 2936 + }, + { + "epoch": 0.37, + "learning_rate": 7.289738071757212e-06, + "loss": 0.5659, + "step": 2937 + }, + { + "epoch": 0.37, + "learning_rate": 7.287933042371218e-06, + "loss": 0.4745, + "step": 2938 + }, + { + "epoch": 0.37, + "learning_rate": 7.286127635762492e-06, + "loss": 0.5388, + "step": 2939 + }, + { + "epoch": 0.37, + "learning_rate": 7.284321852228702e-06, + "loss": 0.597, + "step": 2940 + }, + { + "epoch": 0.37, + "learning_rate": 7.282515692067572e-06, + "loss": 0.558, + "step": 2941 + }, + { + "epoch": 0.37, + "learning_rate": 7.280709155576897e-06, + "loss": 0.5875, + "step": 2942 + }, + { + "epoch": 0.37, + "learning_rate": 7.2789022430545266e-06, + "loss": 0.1993, + "step": 2943 + }, + { + "epoch": 0.37, + "learning_rate": 7.277094954798376e-06, + "loss": 0.5562, + "step": 2944 + }, + { + "epoch": 0.37, + "learning_rate": 7.275287291106423e-06, + "loss": 0.5352, + "step": 2945 + }, + { + "epoch": 0.37, + "learning_rate": 7.273479252276704e-06, + "loss": 0.5278, + "step": 2946 + }, + { + "epoch": 0.37, + "learning_rate": 7.27167083860732e-06, + "loss": 0.5875, + "step": 2947 + }, + { + "epoch": 0.37, + "learning_rate": 7.269862050396433e-06, + "loss": 0.5576, + "step": 2948 + }, + { + "epoch": 0.37, + "learning_rate": 7.268052887942267e-06, + "loss": 0.5214, + "step": 2949 + }, + { + "epoch": 0.37, + "learning_rate": 7.266243351543109e-06, + "loss": 0.2174, + "step": 2950 + }, + { + "epoch": 0.37, + "learning_rate": 7.264433441497304e-06, + "loss": 0.5687, + "step": 2951 + }, + { + "epoch": 0.37, + "learning_rate": 7.26262315810326e-06, + "loss": 0.5933, + "step": 2952 + }, + { + "epoch": 0.37, + "learning_rate": 7.26081250165945e-06, + "loss": 0.5689, + "step": 2953 + }, + { + "epoch": 0.37, + "learning_rate": 7.259001472464403e-06, + "loss": 0.5386, + "step": 2954 + }, + { + "epoch": 0.37, + "learning_rate": 7.257190070816715e-06, + "loss": 0.4974, + "step": 2955 + }, + { + "epoch": 0.37, + "learning_rate": 7.255378297015039e-06, + "loss": 0.5245, + "step": 2956 + }, + { + "epoch": 0.37, + "learning_rate": 7.253566151358092e-06, + "loss": 0.5934, + "step": 2957 + }, + { + "epoch": 0.37, + "learning_rate": 7.2517536341446486e-06, + "loss": 0.5952, + "step": 2958 + }, + { + "epoch": 0.37, + "learning_rate": 7.249940745673549e-06, + "loss": 0.5147, + "step": 2959 + }, + { + "epoch": 0.37, + "learning_rate": 7.2481274862436965e-06, + "loss": 0.5471, + "step": 2960 + }, + { + "epoch": 0.37, + "learning_rate": 7.246313856154047e-06, + "loss": 0.5712, + "step": 2961 + }, + { + "epoch": 0.37, + "learning_rate": 7.244499855703625e-06, + "loss": 0.5298, + "step": 2962 + }, + { + "epoch": 0.37, + "learning_rate": 7.242685485191514e-06, + "loss": 0.5927, + "step": 2963 + }, + { + "epoch": 0.37, + "learning_rate": 7.240870744916857e-06, + "loss": 0.5542, + "step": 2964 + }, + { + "epoch": 0.37, + "learning_rate": 7.239055635178859e-06, + "loss": 0.5568, + "step": 2965 + }, + { + "epoch": 0.37, + "learning_rate": 7.237240156276788e-06, + "loss": 0.5259, + "step": 2966 + }, + { + "epoch": 0.37, + "learning_rate": 7.235424308509967e-06, + "loss": 0.5782, + "step": 2967 + }, + { + "epoch": 0.37, + "learning_rate": 7.233608092177789e-06, + "loss": 0.5049, + "step": 2968 + }, + { + "epoch": 0.37, + "learning_rate": 7.231791507579697e-06, + "loss": 0.6096, + "step": 2969 + }, + { + "epoch": 0.37, + "learning_rate": 7.229974555015205e-06, + "loss": 0.6284, + "step": 2970 + }, + { + "epoch": 0.37, + "learning_rate": 7.228157234783879e-06, + "loss": 0.5741, + "step": 2971 + }, + { + "epoch": 0.37, + "learning_rate": 7.226339547185354e-06, + "loss": 0.5278, + "step": 2972 + }, + { + "epoch": 0.37, + "learning_rate": 7.224521492519316e-06, + "loss": 0.5739, + "step": 2973 + }, + { + "epoch": 0.37, + "learning_rate": 7.222703071085521e-06, + "loss": 0.5438, + "step": 2974 + }, + { + "epoch": 0.37, + "learning_rate": 7.220884283183777e-06, + "loss": 0.5952, + "step": 2975 + }, + { + "epoch": 0.37, + "learning_rate": 7.219065129113958e-06, + "loss": 0.5591, + "step": 2976 + }, + { + "epoch": 0.37, + "learning_rate": 7.217245609175999e-06, + "loss": 0.5729, + "step": 2977 + }, + { + "epoch": 0.37, + "learning_rate": 7.21542572366989e-06, + "loss": 0.5451, + "step": 2978 + }, + { + "epoch": 0.37, + "learning_rate": 7.213605472895686e-06, + "loss": 0.5058, + "step": 2979 + }, + { + "epoch": 0.37, + "learning_rate": 7.2117848571535024e-06, + "loss": 0.5352, + "step": 2980 + }, + { + "epoch": 0.37, + "learning_rate": 7.209963876743509e-06, + "loss": 0.5477, + "step": 2981 + }, + { + "epoch": 0.37, + "learning_rate": 7.2081425319659435e-06, + "loss": 0.622, + "step": 2982 + }, + { + "epoch": 0.37, + "learning_rate": 7.2063208231210975e-06, + "loss": 0.5925, + "step": 2983 + }, + { + "epoch": 0.37, + "learning_rate": 7.204498750509325e-06, + "loss": 0.5386, + "step": 2984 + }, + { + "epoch": 0.37, + "learning_rate": 7.202676314431043e-06, + "loss": 0.5133, + "step": 2985 + }, + { + "epoch": 0.37, + "learning_rate": 7.200853515186723e-06, + "loss": 0.5439, + "step": 2986 + }, + { + "epoch": 0.37, + "learning_rate": 7.199030353076898e-06, + "loss": 0.5595, + "step": 2987 + }, + { + "epoch": 0.37, + "learning_rate": 7.1972068284021646e-06, + "loss": 0.4926, + "step": 2988 + }, + { + "epoch": 0.37, + "learning_rate": 7.195382941463174e-06, + "loss": 0.5688, + "step": 2989 + }, + { + "epoch": 0.37, + "learning_rate": 7.193558692560639e-06, + "loss": 0.6256, + "step": 2990 + }, + { + "epoch": 0.37, + "learning_rate": 7.191734081995335e-06, + "loss": 0.5559, + "step": 2991 + }, + { + "epoch": 0.38, + "learning_rate": 7.189909110068092e-06, + "loss": 0.5333, + "step": 2992 + }, + { + "epoch": 0.38, + "learning_rate": 7.188083777079804e-06, + "loss": 0.5528, + "step": 2993 + }, + { + "epoch": 0.38, + "learning_rate": 7.18625808333142e-06, + "loss": 0.6357, + "step": 2994 + }, + { + "epoch": 0.38, + "learning_rate": 7.184432029123953e-06, + "loss": 0.5182, + "step": 2995 + }, + { + "epoch": 0.38, + "learning_rate": 7.1826056147584755e-06, + "loss": 0.5642, + "step": 2996 + }, + { + "epoch": 0.38, + "learning_rate": 7.180778840536112e-06, + "loss": 0.6083, + "step": 2997 + }, + { + "epoch": 0.38, + "learning_rate": 7.178951706758056e-06, + "loss": 0.5636, + "step": 2998 + }, + { + "epoch": 0.38, + "learning_rate": 7.177124213725555e-06, + "loss": 0.5328, + "step": 2999 + }, + { + "epoch": 0.38, + "learning_rate": 7.175296361739916e-06, + "loss": 0.559, + "step": 3000 + }, + { + "epoch": 0.38, + "learning_rate": 7.173468151102507e-06, + "loss": 0.5191, + "step": 3001 + }, + { + "epoch": 0.38, + "learning_rate": 7.171639582114753e-06, + "loss": 0.5817, + "step": 3002 + }, + { + "epoch": 0.38, + "learning_rate": 7.16981065507814e-06, + "loss": 0.5631, + "step": 3003 + }, + { + "epoch": 0.38, + "learning_rate": 7.1679813702942124e-06, + "loss": 0.5911, + "step": 3004 + }, + { + "epoch": 0.38, + "learning_rate": 7.166151728064572e-06, + "loss": 0.5586, + "step": 3005 + }, + { + "epoch": 0.38, + "learning_rate": 7.164321728690881e-06, + "loss": 0.5228, + "step": 3006 + }, + { + "epoch": 0.38, + "learning_rate": 7.162491372474862e-06, + "loss": 0.5996, + "step": 3007 + }, + { + "epoch": 0.38, + "learning_rate": 7.160660659718293e-06, + "loss": 0.5522, + "step": 3008 + }, + { + "epoch": 0.38, + "learning_rate": 7.1588295907230146e-06, + "loss": 0.5252, + "step": 3009 + }, + { + "epoch": 0.38, + "learning_rate": 7.156998165790921e-06, + "loss": 0.5533, + "step": 3010 + }, + { + "epoch": 0.38, + "learning_rate": 7.155166385223971e-06, + "loss": 0.5995, + "step": 3011 + }, + { + "epoch": 0.38, + "learning_rate": 7.153334249324177e-06, + "loss": 0.5257, + "step": 3012 + }, + { + "epoch": 0.38, + "learning_rate": 7.151501758393615e-06, + "loss": 0.5664, + "step": 3013 + }, + { + "epoch": 0.38, + "learning_rate": 7.149668912734415e-06, + "loss": 0.5668, + "step": 3014 + }, + { + "epoch": 0.38, + "learning_rate": 7.1478357126487675e-06, + "loss": 0.4868, + "step": 3015 + }, + { + "epoch": 0.38, + "learning_rate": 7.146002158438922e-06, + "loss": 0.568, + "step": 3016 + }, + { + "epoch": 0.38, + "learning_rate": 7.144168250407183e-06, + "loss": 0.5473, + "step": 3017 + }, + { + "epoch": 0.38, + "learning_rate": 7.142333988855919e-06, + "loss": 0.5016, + "step": 3018 + }, + { + "epoch": 0.38, + "learning_rate": 7.140499374087552e-06, + "loss": 0.5929, + "step": 3019 + }, + { + "epoch": 0.38, + "learning_rate": 7.138664406404564e-06, + "loss": 0.6114, + "step": 3020 + }, + { + "epoch": 0.38, + "learning_rate": 7.136829086109496e-06, + "loss": 0.5428, + "step": 3021 + }, + { + "epoch": 0.38, + "learning_rate": 7.134993413504945e-06, + "loss": 0.5352, + "step": 3022 + }, + { + "epoch": 0.38, + "learning_rate": 7.133157388893568e-06, + "loss": 0.2088, + "step": 3023 + }, + { + "epoch": 0.38, + "learning_rate": 7.13132101257808e-06, + "loss": 0.5219, + "step": 3024 + }, + { + "epoch": 0.38, + "learning_rate": 7.129484284861252e-06, + "loss": 0.5436, + "step": 3025 + }, + { + "epoch": 0.38, + "learning_rate": 7.127647206045916e-06, + "loss": 0.5499, + "step": 3026 + }, + { + "epoch": 0.38, + "learning_rate": 7.125809776434958e-06, + "loss": 0.597, + "step": 3027 + }, + { + "epoch": 0.38, + "learning_rate": 7.123971996331327e-06, + "loss": 0.5802, + "step": 3028 + }, + { + "epoch": 0.38, + "learning_rate": 7.122133866038023e-06, + "loss": 0.5816, + "step": 3029 + }, + { + "epoch": 0.38, + "learning_rate": 7.12029538585811e-06, + "loss": 0.5995, + "step": 3030 + }, + { + "epoch": 0.38, + "learning_rate": 7.1184565560947085e-06, + "loss": 0.5629, + "step": 3031 + }, + { + "epoch": 0.38, + "learning_rate": 7.116617377050993e-06, + "loss": 0.5247, + "step": 3032 + }, + { + "epoch": 0.38, + "learning_rate": 7.114777849030199e-06, + "loss": 0.5154, + "step": 3033 + }, + { + "epoch": 0.38, + "learning_rate": 7.112937972335616e-06, + "loss": 0.5024, + "step": 3034 + }, + { + "epoch": 0.38, + "learning_rate": 7.1110977472705965e-06, + "loss": 0.5706, + "step": 3035 + }, + { + "epoch": 0.38, + "learning_rate": 7.109257174138549e-06, + "loss": 0.5292, + "step": 3036 + }, + { + "epoch": 0.38, + "learning_rate": 7.107416253242933e-06, + "loss": 0.2454, + "step": 3037 + }, + { + "epoch": 0.38, + "learning_rate": 7.105574984887272e-06, + "loss": 0.5653, + "step": 3038 + }, + { + "epoch": 0.38, + "learning_rate": 7.103733369375147e-06, + "loss": 0.5692, + "step": 3039 + }, + { + "epoch": 0.38, + "learning_rate": 7.101891407010193e-06, + "loss": 0.5723, + "step": 3040 + }, + { + "epoch": 0.38, + "learning_rate": 7.1000490980961025e-06, + "loss": 0.569, + "step": 3041 + }, + { + "epoch": 0.38, + "learning_rate": 7.098206442936626e-06, + "loss": 0.5398, + "step": 3042 + }, + { + "epoch": 0.38, + "learning_rate": 7.096363441835572e-06, + "loss": 0.6324, + "step": 3043 + }, + { + "epoch": 0.38, + "learning_rate": 7.094520095096805e-06, + "loss": 0.5489, + "step": 3044 + }, + { + "epoch": 0.38, + "learning_rate": 7.092676403024245e-06, + "loss": 0.5324, + "step": 3045 + }, + { + "epoch": 0.38, + "learning_rate": 7.090832365921873e-06, + "loss": 0.5541, + "step": 3046 + }, + { + "epoch": 0.38, + "learning_rate": 7.088987984093724e-06, + "loss": 0.5079, + "step": 3047 + }, + { + "epoch": 0.38, + "learning_rate": 7.087143257843888e-06, + "loss": 0.5627, + "step": 3048 + }, + { + "epoch": 0.38, + "learning_rate": 7.085298187476517e-06, + "loss": 0.5682, + "step": 3049 + }, + { + "epoch": 0.38, + "learning_rate": 7.083452773295813e-06, + "loss": 0.5614, + "step": 3050 + }, + { + "epoch": 0.38, + "learning_rate": 7.081607015606043e-06, + "loss": 0.5322, + "step": 3051 + }, + { + "epoch": 0.38, + "learning_rate": 7.0797609147115245e-06, + "loss": 0.5474, + "step": 3052 + }, + { + "epoch": 0.38, + "learning_rate": 7.077914470916632e-06, + "loss": 0.2389, + "step": 3053 + }, + { + "epoch": 0.38, + "learning_rate": 7.076067684525798e-06, + "loss": 0.5101, + "step": 3054 + }, + { + "epoch": 0.38, + "learning_rate": 7.074220555843512e-06, + "loss": 0.5375, + "step": 3055 + }, + { + "epoch": 0.38, + "learning_rate": 7.0723730851743185e-06, + "loss": 0.5789, + "step": 3056 + }, + { + "epoch": 0.38, + "learning_rate": 7.070525272822821e-06, + "loss": 0.5444, + "step": 3057 + }, + { + "epoch": 0.38, + "learning_rate": 7.0686771190936745e-06, + "loss": 0.6098, + "step": 3058 + }, + { + "epoch": 0.38, + "learning_rate": 7.066828624291593e-06, + "loss": 0.2084, + "step": 3059 + }, + { + "epoch": 0.38, + "learning_rate": 7.064979788721351e-06, + "loss": 0.6388, + "step": 3060 + }, + { + "epoch": 0.38, + "learning_rate": 7.063130612687771e-06, + "loss": 0.5453, + "step": 3061 + }, + { + "epoch": 0.38, + "learning_rate": 7.061281096495737e-06, + "loss": 0.575, + "step": 3062 + }, + { + "epoch": 0.38, + "learning_rate": 7.059431240450188e-06, + "loss": 0.5805, + "step": 3063 + }, + { + "epoch": 0.38, + "learning_rate": 7.0575810448561185e-06, + "loss": 0.5394, + "step": 3064 + }, + { + "epoch": 0.38, + "learning_rate": 7.055730510018579e-06, + "loss": 0.6073, + "step": 3065 + }, + { + "epoch": 0.38, + "learning_rate": 7.05387963624268e-06, + "loss": 0.602, + "step": 3066 + }, + { + "epoch": 0.38, + "learning_rate": 7.052028423833577e-06, + "loss": 0.4767, + "step": 3067 + }, + { + "epoch": 0.38, + "learning_rate": 7.050176873096493e-06, + "loss": 0.4964, + "step": 3068 + }, + { + "epoch": 0.38, + "learning_rate": 7.048324984336704e-06, + "loss": 0.4962, + "step": 3069 + }, + { + "epoch": 0.38, + "learning_rate": 7.046472757859536e-06, + "loss": 0.5513, + "step": 3070 + }, + { + "epoch": 0.38, + "learning_rate": 7.044620193970376e-06, + "loss": 0.5148, + "step": 3071 + }, + { + "epoch": 0.39, + "learning_rate": 7.042767292974667e-06, + "loss": 0.2177, + "step": 3072 + }, + { + "epoch": 0.39, + "learning_rate": 7.040914055177903e-06, + "loss": 0.5921, + "step": 3073 + }, + { + "epoch": 0.39, + "learning_rate": 7.039060480885638e-06, + "loss": 0.5792, + "step": 3074 + }, + { + "epoch": 0.39, + "learning_rate": 7.03720657040348e-06, + "loss": 0.5909, + "step": 3075 + }, + { + "epoch": 0.39, + "learning_rate": 7.035352324037092e-06, + "loss": 0.5603, + "step": 3076 + }, + { + "epoch": 0.39, + "learning_rate": 7.033497742092192e-06, + "loss": 0.563, + "step": 3077 + }, + { + "epoch": 0.39, + "learning_rate": 7.031642824874556e-06, + "loss": 0.5845, + "step": 3078 + }, + { + "epoch": 0.39, + "learning_rate": 7.029787572690012e-06, + "loss": 0.5463, + "step": 3079 + }, + { + "epoch": 0.39, + "learning_rate": 7.027931985844443e-06, + "loss": 0.5432, + "step": 3080 + }, + { + "epoch": 0.39, + "learning_rate": 7.026076064643791e-06, + "loss": 0.5715, + "step": 3081 + }, + { + "epoch": 0.39, + "learning_rate": 7.0242198093940505e-06, + "loss": 0.5209, + "step": 3082 + }, + { + "epoch": 0.39, + "learning_rate": 7.022363220401269e-06, + "loss": 0.5058, + "step": 3083 + }, + { + "epoch": 0.39, + "learning_rate": 7.020506297971556e-06, + "loss": 0.5397, + "step": 3084 + }, + { + "epoch": 0.39, + "learning_rate": 7.0186490424110675e-06, + "loss": 0.5727, + "step": 3085 + }, + { + "epoch": 0.39, + "learning_rate": 7.016791454026019e-06, + "loss": 0.4973, + "step": 3086 + }, + { + "epoch": 0.39, + "learning_rate": 7.014933533122682e-06, + "loss": 0.6166, + "step": 3087 + }, + { + "epoch": 0.39, + "learning_rate": 7.0130752800073784e-06, + "loss": 0.508, + "step": 3088 + }, + { + "epoch": 0.39, + "learning_rate": 7.01121669498649e-06, + "loss": 0.5937, + "step": 3089 + }, + { + "epoch": 0.39, + "learning_rate": 7.00935777836645e-06, + "loss": 0.4747, + "step": 3090 + }, + { + "epoch": 0.39, + "learning_rate": 7.007498530453746e-06, + "loss": 0.5829, + "step": 3091 + }, + { + "epoch": 0.39, + "learning_rate": 7.0056389515549225e-06, + "loss": 0.5159, + "step": 3092 + }, + { + "epoch": 0.39, + "learning_rate": 7.003779041976578e-06, + "loss": 0.5428, + "step": 3093 + }, + { + "epoch": 0.39, + "learning_rate": 7.00191880202536e-06, + "loss": 0.5572, + "step": 3094 + }, + { + "epoch": 0.39, + "learning_rate": 7.0000582320079835e-06, + "loss": 0.5296, + "step": 3095 + }, + { + "epoch": 0.39, + "learning_rate": 6.998197332231204e-06, + "loss": 0.5535, + "step": 3096 + }, + { + "epoch": 0.39, + "learning_rate": 6.9963361030018385e-06, + "loss": 0.5644, + "step": 3097 + }, + { + "epoch": 0.39, + "learning_rate": 6.994474544626758e-06, + "loss": 0.5793, + "step": 3098 + }, + { + "epoch": 0.39, + "learning_rate": 6.992612657412885e-06, + "loss": 0.5614, + "step": 3099 + }, + { + "epoch": 0.39, + "learning_rate": 6.9907504416672e-06, + "loss": 0.5229, + "step": 3100 + }, + { + "epoch": 0.39, + "learning_rate": 6.9888878976967335e-06, + "loss": 0.5488, + "step": 3101 + }, + { + "epoch": 0.39, + "learning_rate": 6.987025025808574e-06, + "loss": 0.6041, + "step": 3102 + }, + { + "epoch": 0.39, + "learning_rate": 6.985161826309862e-06, + "loss": 0.5381, + "step": 3103 + }, + { + "epoch": 0.39, + "learning_rate": 6.98329829950779e-06, + "loss": 0.4806, + "step": 3104 + }, + { + "epoch": 0.39, + "learning_rate": 6.98143444570961e-06, + "loss": 0.2233, + "step": 3105 + }, + { + "epoch": 0.39, + "learning_rate": 6.979570265222625e-06, + "loss": 0.5681, + "step": 3106 + }, + { + "epoch": 0.39, + "learning_rate": 6.9777057583541886e-06, + "loss": 0.5325, + "step": 3107 + }, + { + "epoch": 0.39, + "learning_rate": 6.975840925411713e-06, + "loss": 0.4973, + "step": 3108 + }, + { + "epoch": 0.39, + "learning_rate": 6.973975766702662e-06, + "loss": 0.5731, + "step": 3109 + }, + { + "epoch": 0.39, + "learning_rate": 6.972110282534552e-06, + "loss": 0.5135, + "step": 3110 + }, + { + "epoch": 0.39, + "learning_rate": 6.970244473214959e-06, + "loss": 0.5131, + "step": 3111 + }, + { + "epoch": 0.39, + "learning_rate": 6.9683783390515025e-06, + "loss": 0.5512, + "step": 3112 + }, + { + "epoch": 0.39, + "learning_rate": 6.966511880351865e-06, + "loss": 0.6082, + "step": 3113 + }, + { + "epoch": 0.39, + "learning_rate": 6.964645097423776e-06, + "loss": 0.5018, + "step": 3114 + }, + { + "epoch": 0.39, + "learning_rate": 6.962777990575025e-06, + "loss": 0.6228, + "step": 3115 + }, + { + "epoch": 0.39, + "learning_rate": 6.960910560113447e-06, + "loss": 0.5706, + "step": 3116 + }, + { + "epoch": 0.39, + "learning_rate": 6.9590428063469365e-06, + "loss": 0.56, + "step": 3117 + }, + { + "epoch": 0.39, + "learning_rate": 6.957174729583438e-06, + "loss": 0.5638, + "step": 3118 + }, + { + "epoch": 0.39, + "learning_rate": 6.955306330130953e-06, + "loss": 0.5607, + "step": 3119 + }, + { + "epoch": 0.39, + "learning_rate": 6.953437608297531e-06, + "loss": 0.6321, + "step": 3120 + }, + { + "epoch": 0.39, + "learning_rate": 6.951568564391277e-06, + "loss": 0.5377, + "step": 3121 + }, + { + "epoch": 0.39, + "learning_rate": 6.949699198720352e-06, + "loss": 0.5413, + "step": 3122 + }, + { + "epoch": 0.39, + "learning_rate": 6.947829511592966e-06, + "loss": 0.5634, + "step": 3123 + }, + { + "epoch": 0.39, + "learning_rate": 6.9459595033173835e-06, + "loss": 0.538, + "step": 3124 + }, + { + "epoch": 0.39, + "learning_rate": 6.9440891742019225e-06, + "loss": 0.6003, + "step": 3125 + }, + { + "epoch": 0.39, + "learning_rate": 6.942218524554952e-06, + "loss": 0.5369, + "step": 3126 + }, + { + "epoch": 0.39, + "learning_rate": 6.940347554684896e-06, + "loss": 0.4897, + "step": 3127 + }, + { + "epoch": 0.39, + "learning_rate": 6.938476264900232e-06, + "loss": 0.5842, + "step": 3128 + }, + { + "epoch": 0.39, + "learning_rate": 6.936604655509487e-06, + "loss": 0.2332, + "step": 3129 + }, + { + "epoch": 0.39, + "learning_rate": 6.934732726821242e-06, + "loss": 0.5487, + "step": 3130 + }, + { + "epoch": 0.39, + "learning_rate": 6.932860479144132e-06, + "loss": 0.5002, + "step": 3131 + }, + { + "epoch": 0.39, + "learning_rate": 6.930987912786843e-06, + "loss": 0.5133, + "step": 3132 + }, + { + "epoch": 0.39, + "learning_rate": 6.929115028058114e-06, + "loss": 0.5881, + "step": 3133 + }, + { + "epoch": 0.39, + "learning_rate": 6.927241825266738e-06, + "loss": 0.5346, + "step": 3134 + }, + { + "epoch": 0.39, + "learning_rate": 6.9253683047215594e-06, + "loss": 0.5795, + "step": 3135 + }, + { + "epoch": 0.39, + "learning_rate": 6.923494466731472e-06, + "loss": 0.5306, + "step": 3136 + }, + { + "epoch": 0.39, + "learning_rate": 6.921620311605427e-06, + "loss": 0.6011, + "step": 3137 + }, + { + "epoch": 0.39, + "learning_rate": 6.919745839652425e-06, + "loss": 0.4669, + "step": 3138 + }, + { + "epoch": 0.39, + "learning_rate": 6.91787105118152e-06, + "loss": 0.2254, + "step": 3139 + }, + { + "epoch": 0.39, + "learning_rate": 6.915995946501814e-06, + "loss": 0.5032, + "step": 3140 + }, + { + "epoch": 0.39, + "learning_rate": 6.914120525922471e-06, + "loss": 0.5576, + "step": 3141 + }, + { + "epoch": 0.39, + "learning_rate": 6.912244789752696e-06, + "loss": 0.468, + "step": 3142 + }, + { + "epoch": 0.39, + "learning_rate": 6.910368738301753e-06, + "loss": 0.5033, + "step": 3143 + }, + { + "epoch": 0.39, + "learning_rate": 6.908492371878952e-06, + "loss": 0.5838, + "step": 3144 + }, + { + "epoch": 0.39, + "learning_rate": 6.906615690793663e-06, + "loss": 0.5997, + "step": 3145 + }, + { + "epoch": 0.39, + "learning_rate": 6.9047386953553045e-06, + "loss": 0.5482, + "step": 3146 + }, + { + "epoch": 0.39, + "learning_rate": 6.9028613858733405e-06, + "loss": 0.5744, + "step": 3147 + }, + { + "epoch": 0.39, + "learning_rate": 6.900983762657297e-06, + "loss": 0.5438, + "step": 3148 + }, + { + "epoch": 0.39, + "learning_rate": 6.899105826016745e-06, + "loss": 0.4416, + "step": 3149 + }, + { + "epoch": 0.39, + "learning_rate": 6.8972275762613095e-06, + "loss": 0.5555, + "step": 3150 + }, + { + "epoch": 0.39, + "learning_rate": 6.895349013700667e-06, + "loss": 0.5832, + "step": 3151 + }, + { + "epoch": 0.4, + "learning_rate": 6.893470138644545e-06, + "loss": 0.558, + "step": 3152 + }, + { + "epoch": 0.4, + "learning_rate": 6.891590951402723e-06, + "loss": 0.5594, + "step": 3153 + }, + { + "epoch": 0.4, + "learning_rate": 6.889711452285031e-06, + "loss": 0.5275, + "step": 3154 + }, + { + "epoch": 0.4, + "learning_rate": 6.887831641601352e-06, + "loss": 0.5531, + "step": 3155 + }, + { + "epoch": 0.4, + "learning_rate": 6.885951519661619e-06, + "loss": 0.5476, + "step": 3156 + }, + { + "epoch": 0.4, + "learning_rate": 6.884071086775819e-06, + "loss": 0.6281, + "step": 3157 + }, + { + "epoch": 0.4, + "learning_rate": 6.882190343253985e-06, + "loss": 0.5056, + "step": 3158 + }, + { + "epoch": 0.4, + "learning_rate": 6.8803092894062075e-06, + "loss": 0.5482, + "step": 3159 + }, + { + "epoch": 0.4, + "learning_rate": 6.878427925542621e-06, + "loss": 0.5575, + "step": 3160 + }, + { + "epoch": 0.4, + "learning_rate": 6.876546251973419e-06, + "loss": 0.5571, + "step": 3161 + }, + { + "epoch": 0.4, + "learning_rate": 6.8746642690088425e-06, + "loss": 0.5765, + "step": 3162 + }, + { + "epoch": 0.4, + "learning_rate": 6.87278197695918e-06, + "loss": 0.6195, + "step": 3163 + }, + { + "epoch": 0.4, + "learning_rate": 6.8708993761347765e-06, + "loss": 0.5725, + "step": 3164 + }, + { + "epoch": 0.4, + "learning_rate": 6.869016466846025e-06, + "loss": 0.518, + "step": 3165 + }, + { + "epoch": 0.4, + "learning_rate": 6.8671332494033695e-06, + "loss": 0.5556, + "step": 3166 + }, + { + "epoch": 0.4, + "learning_rate": 6.865249724117307e-06, + "loss": 0.5584, + "step": 3167 + }, + { + "epoch": 0.4, + "learning_rate": 6.863365891298381e-06, + "loss": 0.5048, + "step": 3168 + }, + { + "epoch": 0.4, + "learning_rate": 6.8614817512571895e-06, + "loss": 0.5345, + "step": 3169 + }, + { + "epoch": 0.4, + "learning_rate": 6.859597304304382e-06, + "loss": 0.5162, + "step": 3170 + }, + { + "epoch": 0.4, + "learning_rate": 6.857712550750653e-06, + "loss": 0.5729, + "step": 3171 + }, + { + "epoch": 0.4, + "learning_rate": 6.855827490906752e-06, + "loss": 0.5617, + "step": 3172 + }, + { + "epoch": 0.4, + "learning_rate": 6.85394212508348e-06, + "loss": 0.5409, + "step": 3173 + }, + { + "epoch": 0.4, + "learning_rate": 6.852056453591684e-06, + "loss": 0.5549, + "step": 3174 + }, + { + "epoch": 0.4, + "learning_rate": 6.850170476742267e-06, + "loss": 0.5737, + "step": 3175 + }, + { + "epoch": 0.4, + "learning_rate": 6.848284194846175e-06, + "loss": 0.5873, + "step": 3176 + }, + { + "epoch": 0.4, + "learning_rate": 6.846397608214412e-06, + "loss": 0.6024, + "step": 3177 + }, + { + "epoch": 0.4, + "learning_rate": 6.8445107171580285e-06, + "loss": 0.5457, + "step": 3178 + }, + { + "epoch": 0.4, + "learning_rate": 6.842623521988122e-06, + "loss": 0.5143, + "step": 3179 + }, + { + "epoch": 0.4, + "learning_rate": 6.8407360230158475e-06, + "loss": 0.607, + "step": 3180 + }, + { + "epoch": 0.4, + "learning_rate": 6.838848220552404e-06, + "loss": 0.6231, + "step": 3181 + }, + { + "epoch": 0.4, + "learning_rate": 6.836960114909046e-06, + "loss": 0.4879, + "step": 3182 + }, + { + "epoch": 0.4, + "learning_rate": 6.835071706397069e-06, + "loss": 0.5478, + "step": 3183 + }, + { + "epoch": 0.4, + "learning_rate": 6.833182995327829e-06, + "loss": 0.5537, + "step": 3184 + }, + { + "epoch": 0.4, + "learning_rate": 6.831293982012728e-06, + "loss": 0.5376, + "step": 3185 + }, + { + "epoch": 0.4, + "learning_rate": 6.829404666763212e-06, + "loss": 0.532, + "step": 3186 + }, + { + "epoch": 0.4, + "learning_rate": 6.827515049890784e-06, + "loss": 0.6427, + "step": 3187 + }, + { + "epoch": 0.4, + "learning_rate": 6.825625131706996e-06, + "loss": 0.5566, + "step": 3188 + }, + { + "epoch": 0.4, + "learning_rate": 6.823734912523445e-06, + "loss": 0.5231, + "step": 3189 + }, + { + "epoch": 0.4, + "learning_rate": 6.821844392651783e-06, + "loss": 0.5258, + "step": 3190 + }, + { + "epoch": 0.4, + "learning_rate": 6.819953572403709e-06, + "loss": 0.5549, + "step": 3191 + }, + { + "epoch": 0.4, + "learning_rate": 6.81806245209097e-06, + "loss": 0.5523, + "step": 3192 + }, + { + "epoch": 0.4, + "learning_rate": 6.816171032025367e-06, + "loss": 0.5213, + "step": 3193 + }, + { + "epoch": 0.4, + "learning_rate": 6.8142793125187455e-06, + "loss": 0.5673, + "step": 3194 + }, + { + "epoch": 0.4, + "learning_rate": 6.812387293883004e-06, + "loss": 0.2096, + "step": 3195 + }, + { + "epoch": 0.4, + "learning_rate": 6.810494976430088e-06, + "loss": 0.5883, + "step": 3196 + }, + { + "epoch": 0.4, + "learning_rate": 6.808602360471994e-06, + "loss": 0.5204, + "step": 3197 + }, + { + "epoch": 0.4, + "learning_rate": 6.806709446320765e-06, + "loss": 0.5135, + "step": 3198 + }, + { + "epoch": 0.4, + "learning_rate": 6.8048162342884984e-06, + "loss": 0.5698, + "step": 3199 + }, + { + "epoch": 0.4, + "learning_rate": 6.802922724687333e-06, + "loss": 0.5996, + "step": 3200 + }, + { + "epoch": 0.4, + "learning_rate": 6.801028917829464e-06, + "loss": 0.5595, + "step": 3201 + }, + { + "epoch": 0.4, + "learning_rate": 6.799134814027133e-06, + "loss": 0.4918, + "step": 3202 + }, + { + "epoch": 0.4, + "learning_rate": 6.797240413592627e-06, + "loss": 0.5833, + "step": 3203 + }, + { + "epoch": 0.4, + "learning_rate": 6.795345716838288e-06, + "loss": 0.2081, + "step": 3204 + }, + { + "epoch": 0.4, + "learning_rate": 6.793450724076505e-06, + "loss": 0.5122, + "step": 3205 + }, + { + "epoch": 0.4, + "learning_rate": 6.79155543561971e-06, + "loss": 0.5606, + "step": 3206 + }, + { + "epoch": 0.4, + "learning_rate": 6.789659851780392e-06, + "loss": 0.5617, + "step": 3207 + }, + { + "epoch": 0.4, + "learning_rate": 6.787763972871087e-06, + "loss": 0.4872, + "step": 3208 + }, + { + "epoch": 0.4, + "learning_rate": 6.785867799204372e-06, + "loss": 0.5072, + "step": 3209 + }, + { + "epoch": 0.4, + "learning_rate": 6.783971331092885e-06, + "loss": 0.5288, + "step": 3210 + }, + { + "epoch": 0.4, + "learning_rate": 6.7820745688492995e-06, + "loss": 0.6018, + "step": 3211 + }, + { + "epoch": 0.4, + "learning_rate": 6.780177512786349e-06, + "loss": 0.542, + "step": 3212 + }, + { + "epoch": 0.4, + "learning_rate": 6.778280163216808e-06, + "loss": 0.5564, + "step": 3213 + }, + { + "epoch": 0.4, + "learning_rate": 6.776382520453504e-06, + "loss": 0.617, + "step": 3214 + }, + { + "epoch": 0.4, + "learning_rate": 6.774484584809308e-06, + "loss": 0.6122, + "step": 3215 + }, + { + "epoch": 0.4, + "learning_rate": 6.772586356597144e-06, + "loss": 0.5224, + "step": 3216 + }, + { + "epoch": 0.4, + "learning_rate": 6.77068783612998e-06, + "loss": 0.5634, + "step": 3217 + }, + { + "epoch": 0.4, + "learning_rate": 6.768789023720838e-06, + "loss": 0.5113, + "step": 3218 + }, + { + "epoch": 0.4, + "learning_rate": 6.76688991968278e-06, + "loss": 0.5465, + "step": 3219 + }, + { + "epoch": 0.4, + "learning_rate": 6.764990524328923e-06, + "loss": 0.5386, + "step": 3220 + }, + { + "epoch": 0.4, + "learning_rate": 6.7630908379724305e-06, + "loss": 0.5932, + "step": 3221 + }, + { + "epoch": 0.4, + "learning_rate": 6.761190860926511e-06, + "loss": 0.5504, + "step": 3222 + }, + { + "epoch": 0.4, + "learning_rate": 6.759290593504424e-06, + "loss": 0.5689, + "step": 3223 + }, + { + "epoch": 0.4, + "learning_rate": 6.757390036019478e-06, + "loss": 0.5406, + "step": 3224 + }, + { + "epoch": 0.4, + "learning_rate": 6.755489188785023e-06, + "loss": 0.5491, + "step": 3225 + }, + { + "epoch": 0.4, + "learning_rate": 6.753588052114463e-06, + "loss": 0.5428, + "step": 3226 + }, + { + "epoch": 0.4, + "learning_rate": 6.75168662632125e-06, + "loss": 0.4898, + "step": 3227 + }, + { + "epoch": 0.4, + "learning_rate": 6.749784911718877e-06, + "loss": 0.5766, + "step": 3228 + }, + { + "epoch": 0.4, + "learning_rate": 6.747882908620893e-06, + "loss": 0.5676, + "step": 3229 + }, + { + "epoch": 0.4, + "learning_rate": 6.745980617340888e-06, + "loss": 0.6216, + "step": 3230 + }, + { + "epoch": 0.41, + "learning_rate": 6.744078038192503e-06, + "loss": 0.5391, + "step": 3231 + }, + { + "epoch": 0.41, + "learning_rate": 6.742175171489426e-06, + "loss": 0.5326, + "step": 3232 + }, + { + "epoch": 0.41, + "learning_rate": 6.74027201754539e-06, + "loss": 0.514, + "step": 3233 + }, + { + "epoch": 0.41, + "learning_rate": 6.7383685766741816e-06, + "loss": 0.6741, + "step": 3234 + }, + { + "epoch": 0.41, + "learning_rate": 6.736464849189625e-06, + "loss": 0.4867, + "step": 3235 + }, + { + "epoch": 0.41, + "learning_rate": 6.734560835405599e-06, + "loss": 0.5426, + "step": 3236 + }, + { + "epoch": 0.41, + "learning_rate": 6.732656535636031e-06, + "loss": 0.5838, + "step": 3237 + }, + { + "epoch": 0.41, + "learning_rate": 6.730751950194888e-06, + "loss": 0.5626, + "step": 3238 + }, + { + "epoch": 0.41, + "learning_rate": 6.72884707939619e-06, + "loss": 0.5913, + "step": 3239 + }, + { + "epoch": 0.41, + "learning_rate": 6.726941923554002e-06, + "loss": 0.6091, + "step": 3240 + }, + { + "epoch": 0.41, + "learning_rate": 6.725036482982439e-06, + "loss": 0.5127, + "step": 3241 + }, + { + "epoch": 0.41, + "learning_rate": 6.723130757995655e-06, + "loss": 0.5952, + "step": 3242 + }, + { + "epoch": 0.41, + "learning_rate": 6.72122474890786e-06, + "loss": 0.5609, + "step": 3243 + }, + { + "epoch": 0.41, + "learning_rate": 6.719318456033307e-06, + "loss": 0.6602, + "step": 3244 + }, + { + "epoch": 0.41, + "learning_rate": 6.717411879686296e-06, + "loss": 0.5692, + "step": 3245 + }, + { + "epoch": 0.41, + "learning_rate": 6.71550502018117e-06, + "loss": 0.5406, + "step": 3246 + }, + { + "epoch": 0.41, + "learning_rate": 6.713597877832328e-06, + "loss": 0.5194, + "step": 3247 + }, + { + "epoch": 0.41, + "learning_rate": 6.711690452954205e-06, + "loss": 0.5258, + "step": 3248 + }, + { + "epoch": 0.41, + "learning_rate": 6.7097827458612895e-06, + "loss": 0.5562, + "step": 3249 + }, + { + "epoch": 0.41, + "learning_rate": 6.707874756868116e-06, + "loss": 0.5406, + "step": 3250 + }, + { + "epoch": 0.41, + "learning_rate": 6.7059664862892605e-06, + "loss": 0.572, + "step": 3251 + }, + { + "epoch": 0.41, + "learning_rate": 6.704057934439351e-06, + "loss": 0.5924, + "step": 3252 + }, + { + "epoch": 0.41, + "learning_rate": 6.702149101633061e-06, + "loss": 0.577, + "step": 3253 + }, + { + "epoch": 0.41, + "learning_rate": 6.700239988185107e-06, + "loss": 0.4917, + "step": 3254 + }, + { + "epoch": 0.41, + "learning_rate": 6.6983305944102525e-06, + "loss": 0.5681, + "step": 3255 + }, + { + "epoch": 0.41, + "learning_rate": 6.696420920623314e-06, + "loss": 0.6027, + "step": 3256 + }, + { + "epoch": 0.41, + "learning_rate": 6.694510967139144e-06, + "loss": 0.5418, + "step": 3257 + }, + { + "epoch": 0.41, + "learning_rate": 6.692600734272648e-06, + "loss": 0.5146, + "step": 3258 + }, + { + "epoch": 0.41, + "learning_rate": 6.690690222338774e-06, + "loss": 0.5561, + "step": 3259 + }, + { + "epoch": 0.41, + "learning_rate": 6.688779431652518e-06, + "loss": 0.4885, + "step": 3260 + }, + { + "epoch": 0.41, + "learning_rate": 6.6868683625289235e-06, + "loss": 0.5272, + "step": 3261 + }, + { + "epoch": 0.41, + "learning_rate": 6.684957015283073e-06, + "loss": 0.4833, + "step": 3262 + }, + { + "epoch": 0.41, + "learning_rate": 6.683045390230105e-06, + "loss": 0.5718, + "step": 3263 + }, + { + "epoch": 0.41, + "learning_rate": 6.681133487685197e-06, + "loss": 0.5452, + "step": 3264 + }, + { + "epoch": 0.41, + "learning_rate": 6.679221307963572e-06, + "loss": 0.5463, + "step": 3265 + }, + { + "epoch": 0.41, + "learning_rate": 6.677308851380502e-06, + "loss": 0.5007, + "step": 3266 + }, + { + "epoch": 0.41, + "learning_rate": 6.6753961182513005e-06, + "loss": 0.4984, + "step": 3267 + }, + { + "epoch": 0.41, + "learning_rate": 6.673483108891332e-06, + "loss": 0.5511, + "step": 3268 + }, + { + "epoch": 0.41, + "learning_rate": 6.671569823616004e-06, + "loss": 0.543, + "step": 3269 + }, + { + "epoch": 0.41, + "learning_rate": 6.6696562627407665e-06, + "loss": 0.5257, + "step": 3270 + }, + { + "epoch": 0.41, + "learning_rate": 6.667742426581121e-06, + "loss": 0.527, + "step": 3271 + }, + { + "epoch": 0.41, + "learning_rate": 6.66582831545261e-06, + "loss": 0.5748, + "step": 3272 + }, + { + "epoch": 0.41, + "learning_rate": 6.663913929670819e-06, + "loss": 0.5516, + "step": 3273 + }, + { + "epoch": 0.41, + "learning_rate": 6.661999269551387e-06, + "loss": 0.5676, + "step": 3274 + }, + { + "epoch": 0.41, + "learning_rate": 6.660084335409991e-06, + "loss": 0.5187, + "step": 3275 + }, + { + "epoch": 0.41, + "learning_rate": 6.658169127562354e-06, + "loss": 0.5072, + "step": 3276 + }, + { + "epoch": 0.41, + "learning_rate": 6.65625364632425e-06, + "loss": 0.632, + "step": 3277 + }, + { + "epoch": 0.41, + "learning_rate": 6.65433789201149e-06, + "loss": 0.4748, + "step": 3278 + }, + { + "epoch": 0.41, + "learning_rate": 6.652421864939935e-06, + "loss": 0.2251, + "step": 3279 + }, + { + "epoch": 0.41, + "learning_rate": 6.650505565425492e-06, + "loss": 0.5827, + "step": 3280 + }, + { + "epoch": 0.41, + "learning_rate": 6.6485889937841056e-06, + "loss": 0.5354, + "step": 3281 + }, + { + "epoch": 0.41, + "learning_rate": 6.6466721503317746e-06, + "loss": 0.5976, + "step": 3282 + }, + { + "epoch": 0.41, + "learning_rate": 6.644755035384537e-06, + "loss": 0.6106, + "step": 3283 + }, + { + "epoch": 0.41, + "learning_rate": 6.642837649258478e-06, + "loss": 0.5921, + "step": 3284 + }, + { + "epoch": 0.41, + "learning_rate": 6.640919992269724e-06, + "loss": 0.566, + "step": 3285 + }, + { + "epoch": 0.41, + "learning_rate": 6.639002064734451e-06, + "loss": 0.5836, + "step": 3286 + }, + { + "epoch": 0.41, + "learning_rate": 6.637083866968875e-06, + "loss": 0.5187, + "step": 3287 + }, + { + "epoch": 0.41, + "learning_rate": 6.635165399289262e-06, + "loss": 0.5198, + "step": 3288 + }, + { + "epoch": 0.41, + "learning_rate": 6.633246662011915e-06, + "loss": 0.5415, + "step": 3289 + }, + { + "epoch": 0.41, + "learning_rate": 6.631327655453188e-06, + "loss": 0.5357, + "step": 3290 + }, + { + "epoch": 0.41, + "learning_rate": 6.6294083799294785e-06, + "loss": 0.5674, + "step": 3291 + }, + { + "epoch": 0.41, + "learning_rate": 6.627488835757222e-06, + "loss": 0.5208, + "step": 3292 + }, + { + "epoch": 0.41, + "learning_rate": 6.625569023252907e-06, + "loss": 0.5256, + "step": 3293 + }, + { + "epoch": 0.41, + "learning_rate": 6.623648942733063e-06, + "loss": 0.5626, + "step": 3294 + }, + { + "epoch": 0.41, + "learning_rate": 6.62172859451426e-06, + "loss": 0.5985, + "step": 3295 + }, + { + "epoch": 0.41, + "learning_rate": 6.619807978913117e-06, + "loss": 0.6029, + "step": 3296 + }, + { + "epoch": 0.41, + "learning_rate": 6.617887096246295e-06, + "loss": 0.5246, + "step": 3297 + }, + { + "epoch": 0.41, + "learning_rate": 6.6159659468305e-06, + "loss": 0.5821, + "step": 3298 + }, + { + "epoch": 0.41, + "learning_rate": 6.61404453098248e-06, + "loss": 0.5389, + "step": 3299 + }, + { + "epoch": 0.41, + "learning_rate": 6.61212284901903e-06, + "loss": 0.4952, + "step": 3300 + }, + { + "epoch": 0.41, + "learning_rate": 6.610200901256987e-06, + "loss": 0.5935, + "step": 3301 + }, + { + "epoch": 0.41, + "learning_rate": 6.608278688013229e-06, + "loss": 0.5566, + "step": 3302 + }, + { + "epoch": 0.41, + "learning_rate": 6.606356209604683e-06, + "loss": 0.5838, + "step": 3303 + }, + { + "epoch": 0.41, + "learning_rate": 6.604433466348319e-06, + "loss": 0.5359, + "step": 3304 + }, + { + "epoch": 0.41, + "learning_rate": 6.602510458561145e-06, + "loss": 0.5535, + "step": 3305 + }, + { + "epoch": 0.41, + "learning_rate": 6.6005871865602215e-06, + "loss": 0.5937, + "step": 3306 + }, + { + "epoch": 0.41, + "learning_rate": 6.598663650662644e-06, + "loss": 0.4932, + "step": 3307 + }, + { + "epoch": 0.41, + "learning_rate": 6.596739851185557e-06, + "loss": 0.5959, + "step": 3308 + }, + { + "epoch": 0.41, + "learning_rate": 6.594815788446146e-06, + "loss": 0.6097, + "step": 3309 + }, + { + "epoch": 0.41, + "learning_rate": 6.592891462761642e-06, + "loss": 0.5742, + "step": 3310 + }, + { + "epoch": 0.42, + "learning_rate": 6.5909668744493145e-06, + "loss": 0.5393, + "step": 3311 + }, + { + "epoch": 0.42, + "learning_rate": 6.589042023826485e-06, + "loss": 0.5461, + "step": 3312 + }, + { + "epoch": 0.42, + "learning_rate": 6.58711691121051e-06, + "loss": 0.568, + "step": 3313 + }, + { + "epoch": 0.42, + "learning_rate": 6.585191536918791e-06, + "loss": 0.5767, + "step": 3314 + }, + { + "epoch": 0.42, + "learning_rate": 6.583265901268777e-06, + "loss": 0.2386, + "step": 3315 + }, + { + "epoch": 0.42, + "learning_rate": 6.581340004577954e-06, + "loss": 0.4849, + "step": 3316 + }, + { + "epoch": 0.42, + "learning_rate": 6.579413847163858e-06, + "loss": 0.5593, + "step": 3317 + }, + { + "epoch": 0.42, + "learning_rate": 6.577487429344061e-06, + "loss": 0.5394, + "step": 3318 + }, + { + "epoch": 0.42, + "learning_rate": 6.5755607514361786e-06, + "loss": 0.5725, + "step": 3319 + }, + { + "epoch": 0.42, + "learning_rate": 6.573633813757878e-06, + "loss": 0.5744, + "step": 3320 + }, + { + "epoch": 0.42, + "learning_rate": 6.571706616626857e-06, + "loss": 0.4831, + "step": 3321 + }, + { + "epoch": 0.42, + "learning_rate": 6.569779160360865e-06, + "loss": 0.5681, + "step": 3322 + }, + { + "epoch": 0.42, + "learning_rate": 6.567851445277692e-06, + "loss": 0.5392, + "step": 3323 + }, + { + "epoch": 0.42, + "learning_rate": 6.565923471695167e-06, + "loss": 0.5763, + "step": 3324 + }, + { + "epoch": 0.42, + "learning_rate": 6.5639952399311666e-06, + "loss": 0.4958, + "step": 3325 + }, + { + "epoch": 0.42, + "learning_rate": 6.562066750303607e-06, + "loss": 0.6041, + "step": 3326 + }, + { + "epoch": 0.42, + "learning_rate": 6.560138003130447e-06, + "loss": 0.5557, + "step": 3327 + }, + { + "epoch": 0.42, + "learning_rate": 6.558208998729692e-06, + "loss": 0.5537, + "step": 3328 + }, + { + "epoch": 0.42, + "learning_rate": 6.5562797374193795e-06, + "loss": 0.5494, + "step": 3329 + }, + { + "epoch": 0.42, + "learning_rate": 6.5543502195176034e-06, + "loss": 0.5918, + "step": 3330 + }, + { + "epoch": 0.42, + "learning_rate": 6.55242044534249e-06, + "loss": 0.5904, + "step": 3331 + }, + { + "epoch": 0.42, + "learning_rate": 6.5504904152122096e-06, + "loss": 0.4966, + "step": 3332 + }, + { + "epoch": 0.42, + "learning_rate": 6.5485601294449786e-06, + "loss": 0.5212, + "step": 3333 + }, + { + "epoch": 0.42, + "learning_rate": 6.546629588359048e-06, + "loss": 0.5357, + "step": 3334 + }, + { + "epoch": 0.42, + "learning_rate": 6.54469879227272e-06, + "loss": 0.5263, + "step": 3335 + }, + { + "epoch": 0.42, + "learning_rate": 6.542767741504334e-06, + "loss": 0.579, + "step": 3336 + }, + { + "epoch": 0.42, + "learning_rate": 6.540836436372267e-06, + "loss": 0.5306, + "step": 3337 + }, + { + "epoch": 0.42, + "learning_rate": 6.538904877194949e-06, + "loss": 0.5402, + "step": 3338 + }, + { + "epoch": 0.42, + "learning_rate": 6.536973064290842e-06, + "loss": 0.52, + "step": 3339 + }, + { + "epoch": 0.42, + "learning_rate": 6.535040997978455e-06, + "loss": 0.5149, + "step": 3340 + }, + { + "epoch": 0.42, + "learning_rate": 6.533108678576336e-06, + "loss": 0.2151, + "step": 3341 + }, + { + "epoch": 0.42, + "learning_rate": 6.531176106403077e-06, + "loss": 0.5527, + "step": 3342 + }, + { + "epoch": 0.42, + "learning_rate": 6.529243281777311e-06, + "loss": 0.561, + "step": 3343 + }, + { + "epoch": 0.42, + "learning_rate": 6.5273102050177104e-06, + "loss": 0.2247, + "step": 3344 + }, + { + "epoch": 0.42, + "learning_rate": 6.525376876442993e-06, + "loss": 0.5559, + "step": 3345 + }, + { + "epoch": 0.42, + "learning_rate": 6.523443296371914e-06, + "loss": 0.5352, + "step": 3346 + }, + { + "epoch": 0.42, + "learning_rate": 6.521509465123275e-06, + "loss": 0.5588, + "step": 3347 + }, + { + "epoch": 0.42, + "learning_rate": 6.519575383015914e-06, + "loss": 0.226, + "step": 3348 + }, + { + "epoch": 0.42, + "learning_rate": 6.517641050368712e-06, + "loss": 0.5529, + "step": 3349 + }, + { + "epoch": 0.42, + "learning_rate": 6.515706467500596e-06, + "loss": 0.5259, + "step": 3350 + }, + { + "epoch": 0.42, + "learning_rate": 6.513771634730526e-06, + "loss": 0.5789, + "step": 3351 + }, + { + "epoch": 0.42, + "learning_rate": 6.511836552377509e-06, + "loss": 0.5657, + "step": 3352 + }, + { + "epoch": 0.42, + "learning_rate": 6.509901220760591e-06, + "loss": 0.5735, + "step": 3353 + }, + { + "epoch": 0.42, + "learning_rate": 6.50796564019886e-06, + "loss": 0.5671, + "step": 3354 + }, + { + "epoch": 0.42, + "learning_rate": 6.506029811011444e-06, + "loss": 0.5988, + "step": 3355 + }, + { + "epoch": 0.42, + "learning_rate": 6.5040937335175136e-06, + "loss": 0.5253, + "step": 3356 + }, + { + "epoch": 0.42, + "learning_rate": 6.502157408036279e-06, + "loss": 0.5239, + "step": 3357 + }, + { + "epoch": 0.42, + "learning_rate": 6.500220834886993e-06, + "loss": 0.5891, + "step": 3358 + }, + { + "epoch": 0.42, + "learning_rate": 6.498284014388944e-06, + "loss": 0.5516, + "step": 3359 + }, + { + "epoch": 0.42, + "learning_rate": 6.4963469468614696e-06, + "loss": 0.5599, + "step": 3360 + }, + { + "epoch": 0.42, + "learning_rate": 6.494409632623942e-06, + "loss": 0.4914, + "step": 3361 + }, + { + "epoch": 0.42, + "learning_rate": 6.492472071995772e-06, + "loss": 0.4921, + "step": 3362 + }, + { + "epoch": 0.42, + "learning_rate": 6.490534265296422e-06, + "loss": 0.5345, + "step": 3363 + }, + { + "epoch": 0.42, + "learning_rate": 6.4885962128453815e-06, + "loss": 0.4995, + "step": 3364 + }, + { + "epoch": 0.42, + "learning_rate": 6.486657914962189e-06, + "loss": 0.5167, + "step": 3365 + }, + { + "epoch": 0.42, + "learning_rate": 6.484719371966424e-06, + "loss": 0.5176, + "step": 3366 + }, + { + "epoch": 0.42, + "learning_rate": 6.482780584177697e-06, + "loss": 0.5579, + "step": 3367 + }, + { + "epoch": 0.42, + "learning_rate": 6.480841551915671e-06, + "loss": 0.558, + "step": 3368 + }, + { + "epoch": 0.42, + "learning_rate": 6.478902275500042e-06, + "loss": 0.5147, + "step": 3369 + }, + { + "epoch": 0.42, + "learning_rate": 6.476962755250547e-06, + "loss": 0.5529, + "step": 3370 + }, + { + "epoch": 0.42, + "learning_rate": 6.475022991486966e-06, + "loss": 0.4901, + "step": 3371 + }, + { + "epoch": 0.42, + "learning_rate": 6.473082984529115e-06, + "loss": 0.5455, + "step": 3372 + }, + { + "epoch": 0.42, + "learning_rate": 6.471142734696855e-06, + "loss": 0.5433, + "step": 3373 + }, + { + "epoch": 0.42, + "learning_rate": 6.4692022423100845e-06, + "loss": 0.5833, + "step": 3374 + }, + { + "epoch": 0.42, + "learning_rate": 6.467261507688739e-06, + "loss": 0.6399, + "step": 3375 + }, + { + "epoch": 0.42, + "learning_rate": 6.4653205311528e-06, + "loss": 0.549, + "step": 3376 + }, + { + "epoch": 0.42, + "learning_rate": 6.463379313022284e-06, + "loss": 0.5258, + "step": 3377 + }, + { + "epoch": 0.42, + "learning_rate": 6.4614378536172495e-06, + "loss": 0.5452, + "step": 3378 + }, + { + "epoch": 0.42, + "learning_rate": 6.459496153257796e-06, + "loss": 0.6043, + "step": 3379 + }, + { + "epoch": 0.42, + "learning_rate": 6.457554212264059e-06, + "loss": 0.5063, + "step": 3380 + }, + { + "epoch": 0.42, + "learning_rate": 6.455612030956217e-06, + "loss": 0.5649, + "step": 3381 + }, + { + "epoch": 0.42, + "learning_rate": 6.453669609654487e-06, + "loss": 0.5377, + "step": 3382 + }, + { + "epoch": 0.42, + "learning_rate": 6.451726948679123e-06, + "loss": 0.6297, + "step": 3383 + }, + { + "epoch": 0.42, + "learning_rate": 6.449784048350426e-06, + "loss": 0.5735, + "step": 3384 + }, + { + "epoch": 0.42, + "learning_rate": 6.447840908988727e-06, + "loss": 0.5836, + "step": 3385 + }, + { + "epoch": 0.42, + "learning_rate": 6.445897530914403e-06, + "loss": 0.5948, + "step": 3386 + }, + { + "epoch": 0.42, + "learning_rate": 6.4439539144478695e-06, + "loss": 0.2073, + "step": 3387 + }, + { + "epoch": 0.42, + "learning_rate": 6.442010059909576e-06, + "loss": 0.5174, + "step": 3388 + }, + { + "epoch": 0.42, + "learning_rate": 6.440065967620018e-06, + "loss": 0.5201, + "step": 3389 + }, + { + "epoch": 0.42, + "learning_rate": 6.438121637899729e-06, + "loss": 0.5975, + "step": 3390 + }, + { + "epoch": 0.43, + "learning_rate": 6.436177071069277e-06, + "loss": 0.5314, + "step": 3391 + }, + { + "epoch": 0.43, + "learning_rate": 6.434232267449275e-06, + "loss": 0.5416, + "step": 3392 + }, + { + "epoch": 0.43, + "learning_rate": 6.43228722736037e-06, + "loss": 0.464, + "step": 3393 + }, + { + "epoch": 0.43, + "learning_rate": 6.430341951123251e-06, + "loss": 0.1982, + "step": 3394 + }, + { + "epoch": 0.43, + "learning_rate": 6.4283964390586465e-06, + "loss": 0.5093, + "step": 3395 + }, + { + "epoch": 0.43, + "learning_rate": 6.42645069148732e-06, + "loss": 0.4799, + "step": 3396 + }, + { + "epoch": 0.43, + "learning_rate": 6.4245047087300795e-06, + "loss": 0.5023, + "step": 3397 + }, + { + "epoch": 0.43, + "learning_rate": 6.4225584911077684e-06, + "loss": 0.5528, + "step": 3398 + }, + { + "epoch": 0.43, + "learning_rate": 6.420612038941266e-06, + "loss": 0.6513, + "step": 3399 + }, + { + "epoch": 0.43, + "learning_rate": 6.418665352551496e-06, + "loss": 0.5462, + "step": 3400 + }, + { + "epoch": 0.43, + "learning_rate": 6.416718432259419e-06, + "loss": 0.5737, + "step": 3401 + }, + { + "epoch": 0.43, + "learning_rate": 6.4147712783860295e-06, + "loss": 0.5685, + "step": 3402 + }, + { + "epoch": 0.43, + "learning_rate": 6.412823891252369e-06, + "loss": 0.5485, + "step": 3403 + }, + { + "epoch": 0.43, + "learning_rate": 6.410876271179508e-06, + "loss": 0.5455, + "step": 3404 + }, + { + "epoch": 0.43, + "learning_rate": 6.408928418488564e-06, + "loss": 0.5574, + "step": 3405 + }, + { + "epoch": 0.43, + "learning_rate": 6.406980333500687e-06, + "loss": 0.5321, + "step": 3406 + }, + { + "epoch": 0.43, + "learning_rate": 6.405032016537069e-06, + "loss": 0.627, + "step": 3407 + }, + { + "epoch": 0.43, + "learning_rate": 6.403083467918936e-06, + "loss": 0.5475, + "step": 3408 + }, + { + "epoch": 0.43, + "learning_rate": 6.401134687967556e-06, + "loss": 0.5318, + "step": 3409 + }, + { + "epoch": 0.43, + "learning_rate": 6.399185677004236e-06, + "loss": 0.5619, + "step": 3410 + }, + { + "epoch": 0.43, + "learning_rate": 6.3972364353503155e-06, + "loss": 0.59, + "step": 3411 + }, + { + "epoch": 0.43, + "learning_rate": 6.395286963327177e-06, + "loss": 0.6453, + "step": 3412 + }, + { + "epoch": 0.43, + "learning_rate": 6.393337261256241e-06, + "loss": 0.5511, + "step": 3413 + }, + { + "epoch": 0.43, + "learning_rate": 6.3913873294589635e-06, + "loss": 0.5541, + "step": 3414 + }, + { + "epoch": 0.43, + "learning_rate": 6.389437168256838e-06, + "loss": 0.5665, + "step": 3415 + }, + { + "epoch": 0.43, + "learning_rate": 6.387486777971398e-06, + "loss": 0.5776, + "step": 3416 + }, + { + "epoch": 0.43, + "learning_rate": 6.385536158924214e-06, + "loss": 0.4942, + "step": 3417 + }, + { + "epoch": 0.43, + "learning_rate": 6.383585311436893e-06, + "loss": 0.5367, + "step": 3418 + }, + { + "epoch": 0.43, + "learning_rate": 6.381634235831083e-06, + "loss": 0.5034, + "step": 3419 + }, + { + "epoch": 0.43, + "learning_rate": 6.379682932428466e-06, + "loss": 0.566, + "step": 3420 + }, + { + "epoch": 0.43, + "learning_rate": 6.377731401550763e-06, + "loss": 0.464, + "step": 3421 + }, + { + "epoch": 0.43, + "learning_rate": 6.375779643519734e-06, + "loss": 0.4747, + "step": 3422 + }, + { + "epoch": 0.43, + "learning_rate": 6.373827658657172e-06, + "loss": 0.5496, + "step": 3423 + }, + { + "epoch": 0.43, + "learning_rate": 6.371875447284912e-06, + "loss": 0.5717, + "step": 3424 + }, + { + "epoch": 0.43, + "learning_rate": 6.369923009724825e-06, + "loss": 0.4868, + "step": 3425 + }, + { + "epoch": 0.43, + "learning_rate": 6.3679703462988185e-06, + "loss": 0.5482, + "step": 3426 + }, + { + "epoch": 0.43, + "learning_rate": 6.366017457328838e-06, + "loss": 0.5397, + "step": 3427 + }, + { + "epoch": 0.43, + "learning_rate": 6.364064343136865e-06, + "loss": 0.5656, + "step": 3428 + }, + { + "epoch": 0.43, + "learning_rate": 6.36211100404492e-06, + "loss": 0.574, + "step": 3429 + }, + { + "epoch": 0.43, + "learning_rate": 6.360157440375059e-06, + "loss": 0.535, + "step": 3430 + }, + { + "epoch": 0.43, + "learning_rate": 6.358203652449375e-06, + "loss": 0.5818, + "step": 3431 + }, + { + "epoch": 0.43, + "learning_rate": 6.356249640589999e-06, + "loss": 0.2229, + "step": 3432 + }, + { + "epoch": 0.43, + "learning_rate": 6.354295405119101e-06, + "loss": 0.5899, + "step": 3433 + }, + { + "epoch": 0.43, + "learning_rate": 6.352340946358881e-06, + "loss": 0.5597, + "step": 3434 + }, + { + "epoch": 0.43, + "learning_rate": 6.350386264631584e-06, + "loss": 0.6004, + "step": 3435 + }, + { + "epoch": 0.43, + "learning_rate": 6.348431360259485e-06, + "loss": 0.5832, + "step": 3436 + }, + { + "epoch": 0.43, + "learning_rate": 6.3464762335649e-06, + "loss": 0.495, + "step": 3437 + }, + { + "epoch": 0.43, + "learning_rate": 6.344520884870182e-06, + "loss": 0.4964, + "step": 3438 + }, + { + "epoch": 0.43, + "learning_rate": 6.342565314497716e-06, + "loss": 0.554, + "step": 3439 + }, + { + "epoch": 0.43, + "learning_rate": 6.340609522769927e-06, + "loss": 0.5872, + "step": 3440 + }, + { + "epoch": 0.43, + "learning_rate": 6.338653510009279e-06, + "loss": 0.5675, + "step": 3441 + }, + { + "epoch": 0.43, + "learning_rate": 6.336697276538266e-06, + "loss": 0.5024, + "step": 3442 + }, + { + "epoch": 0.43, + "learning_rate": 6.3347408226794235e-06, + "loss": 0.5124, + "step": 3443 + }, + { + "epoch": 0.43, + "learning_rate": 6.33278414875532e-06, + "loss": 0.5552, + "step": 3444 + }, + { + "epoch": 0.43, + "learning_rate": 6.330827255088563e-06, + "loss": 0.5276, + "step": 3445 + }, + { + "epoch": 0.43, + "learning_rate": 6.3288701420017965e-06, + "loss": 0.5659, + "step": 3446 + }, + { + "epoch": 0.43, + "learning_rate": 6.326912809817696e-06, + "loss": 0.5497, + "step": 3447 + }, + { + "epoch": 0.43, + "learning_rate": 6.324955258858978e-06, + "loss": 0.5297, + "step": 3448 + }, + { + "epoch": 0.43, + "learning_rate": 6.322997489448397e-06, + "loss": 0.5074, + "step": 3449 + }, + { + "epoch": 0.43, + "learning_rate": 6.321039501908734e-06, + "loss": 0.5348, + "step": 3450 + }, + { + "epoch": 0.43, + "learning_rate": 6.319081296562817e-06, + "loss": 0.5268, + "step": 3451 + }, + { + "epoch": 0.43, + "learning_rate": 6.317122873733501e-06, + "loss": 0.5183, + "step": 3452 + }, + { + "epoch": 0.43, + "learning_rate": 6.315164233743683e-06, + "loss": 0.5621, + "step": 3453 + }, + { + "epoch": 0.43, + "learning_rate": 6.313205376916295e-06, + "loss": 0.5799, + "step": 3454 + }, + { + "epoch": 0.43, + "learning_rate": 6.3112463035743e-06, + "loss": 0.5495, + "step": 3455 + }, + { + "epoch": 0.43, + "learning_rate": 6.309287014040701e-06, + "loss": 0.5167, + "step": 3456 + }, + { + "epoch": 0.43, + "learning_rate": 6.307327508638539e-06, + "loss": 0.5171, + "step": 3457 + }, + { + "epoch": 0.43, + "learning_rate": 6.3053677876908815e-06, + "loss": 0.2324, + "step": 3458 + }, + { + "epoch": 0.43, + "learning_rate": 6.303407851520841e-06, + "loss": 0.579, + "step": 3459 + }, + { + "epoch": 0.43, + "learning_rate": 6.301447700451562e-06, + "loss": 0.5508, + "step": 3460 + }, + { + "epoch": 0.43, + "learning_rate": 6.2994873348062225e-06, + "loss": 0.5345, + "step": 3461 + }, + { + "epoch": 0.43, + "learning_rate": 6.297526754908038e-06, + "loss": 0.5389, + "step": 3462 + }, + { + "epoch": 0.43, + "learning_rate": 6.29556596108026e-06, + "loss": 0.5369, + "step": 3463 + }, + { + "epoch": 0.43, + "learning_rate": 6.293604953646173e-06, + "loss": 0.5395, + "step": 3464 + }, + { + "epoch": 0.43, + "learning_rate": 6.2916437329290984e-06, + "loss": 0.6148, + "step": 3465 + }, + { + "epoch": 0.43, + "learning_rate": 6.289682299252391e-06, + "loss": 0.5469, + "step": 3466 + }, + { + "epoch": 0.43, + "learning_rate": 6.287720652939445e-06, + "loss": 0.2062, + "step": 3467 + }, + { + "epoch": 0.43, + "learning_rate": 6.2857587943136845e-06, + "loss": 0.5286, + "step": 3468 + }, + { + "epoch": 0.43, + "learning_rate": 6.28379672369857e-06, + "loss": 0.5676, + "step": 3469 + }, + { + "epoch": 0.43, + "learning_rate": 6.2818344414176e-06, + "loss": 0.5288, + "step": 3470 + }, + { + "epoch": 0.44, + "learning_rate": 6.279871947794305e-06, + "loss": 0.4957, + "step": 3471 + }, + { + "epoch": 0.44, + "learning_rate": 6.2779092431522485e-06, + "loss": 0.5465, + "step": 3472 + }, + { + "epoch": 0.44, + "learning_rate": 6.275946327815035e-06, + "loss": 0.5421, + "step": 3473 + }, + { + "epoch": 0.44, + "learning_rate": 6.273983202106296e-06, + "loss": 0.6028, + "step": 3474 + }, + { + "epoch": 0.44, + "learning_rate": 6.272019866349705e-06, + "loss": 0.5341, + "step": 3475 + }, + { + "epoch": 0.44, + "learning_rate": 6.2700563208689654e-06, + "loss": 0.2558, + "step": 3476 + }, + { + "epoch": 0.44, + "learning_rate": 6.268092565987816e-06, + "loss": 0.481, + "step": 3477 + }, + { + "epoch": 0.44, + "learning_rate": 6.266128602030033e-06, + "loss": 0.5255, + "step": 3478 + }, + { + "epoch": 0.44, + "learning_rate": 6.264164429319422e-06, + "loss": 0.5331, + "step": 3479 + }, + { + "epoch": 0.44, + "learning_rate": 6.262200048179825e-06, + "loss": 0.5044, + "step": 3480 + }, + { + "epoch": 0.44, + "learning_rate": 6.260235458935123e-06, + "loss": 0.5397, + "step": 3481 + }, + { + "epoch": 0.44, + "learning_rate": 6.258270661909225e-06, + "loss": 0.5199, + "step": 3482 + }, + { + "epoch": 0.44, + "learning_rate": 6.256305657426078e-06, + "loss": 0.6046, + "step": 3483 + }, + { + "epoch": 0.44, + "learning_rate": 6.25434044580966e-06, + "loss": 0.2374, + "step": 3484 + }, + { + "epoch": 0.44, + "learning_rate": 6.252375027383987e-06, + "loss": 0.5123, + "step": 3485 + }, + { + "epoch": 0.44, + "learning_rate": 6.2504094024731055e-06, + "loss": 0.4773, + "step": 3486 + }, + { + "epoch": 0.44, + "learning_rate": 6.2484435714011e-06, + "loss": 0.5716, + "step": 3487 + }, + { + "epoch": 0.44, + "learning_rate": 6.246477534492082e-06, + "loss": 0.2519, + "step": 3488 + }, + { + "epoch": 0.44, + "learning_rate": 6.244511292070209e-06, + "loss": 0.5147, + "step": 3489 + }, + { + "epoch": 0.44, + "learning_rate": 6.242544844459658e-06, + "loss": 0.6232, + "step": 3490 + }, + { + "epoch": 0.44, + "learning_rate": 6.240578191984652e-06, + "loss": 0.5545, + "step": 3491 + }, + { + "epoch": 0.44, + "learning_rate": 6.238611334969437e-06, + "loss": 0.51, + "step": 3492 + }, + { + "epoch": 0.44, + "learning_rate": 6.236644273738305e-06, + "loss": 0.5228, + "step": 3493 + }, + { + "epoch": 0.44, + "learning_rate": 6.23467700861557e-06, + "loss": 0.6029, + "step": 3494 + }, + { + "epoch": 0.44, + "learning_rate": 6.232709539925588e-06, + "loss": 0.4983, + "step": 3495 + }, + { + "epoch": 0.44, + "learning_rate": 6.230741867992742e-06, + "loss": 0.547, + "step": 3496 + }, + { + "epoch": 0.44, + "learning_rate": 6.228773993141455e-06, + "loss": 0.567, + "step": 3497 + }, + { + "epoch": 0.44, + "learning_rate": 6.226805915696177e-06, + "loss": 0.5107, + "step": 3498 + }, + { + "epoch": 0.44, + "learning_rate": 6.224837635981396e-06, + "loss": 0.5711, + "step": 3499 + }, + { + "epoch": 0.44, + "learning_rate": 6.2228691543216335e-06, + "loss": 0.5384, + "step": 3500 + }, + { + "epoch": 0.44, + "learning_rate": 6.22090047104144e-06, + "loss": 0.5257, + "step": 3501 + }, + { + "epoch": 0.44, + "learning_rate": 6.218931586465403e-06, + "loss": 0.5902, + "step": 3502 + }, + { + "epoch": 0.44, + "learning_rate": 6.216962500918144e-06, + "loss": 0.6361, + "step": 3503 + }, + { + "epoch": 0.44, + "learning_rate": 6.214993214724312e-06, + "loss": 0.5154, + "step": 3504 + }, + { + "epoch": 0.44, + "learning_rate": 6.2130237282085956e-06, + "loss": 0.4929, + "step": 3505 + }, + { + "epoch": 0.44, + "learning_rate": 6.211054041695713e-06, + "loss": 0.546, + "step": 3506 + }, + { + "epoch": 0.44, + "learning_rate": 6.209084155510417e-06, + "loss": 0.5717, + "step": 3507 + }, + { + "epoch": 0.44, + "learning_rate": 6.207114069977492e-06, + "loss": 0.5505, + "step": 3508 + }, + { + "epoch": 0.44, + "learning_rate": 6.205143785421752e-06, + "loss": 0.5457, + "step": 3509 + }, + { + "epoch": 0.44, + "learning_rate": 6.203173302168054e-06, + "loss": 0.5572, + "step": 3510 + }, + { + "epoch": 0.44, + "learning_rate": 6.201202620541276e-06, + "loss": 0.5281, + "step": 3511 + }, + { + "epoch": 0.44, + "learning_rate": 6.199231740866336e-06, + "loss": 0.5368, + "step": 3512 + }, + { + "epoch": 0.44, + "learning_rate": 6.197260663468183e-06, + "loss": 0.4927, + "step": 3513 + }, + { + "epoch": 0.44, + "learning_rate": 6.195289388671797e-06, + "loss": 0.515, + "step": 3514 + }, + { + "epoch": 0.44, + "learning_rate": 6.1933179168021925e-06, + "loss": 0.5341, + "step": 3515 + }, + { + "epoch": 0.44, + "learning_rate": 6.191346248184416e-06, + "loss": 0.5663, + "step": 3516 + }, + { + "epoch": 0.44, + "learning_rate": 6.189374383143546e-06, + "loss": 0.5218, + "step": 3517 + }, + { + "epoch": 0.44, + "learning_rate": 6.187402322004692e-06, + "loss": 0.5417, + "step": 3518 + }, + { + "epoch": 0.44, + "learning_rate": 6.185430065093e-06, + "loss": 0.5156, + "step": 3519 + }, + { + "epoch": 0.44, + "learning_rate": 6.183457612733643e-06, + "loss": 0.5523, + "step": 3520 + }, + { + "epoch": 0.44, + "learning_rate": 6.18148496525183e-06, + "loss": 0.5377, + "step": 3521 + }, + { + "epoch": 0.44, + "learning_rate": 6.179512122972802e-06, + "loss": 0.5466, + "step": 3522 + }, + { + "epoch": 0.44, + "learning_rate": 6.1775390862218306e-06, + "loss": 0.4974, + "step": 3523 + }, + { + "epoch": 0.44, + "learning_rate": 6.175565855324221e-06, + "loss": 0.5481, + "step": 3524 + }, + { + "epoch": 0.44, + "learning_rate": 6.173592430605306e-06, + "loss": 0.5233, + "step": 3525 + }, + { + "epoch": 0.44, + "learning_rate": 6.171618812390457e-06, + "loss": 0.5085, + "step": 3526 + }, + { + "epoch": 0.44, + "learning_rate": 6.169645001005073e-06, + "loss": 0.5581, + "step": 3527 + }, + { + "epoch": 0.44, + "learning_rate": 6.1676709967745876e-06, + "loss": 0.5305, + "step": 3528 + }, + { + "epoch": 0.44, + "learning_rate": 6.165696800024462e-06, + "loss": 0.5035, + "step": 3529 + }, + { + "epoch": 0.44, + "learning_rate": 6.163722411080193e-06, + "loss": 0.5689, + "step": 3530 + }, + { + "epoch": 0.44, + "learning_rate": 6.161747830267307e-06, + "loss": 0.5445, + "step": 3531 + }, + { + "epoch": 0.44, + "learning_rate": 6.159773057911365e-06, + "loss": 0.5447, + "step": 3532 + }, + { + "epoch": 0.44, + "learning_rate": 6.157798094337953e-06, + "loss": 0.5318, + "step": 3533 + }, + { + "epoch": 0.44, + "learning_rate": 6.155822939872699e-06, + "loss": 0.5693, + "step": 3534 + }, + { + "epoch": 0.44, + "learning_rate": 6.153847594841251e-06, + "loss": 0.5516, + "step": 3535 + }, + { + "epoch": 0.44, + "learning_rate": 6.151872059569296e-06, + "loss": 0.5754, + "step": 3536 + }, + { + "epoch": 0.44, + "learning_rate": 6.149896334382551e-06, + "loss": 0.5447, + "step": 3537 + }, + { + "epoch": 0.44, + "learning_rate": 6.147920419606762e-06, + "loss": 0.6087, + "step": 3538 + }, + { + "epoch": 0.44, + "learning_rate": 6.145944315567709e-06, + "loss": 0.5751, + "step": 3539 + }, + { + "epoch": 0.44, + "learning_rate": 6.143968022591201e-06, + "loss": 0.5802, + "step": 3540 + }, + { + "epoch": 0.44, + "learning_rate": 6.141991541003079e-06, + "loss": 0.559, + "step": 3541 + }, + { + "epoch": 0.44, + "learning_rate": 6.140014871129217e-06, + "loss": 0.5912, + "step": 3542 + }, + { + "epoch": 0.44, + "learning_rate": 6.1380380132955155e-06, + "loss": 0.6152, + "step": 3543 + }, + { + "epoch": 0.44, + "learning_rate": 6.13606096782791e-06, + "loss": 0.552, + "step": 3544 + }, + { + "epoch": 0.44, + "learning_rate": 6.134083735052367e-06, + "loss": 0.5296, + "step": 3545 + }, + { + "epoch": 0.44, + "learning_rate": 6.132106315294881e-06, + "loss": 0.5246, + "step": 3546 + }, + { + "epoch": 0.44, + "learning_rate": 6.130128708881478e-06, + "loss": 0.5842, + "step": 3547 + }, + { + "epoch": 0.44, + "learning_rate": 6.128150916138219e-06, + "loss": 0.5292, + "step": 3548 + }, + { + "epoch": 0.44, + "learning_rate": 6.126172937391188e-06, + "loss": 0.5642, + "step": 3549 + }, + { + "epoch": 0.44, + "learning_rate": 6.1241947729665085e-06, + "loss": 0.6068, + "step": 3550 + }, + { + "epoch": 0.45, + "learning_rate": 6.122216423190326e-06, + "loss": 0.4699, + "step": 3551 + }, + { + "epoch": 0.45, + "learning_rate": 6.120237888388823e-06, + "loss": 0.5743, + "step": 3552 + }, + { + "epoch": 0.45, + "learning_rate": 6.118259168888212e-06, + "loss": 0.5277, + "step": 3553 + }, + { + "epoch": 0.45, + "learning_rate": 6.11628026501473e-06, + "loss": 0.5439, + "step": 3554 + }, + { + "epoch": 0.45, + "learning_rate": 6.114301177094651e-06, + "loss": 0.5495, + "step": 3555 + }, + { + "epoch": 0.45, + "learning_rate": 6.112321905454278e-06, + "loss": 0.5482, + "step": 3556 + }, + { + "epoch": 0.45, + "learning_rate": 6.1103424504199405e-06, + "loss": 0.509, + "step": 3557 + }, + { + "epoch": 0.45, + "learning_rate": 6.1083628123180025e-06, + "loss": 0.5777, + "step": 3558 + }, + { + "epoch": 0.45, + "learning_rate": 6.106382991474856e-06, + "loss": 0.5546, + "step": 3559 + }, + { + "epoch": 0.45, + "learning_rate": 6.104402988216926e-06, + "loss": 0.5325, + "step": 3560 + }, + { + "epoch": 0.45, + "learning_rate": 6.102422802870662e-06, + "loss": 0.5478, + "step": 3561 + }, + { + "epoch": 0.45, + "learning_rate": 6.100442435762548e-06, + "loss": 0.6033, + "step": 3562 + }, + { + "epoch": 0.45, + "learning_rate": 6.098461887219097e-06, + "loss": 0.5695, + "step": 3563 + }, + { + "epoch": 0.45, + "learning_rate": 6.096481157566854e-06, + "loss": 0.5838, + "step": 3564 + }, + { + "epoch": 0.45, + "learning_rate": 6.0945002471323874e-06, + "loss": 0.5866, + "step": 3565 + }, + { + "epoch": 0.45, + "learning_rate": 6.092519156242303e-06, + "loss": 0.5852, + "step": 3566 + }, + { + "epoch": 0.45, + "learning_rate": 6.090537885223232e-06, + "loss": 0.5772, + "step": 3567 + }, + { + "epoch": 0.45, + "learning_rate": 6.0885564344018356e-06, + "loss": 0.4847, + "step": 3568 + }, + { + "epoch": 0.45, + "learning_rate": 6.086574804104806e-06, + "loss": 0.5711, + "step": 3569 + }, + { + "epoch": 0.45, + "learning_rate": 6.084592994658862e-06, + "loss": 0.596, + "step": 3570 + }, + { + "epoch": 0.45, + "learning_rate": 6.082611006390759e-06, + "loss": 0.6046, + "step": 3571 + }, + { + "epoch": 0.45, + "learning_rate": 6.080628839627273e-06, + "loss": 0.5026, + "step": 3572 + }, + { + "epoch": 0.45, + "learning_rate": 6.078646494695215e-06, + "loss": 0.228, + "step": 3573 + }, + { + "epoch": 0.45, + "learning_rate": 6.076663971921422e-06, + "loss": 0.5868, + "step": 3574 + }, + { + "epoch": 0.45, + "learning_rate": 6.074681271632767e-06, + "loss": 0.5611, + "step": 3575 + }, + { + "epoch": 0.45, + "learning_rate": 6.072698394156143e-06, + "loss": 0.5224, + "step": 3576 + }, + { + "epoch": 0.45, + "learning_rate": 6.0707153398184795e-06, + "loss": 0.5508, + "step": 3577 + }, + { + "epoch": 0.45, + "learning_rate": 6.06873210894673e-06, + "loss": 0.5668, + "step": 3578 + }, + { + "epoch": 0.45, + "learning_rate": 6.066748701867882e-06, + "loss": 0.2483, + "step": 3579 + }, + { + "epoch": 0.45, + "learning_rate": 6.064765118908947e-06, + "loss": 0.5486, + "step": 3580 + }, + { + "epoch": 0.45, + "learning_rate": 6.062781360396969e-06, + "loss": 0.5728, + "step": 3581 + }, + { + "epoch": 0.45, + "learning_rate": 6.060797426659021e-06, + "loss": 0.525, + "step": 3582 + }, + { + "epoch": 0.45, + "learning_rate": 6.058813318022203e-06, + "loss": 0.5107, + "step": 3583 + }, + { + "epoch": 0.45, + "learning_rate": 6.056829034813645e-06, + "loss": 0.6229, + "step": 3584 + }, + { + "epoch": 0.45, + "learning_rate": 6.054844577360506e-06, + "loss": 0.5405, + "step": 3585 + }, + { + "epoch": 0.45, + "learning_rate": 6.0528599459899706e-06, + "loss": 0.5518, + "step": 3586 + }, + { + "epoch": 0.45, + "learning_rate": 6.050875141029258e-06, + "loss": 0.5048, + "step": 3587 + }, + { + "epoch": 0.45, + "learning_rate": 6.048890162805612e-06, + "loss": 0.5209, + "step": 3588 + }, + { + "epoch": 0.45, + "learning_rate": 6.046905011646303e-06, + "loss": 0.5783, + "step": 3589 + }, + { + "epoch": 0.45, + "learning_rate": 6.044919687878637e-06, + "loss": 0.2171, + "step": 3590 + }, + { + "epoch": 0.45, + "learning_rate": 6.042934191829941e-06, + "loss": 0.5492, + "step": 3591 + }, + { + "epoch": 0.45, + "learning_rate": 6.040948523827573e-06, + "loss": 0.5474, + "step": 3592 + }, + { + "epoch": 0.45, + "learning_rate": 6.038962684198924e-06, + "loss": 0.5049, + "step": 3593 + }, + { + "epoch": 0.45, + "learning_rate": 6.036976673271403e-06, + "loss": 0.5745, + "step": 3594 + }, + { + "epoch": 0.45, + "learning_rate": 6.034990491372457e-06, + "loss": 0.5293, + "step": 3595 + }, + { + "epoch": 0.45, + "learning_rate": 6.033004138829558e-06, + "loss": 0.5459, + "step": 3596 + }, + { + "epoch": 0.45, + "learning_rate": 6.031017615970204e-06, + "loss": 0.4982, + "step": 3597 + }, + { + "epoch": 0.45, + "learning_rate": 6.0290309231219235e-06, + "loss": 0.5326, + "step": 3598 + }, + { + "epoch": 0.45, + "learning_rate": 6.027044060612273e-06, + "loss": 0.6273, + "step": 3599 + }, + { + "epoch": 0.45, + "learning_rate": 6.0250570287688345e-06, + "loss": 0.6441, + "step": 3600 + }, + { + "epoch": 0.45, + "learning_rate": 6.023069827919222e-06, + "loss": 0.5242, + "step": 3601 + }, + { + "epoch": 0.45, + "learning_rate": 6.021082458391072e-06, + "loss": 0.5739, + "step": 3602 + }, + { + "epoch": 0.45, + "learning_rate": 6.019094920512054e-06, + "loss": 0.5223, + "step": 3603 + }, + { + "epoch": 0.45, + "learning_rate": 6.017107214609864e-06, + "loss": 0.4948, + "step": 3604 + }, + { + "epoch": 0.45, + "learning_rate": 6.015119341012222e-06, + "loss": 0.2198, + "step": 3605 + }, + { + "epoch": 0.45, + "learning_rate": 6.0131313000468804e-06, + "loss": 0.5013, + "step": 3606 + }, + { + "epoch": 0.45, + "learning_rate": 6.011143092041617e-06, + "loss": 0.6051, + "step": 3607 + }, + { + "epoch": 0.45, + "learning_rate": 6.009154717324237e-06, + "loss": 0.5755, + "step": 3608 + }, + { + "epoch": 0.45, + "learning_rate": 6.007166176222574e-06, + "loss": 0.53, + "step": 3609 + }, + { + "epoch": 0.45, + "learning_rate": 6.005177469064488e-06, + "loss": 0.5468, + "step": 3610 + }, + { + "epoch": 0.45, + "learning_rate": 6.003188596177867e-06, + "loss": 0.5139, + "step": 3611 + }, + { + "epoch": 0.45, + "learning_rate": 6.0011995578906266e-06, + "loss": 0.6151, + "step": 3612 + }, + { + "epoch": 0.45, + "learning_rate": 5.999210354530709e-06, + "loss": 0.4929, + "step": 3613 + }, + { + "epoch": 0.45, + "learning_rate": 5.9972209864260835e-06, + "loss": 0.4926, + "step": 3614 + }, + { + "epoch": 0.45, + "learning_rate": 5.9952314539047475e-06, + "loss": 0.5369, + "step": 3615 + }, + { + "epoch": 0.45, + "learning_rate": 5.9932417572947245e-06, + "loss": 0.5474, + "step": 3616 + }, + { + "epoch": 0.45, + "learning_rate": 5.991251896924068e-06, + "loss": 0.5408, + "step": 3617 + }, + { + "epoch": 0.45, + "learning_rate": 5.989261873120851e-06, + "loss": 0.542, + "step": 3618 + }, + { + "epoch": 0.45, + "learning_rate": 5.9872716862131815e-06, + "loss": 0.5753, + "step": 3619 + }, + { + "epoch": 0.45, + "learning_rate": 5.985281336529194e-06, + "loss": 0.238, + "step": 3620 + }, + { + "epoch": 0.45, + "learning_rate": 5.983290824397042e-06, + "loss": 0.579, + "step": 3621 + }, + { + "epoch": 0.45, + "learning_rate": 5.981300150144914e-06, + "loss": 0.5402, + "step": 3622 + }, + { + "epoch": 0.45, + "learning_rate": 5.9793093141010224e-06, + "loss": 0.5528, + "step": 3623 + }, + { + "epoch": 0.45, + "learning_rate": 5.977318316593604e-06, + "loss": 0.6446, + "step": 3624 + }, + { + "epoch": 0.45, + "learning_rate": 5.975327157950928e-06, + "loss": 0.5864, + "step": 3625 + }, + { + "epoch": 0.45, + "learning_rate": 5.9733358385012815e-06, + "loss": 0.4981, + "step": 3626 + }, + { + "epoch": 0.45, + "learning_rate": 5.9713443585729865e-06, + "loss": 0.5104, + "step": 3627 + }, + { + "epoch": 0.45, + "learning_rate": 5.9693527184943875e-06, + "loss": 0.2453, + "step": 3628 + }, + { + "epoch": 0.45, + "learning_rate": 5.967360918593856e-06, + "loss": 0.5042, + "step": 3629 + }, + { + "epoch": 0.46, + "learning_rate": 5.96536895919979e-06, + "loss": 0.5668, + "step": 3630 + }, + { + "epoch": 0.46, + "learning_rate": 5.963376840640612e-06, + "loss": 0.5703, + "step": 3631 + }, + { + "epoch": 0.46, + "learning_rate": 5.961384563244775e-06, + "loss": 0.5608, + "step": 3632 + }, + { + "epoch": 0.46, + "learning_rate": 5.959392127340753e-06, + "loss": 0.5461, + "step": 3633 + }, + { + "epoch": 0.46, + "learning_rate": 5.95739953325705e-06, + "loss": 0.5323, + "step": 3634 + }, + { + "epoch": 0.46, + "learning_rate": 5.955406781322195e-06, + "loss": 0.5371, + "step": 3635 + }, + { + "epoch": 0.46, + "learning_rate": 5.9534138718647415e-06, + "loss": 0.5641, + "step": 3636 + }, + { + "epoch": 0.46, + "learning_rate": 5.951420805213271e-06, + "loss": 0.5393, + "step": 3637 + }, + { + "epoch": 0.46, + "learning_rate": 5.949427581696391e-06, + "loss": 0.5397, + "step": 3638 + }, + { + "epoch": 0.46, + "learning_rate": 5.9474342016427336e-06, + "loss": 0.5367, + "step": 3639 + }, + { + "epoch": 0.46, + "learning_rate": 5.945440665380955e-06, + "loss": 0.5208, + "step": 3640 + }, + { + "epoch": 0.46, + "learning_rate": 5.943446973239741e-06, + "loss": 0.5586, + "step": 3641 + }, + { + "epoch": 0.46, + "learning_rate": 5.941453125547803e-06, + "loss": 0.517, + "step": 3642 + }, + { + "epoch": 0.46, + "learning_rate": 5.939459122633875e-06, + "loss": 0.5668, + "step": 3643 + }, + { + "epoch": 0.46, + "learning_rate": 5.9374649648267176e-06, + "loss": 0.4875, + "step": 3644 + }, + { + "epoch": 0.46, + "learning_rate": 5.935470652455117e-06, + "loss": 0.4632, + "step": 3645 + }, + { + "epoch": 0.46, + "learning_rate": 5.933476185847885e-06, + "loss": 0.5817, + "step": 3646 + }, + { + "epoch": 0.46, + "learning_rate": 5.931481565333862e-06, + "loss": 0.5451, + "step": 3647 + }, + { + "epoch": 0.46, + "learning_rate": 5.929486791241907e-06, + "loss": 0.4918, + "step": 3648 + }, + { + "epoch": 0.46, + "learning_rate": 5.927491863900911e-06, + "loss": 0.5282, + "step": 3649 + }, + { + "epoch": 0.46, + "learning_rate": 5.925496783639788e-06, + "loss": 0.5863, + "step": 3650 + }, + { + "epoch": 0.46, + "learning_rate": 5.923501550787473e-06, + "loss": 0.5872, + "step": 3651 + }, + { + "epoch": 0.46, + "learning_rate": 5.921506165672933e-06, + "loss": 0.5771, + "step": 3652 + }, + { + "epoch": 0.46, + "learning_rate": 5.919510628625156e-06, + "loss": 0.5478, + "step": 3653 + }, + { + "epoch": 0.46, + "learning_rate": 5.917514939973154e-06, + "loss": 0.5413, + "step": 3654 + }, + { + "epoch": 0.46, + "learning_rate": 5.915519100045971e-06, + "loss": 0.5333, + "step": 3655 + }, + { + "epoch": 0.46, + "learning_rate": 5.913523109172665e-06, + "loss": 0.6122, + "step": 3656 + }, + { + "epoch": 0.46, + "learning_rate": 5.911526967682328e-06, + "loss": 0.5885, + "step": 3657 + }, + { + "epoch": 0.46, + "learning_rate": 5.909530675904075e-06, + "loss": 0.5311, + "step": 3658 + }, + { + "epoch": 0.46, + "learning_rate": 5.907534234167041e-06, + "loss": 0.5535, + "step": 3659 + }, + { + "epoch": 0.46, + "learning_rate": 5.9055376428003915e-06, + "loss": 0.5642, + "step": 3660 + }, + { + "epoch": 0.46, + "learning_rate": 5.903540902133313e-06, + "loss": 0.5378, + "step": 3661 + }, + { + "epoch": 0.46, + "learning_rate": 5.901544012495017e-06, + "loss": 0.5435, + "step": 3662 + }, + { + "epoch": 0.46, + "learning_rate": 5.899546974214745e-06, + "loss": 0.5337, + "step": 3663 + }, + { + "epoch": 0.46, + "learning_rate": 5.897549787621753e-06, + "loss": 0.4827, + "step": 3664 + }, + { + "epoch": 0.46, + "learning_rate": 5.895552453045329e-06, + "loss": 0.5568, + "step": 3665 + }, + { + "epoch": 0.46, + "learning_rate": 5.893554970814785e-06, + "loss": 0.5272, + "step": 3666 + }, + { + "epoch": 0.46, + "learning_rate": 5.891557341259454e-06, + "loss": 0.5069, + "step": 3667 + }, + { + "epoch": 0.46, + "learning_rate": 5.889559564708697e-06, + "loss": 0.2508, + "step": 3668 + }, + { + "epoch": 0.46, + "learning_rate": 5.887561641491893e-06, + "loss": 0.5667, + "step": 3669 + }, + { + "epoch": 0.46, + "learning_rate": 5.885563571938453e-06, + "loss": 0.5802, + "step": 3670 + }, + { + "epoch": 0.46, + "learning_rate": 5.883565356377807e-06, + "loss": 0.518, + "step": 3671 + }, + { + "epoch": 0.46, + "learning_rate": 5.881566995139412e-06, + "loss": 0.2032, + "step": 3672 + }, + { + "epoch": 0.46, + "learning_rate": 5.8795684885527445e-06, + "loss": 0.5268, + "step": 3673 + }, + { + "epoch": 0.46, + "learning_rate": 5.877569836947312e-06, + "loss": 0.5796, + "step": 3674 + }, + { + "epoch": 0.46, + "learning_rate": 5.87557104065264e-06, + "loss": 0.5772, + "step": 3675 + }, + { + "epoch": 0.46, + "learning_rate": 5.8735720999982805e-06, + "loss": 0.4954, + "step": 3676 + }, + { + "epoch": 0.46, + "learning_rate": 5.871573015313807e-06, + "loss": 0.5524, + "step": 3677 + }, + { + "epoch": 0.46, + "learning_rate": 5.869573786928821e-06, + "loss": 0.5434, + "step": 3678 + }, + { + "epoch": 0.46, + "learning_rate": 5.867574415172943e-06, + "loss": 0.525, + "step": 3679 + }, + { + "epoch": 0.46, + "learning_rate": 5.865574900375819e-06, + "loss": 0.5858, + "step": 3680 + }, + { + "epoch": 0.46, + "learning_rate": 5.86357524286712e-06, + "loss": 0.5973, + "step": 3681 + }, + { + "epoch": 0.46, + "learning_rate": 5.8615754429765405e-06, + "loss": 0.2441, + "step": 3682 + }, + { + "epoch": 0.46, + "learning_rate": 5.859575501033794e-06, + "loss": 0.5077, + "step": 3683 + }, + { + "epoch": 0.46, + "learning_rate": 5.8575754173686225e-06, + "loss": 0.5727, + "step": 3684 + }, + { + "epoch": 0.46, + "learning_rate": 5.855575192310792e-06, + "loss": 0.5108, + "step": 3685 + }, + { + "epoch": 0.46, + "learning_rate": 5.8535748261900845e-06, + "loss": 0.5234, + "step": 3686 + }, + { + "epoch": 0.46, + "learning_rate": 5.8515743193363125e-06, + "loss": 0.5639, + "step": 3687 + }, + { + "epoch": 0.46, + "learning_rate": 5.849573672079312e-06, + "loss": 0.2083, + "step": 3688 + }, + { + "epoch": 0.46, + "learning_rate": 5.8475728847489356e-06, + "loss": 0.5835, + "step": 3689 + }, + { + "epoch": 0.46, + "learning_rate": 5.8455719576750635e-06, + "loss": 0.5208, + "step": 3690 + }, + { + "epoch": 0.46, + "learning_rate": 5.8435708911876e-06, + "loss": 0.5925, + "step": 3691 + }, + { + "epoch": 0.46, + "learning_rate": 5.841569685616472e-06, + "loss": 0.5713, + "step": 3692 + }, + { + "epoch": 0.46, + "learning_rate": 5.8395683412916235e-06, + "loss": 0.5396, + "step": 3693 + }, + { + "epoch": 0.46, + "learning_rate": 5.837566858543029e-06, + "loss": 0.523, + "step": 3694 + }, + { + "epoch": 0.46, + "learning_rate": 5.835565237700683e-06, + "loss": 0.5192, + "step": 3695 + }, + { + "epoch": 0.46, + "learning_rate": 5.833563479094601e-06, + "loss": 0.5686, + "step": 3696 + }, + { + "epoch": 0.46, + "learning_rate": 5.8315615830548245e-06, + "loss": 0.6077, + "step": 3697 + }, + { + "epoch": 0.46, + "learning_rate": 5.829559549911414e-06, + "loss": 0.4868, + "step": 3698 + }, + { + "epoch": 0.46, + "learning_rate": 5.827557379994456e-06, + "loss": 0.4824, + "step": 3699 + }, + { + "epoch": 0.46, + "learning_rate": 5.825555073634059e-06, + "loss": 0.4562, + "step": 3700 + }, + { + "epoch": 0.46, + "learning_rate": 5.8235526311603495e-06, + "loss": 0.5691, + "step": 3701 + }, + { + "epoch": 0.46, + "learning_rate": 5.8215500529034826e-06, + "loss": 0.5362, + "step": 3702 + }, + { + "epoch": 0.46, + "learning_rate": 5.819547339193634e-06, + "loss": 0.4822, + "step": 3703 + }, + { + "epoch": 0.46, + "learning_rate": 5.817544490360998e-06, + "loss": 0.4883, + "step": 3704 + }, + { + "epoch": 0.46, + "learning_rate": 5.8155415067357956e-06, + "loss": 0.5794, + "step": 3705 + }, + { + "epoch": 0.46, + "learning_rate": 5.81353838864827e-06, + "loss": 0.5301, + "step": 3706 + }, + { + "epoch": 0.46, + "learning_rate": 5.811535136428683e-06, + "loss": 0.5298, + "step": 3707 + }, + { + "epoch": 0.46, + "learning_rate": 5.809531750407322e-06, + "loss": 0.5553, + "step": 3708 + }, + { + "epoch": 0.46, + "learning_rate": 5.807528230914493e-06, + "loss": 0.5599, + "step": 3709 + }, + { + "epoch": 0.47, + "learning_rate": 5.805524578280528e-06, + "loss": 0.5119, + "step": 3710 + }, + { + "epoch": 0.47, + "learning_rate": 5.80352079283578e-06, + "loss": 0.4845, + "step": 3711 + }, + { + "epoch": 0.47, + "learning_rate": 5.80151687491062e-06, + "loss": 0.5366, + "step": 3712 + }, + { + "epoch": 0.47, + "learning_rate": 5.799512824835445e-06, + "loss": 0.5355, + "step": 3713 + }, + { + "epoch": 0.47, + "learning_rate": 5.797508642940675e-06, + "loss": 0.527, + "step": 3714 + }, + { + "epoch": 0.47, + "learning_rate": 5.795504329556745e-06, + "loss": 0.511, + "step": 3715 + }, + { + "epoch": 0.47, + "learning_rate": 5.7934998850141175e-06, + "loss": 0.5538, + "step": 3716 + }, + { + "epoch": 0.47, + "learning_rate": 5.791495309643278e-06, + "loss": 0.543, + "step": 3717 + }, + { + "epoch": 0.47, + "learning_rate": 5.789490603774725e-06, + "loss": 0.5207, + "step": 3718 + }, + { + "epoch": 0.47, + "learning_rate": 5.787485767738992e-06, + "loss": 0.5294, + "step": 3719 + }, + { + "epoch": 0.47, + "learning_rate": 5.785480801866619e-06, + "loss": 0.2249, + "step": 3720 + }, + { + "epoch": 0.47, + "learning_rate": 5.783475706488176e-06, + "loss": 0.5357, + "step": 3721 + }, + { + "epoch": 0.47, + "learning_rate": 5.781470481934256e-06, + "loss": 0.5501, + "step": 3722 + }, + { + "epoch": 0.47, + "learning_rate": 5.779465128535467e-06, + "loss": 0.5301, + "step": 3723 + }, + { + "epoch": 0.47, + "learning_rate": 5.777459646622443e-06, + "loss": 0.5388, + "step": 3724 + }, + { + "epoch": 0.47, + "learning_rate": 5.775454036525839e-06, + "loss": 0.4872, + "step": 3725 + }, + { + "epoch": 0.47, + "learning_rate": 5.7734482985763264e-06, + "loss": 0.5535, + "step": 3726 + }, + { + "epoch": 0.47, + "learning_rate": 5.771442433104604e-06, + "loss": 0.5706, + "step": 3727 + }, + { + "epoch": 0.47, + "learning_rate": 5.769436440441385e-06, + "loss": 0.5204, + "step": 3728 + }, + { + "epoch": 0.47, + "learning_rate": 5.767430320917411e-06, + "loss": 0.5706, + "step": 3729 + }, + { + "epoch": 0.47, + "learning_rate": 5.765424074863441e-06, + "loss": 0.57, + "step": 3730 + }, + { + "epoch": 0.47, + "learning_rate": 5.7634177026102515e-06, + "loss": 0.5575, + "step": 3731 + }, + { + "epoch": 0.47, + "learning_rate": 5.761411204488644e-06, + "loss": 0.6016, + "step": 3732 + }, + { + "epoch": 0.47, + "learning_rate": 5.7594045808294405e-06, + "loss": 0.5551, + "step": 3733 + }, + { + "epoch": 0.47, + "learning_rate": 5.757397831963483e-06, + "loss": 0.501, + "step": 3734 + }, + { + "epoch": 0.47, + "learning_rate": 5.755390958221634e-06, + "loss": 0.5252, + "step": 3735 + }, + { + "epoch": 0.47, + "learning_rate": 5.753383959934775e-06, + "loss": 0.5733, + "step": 3736 + }, + { + "epoch": 0.47, + "learning_rate": 5.751376837433811e-06, + "loss": 0.5151, + "step": 3737 + }, + { + "epoch": 0.47, + "learning_rate": 5.749369591049667e-06, + "loss": 0.5331, + "step": 3738 + }, + { + "epoch": 0.47, + "learning_rate": 5.747362221113285e-06, + "loss": 0.2385, + "step": 3739 + }, + { + "epoch": 0.47, + "learning_rate": 5.745354727955632e-06, + "loss": 0.4572, + "step": 3740 + }, + { + "epoch": 0.47, + "learning_rate": 5.743347111907693e-06, + "loss": 0.5378, + "step": 3741 + }, + { + "epoch": 0.47, + "learning_rate": 5.741339373300472e-06, + "loss": 0.4789, + "step": 3742 + }, + { + "epoch": 0.47, + "learning_rate": 5.739331512464996e-06, + "loss": 0.522, + "step": 3743 + }, + { + "epoch": 0.47, + "learning_rate": 5.73732352973231e-06, + "loss": 0.6536, + "step": 3744 + }, + { + "epoch": 0.47, + "learning_rate": 5.735315425433483e-06, + "loss": 0.2069, + "step": 3745 + }, + { + "epoch": 0.47, + "learning_rate": 5.7333071998995975e-06, + "loss": 0.5468, + "step": 3746 + }, + { + "epoch": 0.47, + "learning_rate": 5.73129885346176e-06, + "loss": 0.2175, + "step": 3747 + }, + { + "epoch": 0.47, + "learning_rate": 5.729290386451098e-06, + "loss": 0.51, + "step": 3748 + }, + { + "epoch": 0.47, + "learning_rate": 5.727281799198755e-06, + "loss": 0.5232, + "step": 3749 + }, + { + "epoch": 0.47, + "learning_rate": 5.725273092035899e-06, + "loss": 0.4931, + "step": 3750 + }, + { + "epoch": 0.47, + "learning_rate": 5.723264265293714e-06, + "loss": 0.5602, + "step": 3751 + }, + { + "epoch": 0.47, + "learning_rate": 5.721255319303406e-06, + "loss": 0.6004, + "step": 3752 + }, + { + "epoch": 0.47, + "learning_rate": 5.719246254396198e-06, + "loss": 0.5423, + "step": 3753 + }, + { + "epoch": 0.47, + "learning_rate": 5.717237070903336e-06, + "loss": 0.5449, + "step": 3754 + }, + { + "epoch": 0.47, + "learning_rate": 5.715227769156085e-06, + "loss": 0.564, + "step": 3755 + }, + { + "epoch": 0.47, + "learning_rate": 5.7132183494857244e-06, + "loss": 0.5544, + "step": 3756 + }, + { + "epoch": 0.47, + "learning_rate": 5.711208812223561e-06, + "loss": 0.5388, + "step": 3757 + }, + { + "epoch": 0.47, + "learning_rate": 5.709199157700915e-06, + "loss": 0.6155, + "step": 3758 + }, + { + "epoch": 0.47, + "learning_rate": 5.70718938624913e-06, + "loss": 0.5757, + "step": 3759 + }, + { + "epoch": 0.47, + "learning_rate": 5.705179498199564e-06, + "loss": 0.5304, + "step": 3760 + }, + { + "epoch": 0.47, + "learning_rate": 5.7031694938836e-06, + "loss": 0.4494, + "step": 3761 + }, + { + "epoch": 0.47, + "learning_rate": 5.701159373632635e-06, + "loss": 0.583, + "step": 3762 + }, + { + "epoch": 0.47, + "learning_rate": 5.699149137778088e-06, + "loss": 0.4648, + "step": 3763 + }, + { + "epoch": 0.47, + "learning_rate": 5.697138786651397e-06, + "loss": 0.5335, + "step": 3764 + }, + { + "epoch": 0.47, + "learning_rate": 5.695128320584019e-06, + "loss": 0.5652, + "step": 3765 + }, + { + "epoch": 0.47, + "learning_rate": 5.693117739907428e-06, + "loss": 0.5684, + "step": 3766 + }, + { + "epoch": 0.47, + "learning_rate": 5.691107044953118e-06, + "loss": 0.5402, + "step": 3767 + }, + { + "epoch": 0.47, + "learning_rate": 5.689096236052604e-06, + "loss": 0.5369, + "step": 3768 + }, + { + "epoch": 0.47, + "learning_rate": 5.687085313537415e-06, + "loss": 0.5363, + "step": 3769 + }, + { + "epoch": 0.47, + "learning_rate": 5.6850742777391055e-06, + "loss": 0.5245, + "step": 3770 + }, + { + "epoch": 0.47, + "learning_rate": 5.683063128989242e-06, + "loss": 0.4703, + "step": 3771 + }, + { + "epoch": 0.47, + "learning_rate": 5.681051867619413e-06, + "loss": 0.5223, + "step": 3772 + }, + { + "epoch": 0.47, + "learning_rate": 5.679040493961226e-06, + "loss": 0.4537, + "step": 3773 + }, + { + "epoch": 0.47, + "learning_rate": 5.6770290083463046e-06, + "loss": 0.4775, + "step": 3774 + }, + { + "epoch": 0.47, + "learning_rate": 5.675017411106294e-06, + "loss": 0.2511, + "step": 3775 + }, + { + "epoch": 0.47, + "learning_rate": 5.673005702572855e-06, + "loss": 0.227, + "step": 3776 + }, + { + "epoch": 0.47, + "learning_rate": 5.6709938830776665e-06, + "loss": 0.5716, + "step": 3777 + }, + { + "epoch": 0.47, + "learning_rate": 5.66898195295243e-06, + "loss": 0.5445, + "step": 3778 + }, + { + "epoch": 0.47, + "learning_rate": 5.666969912528859e-06, + "loss": 0.5373, + "step": 3779 + }, + { + "epoch": 0.47, + "learning_rate": 5.664957762138692e-06, + "loss": 0.5222, + "step": 3780 + }, + { + "epoch": 0.47, + "learning_rate": 5.66294550211368e-06, + "loss": 0.5772, + "step": 3781 + }, + { + "epoch": 0.47, + "learning_rate": 5.660933132785593e-06, + "loss": 0.5689, + "step": 3782 + }, + { + "epoch": 0.47, + "learning_rate": 5.658920654486224e-06, + "loss": 0.5295, + "step": 3783 + }, + { + "epoch": 0.47, + "learning_rate": 5.656908067547376e-06, + "loss": 0.5539, + "step": 3784 + }, + { + "epoch": 0.47, + "learning_rate": 5.654895372300877e-06, + "loss": 0.5594, + "step": 3785 + }, + { + "epoch": 0.47, + "learning_rate": 5.652882569078569e-06, + "loss": 0.5838, + "step": 3786 + }, + { + "epoch": 0.47, + "learning_rate": 5.650869658212312e-06, + "loss": 0.5244, + "step": 3787 + }, + { + "epoch": 0.47, + "learning_rate": 5.648856640033984e-06, + "loss": 0.5353, + "step": 3788 + }, + { + "epoch": 0.47, + "learning_rate": 5.646843514875484e-06, + "loss": 0.4901, + "step": 3789 + }, + { + "epoch": 0.48, + "learning_rate": 5.644830283068723e-06, + "loss": 0.5016, + "step": 3790 + }, + { + "epoch": 0.48, + "learning_rate": 5.6428169449456336e-06, + "loss": 0.5212, + "step": 3791 + }, + { + "epoch": 0.48, + "learning_rate": 5.640803500838166e-06, + "loss": 0.521, + "step": 3792 + }, + { + "epoch": 0.48, + "learning_rate": 5.6387899510782826e-06, + "loss": 0.5518, + "step": 3793 + }, + { + "epoch": 0.48, + "learning_rate": 5.636776295997972e-06, + "loss": 0.5622, + "step": 3794 + }, + { + "epoch": 0.48, + "learning_rate": 5.634762535929232e-06, + "loss": 0.5708, + "step": 3795 + }, + { + "epoch": 0.48, + "learning_rate": 5.632748671204083e-06, + "loss": 0.5096, + "step": 3796 + }, + { + "epoch": 0.48, + "learning_rate": 5.630734702154558e-06, + "loss": 0.5092, + "step": 3797 + }, + { + "epoch": 0.48, + "learning_rate": 5.628720629112714e-06, + "loss": 0.5516, + "step": 3798 + }, + { + "epoch": 0.48, + "learning_rate": 5.626706452410618e-06, + "loss": 0.5517, + "step": 3799 + }, + { + "epoch": 0.48, + "learning_rate": 5.624692172380358e-06, + "loss": 0.5479, + "step": 3800 + }, + { + "epoch": 0.48, + "learning_rate": 5.62267778935404e-06, + "loss": 0.4993, + "step": 3801 + }, + { + "epoch": 0.48, + "learning_rate": 5.620663303663781e-06, + "loss": 0.546, + "step": 3802 + }, + { + "epoch": 0.48, + "learning_rate": 5.618648715641724e-06, + "loss": 0.5318, + "step": 3803 + }, + { + "epoch": 0.48, + "learning_rate": 5.616634025620021e-06, + "loss": 0.5349, + "step": 3804 + }, + { + "epoch": 0.48, + "learning_rate": 5.6146192339308445e-06, + "loss": 0.5233, + "step": 3805 + }, + { + "epoch": 0.48, + "learning_rate": 5.612604340906382e-06, + "loss": 0.5238, + "step": 3806 + }, + { + "epoch": 0.48, + "learning_rate": 5.610589346878841e-06, + "loss": 0.5837, + "step": 3807 + }, + { + "epoch": 0.48, + "learning_rate": 5.608574252180443e-06, + "loss": 0.4902, + "step": 3808 + }, + { + "epoch": 0.48, + "learning_rate": 5.606559057143426e-06, + "loss": 0.5606, + "step": 3809 + }, + { + "epoch": 0.48, + "learning_rate": 5.604543762100044e-06, + "loss": 0.52, + "step": 3810 + }, + { + "epoch": 0.48, + "learning_rate": 5.60252836738257e-06, + "loss": 0.5584, + "step": 3811 + }, + { + "epoch": 0.48, + "learning_rate": 5.600512873323292e-06, + "loss": 0.572, + "step": 3812 + }, + { + "epoch": 0.48, + "learning_rate": 5.598497280254513e-06, + "loss": 0.5247, + "step": 3813 + }, + { + "epoch": 0.48, + "learning_rate": 5.5964815885085554e-06, + "loss": 0.5891, + "step": 3814 + }, + { + "epoch": 0.48, + "learning_rate": 5.594465798417754e-06, + "loss": 0.4903, + "step": 3815 + }, + { + "epoch": 0.48, + "learning_rate": 5.592449910314466e-06, + "loss": 0.4691, + "step": 3816 + }, + { + "epoch": 0.48, + "learning_rate": 5.5904339245310555e-06, + "loss": 0.5953, + "step": 3817 + }, + { + "epoch": 0.48, + "learning_rate": 5.588417841399913e-06, + "loss": 0.5289, + "step": 3818 + }, + { + "epoch": 0.48, + "learning_rate": 5.586401661253435e-06, + "loss": 0.2362, + "step": 3819 + }, + { + "epoch": 0.48, + "learning_rate": 5.584385384424042e-06, + "loss": 0.5791, + "step": 3820 + }, + { + "epoch": 0.48, + "learning_rate": 5.5823690112441696e-06, + "loss": 0.5566, + "step": 3821 + }, + { + "epoch": 0.48, + "learning_rate": 5.580352542046263e-06, + "loss": 0.553, + "step": 3822 + }, + { + "epoch": 0.48, + "learning_rate": 5.578335977162789e-06, + "loss": 0.5575, + "step": 3823 + }, + { + "epoch": 0.48, + "learning_rate": 5.5763193169262285e-06, + "loss": 0.517, + "step": 3824 + }, + { + "epoch": 0.48, + "learning_rate": 5.5743025616690785e-06, + "loss": 0.5664, + "step": 3825 + }, + { + "epoch": 0.48, + "learning_rate": 5.57228571172385e-06, + "loss": 0.5472, + "step": 3826 + }, + { + "epoch": 0.48, + "learning_rate": 5.570268767423072e-06, + "loss": 0.4513, + "step": 3827 + }, + { + "epoch": 0.48, + "learning_rate": 5.568251729099289e-06, + "loss": 0.562, + "step": 3828 + }, + { + "epoch": 0.48, + "learning_rate": 5.566234597085058e-06, + "loss": 0.5257, + "step": 3829 + }, + { + "epoch": 0.48, + "learning_rate": 5.564217371712954e-06, + "loss": 0.5612, + "step": 3830 + }, + { + "epoch": 0.48, + "learning_rate": 5.562200053315567e-06, + "loss": 0.5077, + "step": 3831 + }, + { + "epoch": 0.48, + "learning_rate": 5.560182642225505e-06, + "loss": 0.5571, + "step": 3832 + }, + { + "epoch": 0.48, + "learning_rate": 5.558165138775382e-06, + "loss": 0.5676, + "step": 3833 + }, + { + "epoch": 0.48, + "learning_rate": 5.556147543297841e-06, + "loss": 0.5139, + "step": 3834 + }, + { + "epoch": 0.48, + "learning_rate": 5.554129856125527e-06, + "loss": 0.5314, + "step": 3835 + }, + { + "epoch": 0.48, + "learning_rate": 5.552112077591107e-06, + "loss": 0.5663, + "step": 3836 + }, + { + "epoch": 0.48, + "learning_rate": 5.550094208027266e-06, + "loss": 0.5233, + "step": 3837 + }, + { + "epoch": 0.48, + "learning_rate": 5.548076247766696e-06, + "loss": 0.5436, + "step": 3838 + }, + { + "epoch": 0.48, + "learning_rate": 5.546058197142109e-06, + "loss": 0.5618, + "step": 3839 + }, + { + "epoch": 0.48, + "learning_rate": 5.544040056486233e-06, + "loss": 0.5294, + "step": 3840 + }, + { + "epoch": 0.48, + "learning_rate": 5.542021826131805e-06, + "loss": 0.5396, + "step": 3841 + }, + { + "epoch": 0.48, + "learning_rate": 5.5400035064115844e-06, + "loss": 0.504, + "step": 3842 + }, + { + "epoch": 0.48, + "learning_rate": 5.537985097658338e-06, + "loss": 0.6125, + "step": 3843 + }, + { + "epoch": 0.48, + "learning_rate": 5.535966600204853e-06, + "loss": 0.5653, + "step": 3844 + }, + { + "epoch": 0.48, + "learning_rate": 5.533948014383928e-06, + "loss": 0.5039, + "step": 3845 + }, + { + "epoch": 0.48, + "learning_rate": 5.531929340528378e-06, + "loss": 0.5211, + "step": 3846 + }, + { + "epoch": 0.48, + "learning_rate": 5.5299105789710295e-06, + "loss": 0.53, + "step": 3847 + }, + { + "epoch": 0.48, + "learning_rate": 5.527891730044729e-06, + "loss": 0.5246, + "step": 3848 + }, + { + "epoch": 0.48, + "learning_rate": 5.525872794082331e-06, + "loss": 0.5315, + "step": 3849 + }, + { + "epoch": 0.48, + "learning_rate": 5.52385377141671e-06, + "loss": 0.5318, + "step": 3850 + }, + { + "epoch": 0.48, + "learning_rate": 5.52183466238075e-06, + "loss": 0.5208, + "step": 3851 + }, + { + "epoch": 0.48, + "learning_rate": 5.519815467307351e-06, + "loss": 0.4808, + "step": 3852 + }, + { + "epoch": 0.48, + "learning_rate": 5.51779618652943e-06, + "loss": 0.5367, + "step": 3853 + }, + { + "epoch": 0.48, + "learning_rate": 5.515776820379914e-06, + "loss": 0.4501, + "step": 3854 + }, + { + "epoch": 0.48, + "learning_rate": 5.513757369191746e-06, + "loss": 0.5767, + "step": 3855 + }, + { + "epoch": 0.48, + "learning_rate": 5.5117378332978824e-06, + "loss": 0.5488, + "step": 3856 + }, + { + "epoch": 0.48, + "learning_rate": 5.509718213031295e-06, + "loss": 0.5325, + "step": 3857 + }, + { + "epoch": 0.48, + "learning_rate": 5.507698508724969e-06, + "loss": 0.5099, + "step": 3858 + }, + { + "epoch": 0.48, + "learning_rate": 5.505678720711901e-06, + "loss": 0.5717, + "step": 3859 + }, + { + "epoch": 0.48, + "learning_rate": 5.503658849325104e-06, + "loss": 0.5454, + "step": 3860 + }, + { + "epoch": 0.48, + "learning_rate": 5.5016388948976066e-06, + "loss": 0.5583, + "step": 3861 + }, + { + "epoch": 0.48, + "learning_rate": 5.499618857762444e-06, + "loss": 0.4817, + "step": 3862 + }, + { + "epoch": 0.48, + "learning_rate": 5.497598738252673e-06, + "loss": 0.5449, + "step": 3863 + }, + { + "epoch": 0.48, + "learning_rate": 5.4955785367013615e-06, + "loss": 0.4878, + "step": 3864 + }, + { + "epoch": 0.48, + "learning_rate": 5.493558253441586e-06, + "loss": 0.5174, + "step": 3865 + }, + { + "epoch": 0.48, + "learning_rate": 5.491537888806443e-06, + "loss": 0.5416, + "step": 3866 + }, + { + "epoch": 0.48, + "learning_rate": 5.489517443129041e-06, + "loss": 0.532, + "step": 3867 + }, + { + "epoch": 0.48, + "learning_rate": 5.487496916742497e-06, + "loss": 0.5554, + "step": 3868 + }, + { + "epoch": 0.48, + "learning_rate": 5.48547630997995e-06, + "loss": 0.587, + "step": 3869 + }, + { + "epoch": 0.49, + "learning_rate": 5.483455623174544e-06, + "loss": 0.2332, + "step": 3870 + }, + { + "epoch": 0.49, + "learning_rate": 5.48143485665944e-06, + "loss": 0.4995, + "step": 3871 + }, + { + "epoch": 0.49, + "learning_rate": 5.479414010767813e-06, + "loss": 0.5792, + "step": 3872 + }, + { + "epoch": 0.49, + "learning_rate": 5.477393085832847e-06, + "loss": 0.486, + "step": 3873 + }, + { + "epoch": 0.49, + "learning_rate": 5.475372082187744e-06, + "loss": 0.531, + "step": 3874 + }, + { + "epoch": 0.49, + "learning_rate": 5.473351000165718e-06, + "loss": 0.4724, + "step": 3875 + }, + { + "epoch": 0.49, + "learning_rate": 5.471329840099992e-06, + "loss": 0.6, + "step": 3876 + }, + { + "epoch": 0.49, + "learning_rate": 5.469308602323806e-06, + "loss": 0.5518, + "step": 3877 + }, + { + "epoch": 0.49, + "learning_rate": 5.467287287170411e-06, + "loss": 0.4933, + "step": 3878 + }, + { + "epoch": 0.49, + "learning_rate": 5.46526589497307e-06, + "loss": 0.5371, + "step": 3879 + }, + { + "epoch": 0.49, + "learning_rate": 5.463244426065063e-06, + "loss": 0.527, + "step": 3880 + }, + { + "epoch": 0.49, + "learning_rate": 5.4612228807796755e-06, + "loss": 0.5034, + "step": 3881 + }, + { + "epoch": 0.49, + "learning_rate": 5.459201259450212e-06, + "loss": 0.2664, + "step": 3882 + }, + { + "epoch": 0.49, + "learning_rate": 5.457179562409988e-06, + "loss": 0.5139, + "step": 3883 + }, + { + "epoch": 0.49, + "learning_rate": 5.455157789992328e-06, + "loss": 0.5101, + "step": 3884 + }, + { + "epoch": 0.49, + "learning_rate": 5.453135942530574e-06, + "loss": 0.5241, + "step": 3885 + }, + { + "epoch": 0.49, + "learning_rate": 5.451114020358076e-06, + "loss": 0.5639, + "step": 3886 + }, + { + "epoch": 0.49, + "learning_rate": 5.449092023808198e-06, + "loss": 0.5489, + "step": 3887 + }, + { + "epoch": 0.49, + "learning_rate": 5.447069953214319e-06, + "loss": 0.5951, + "step": 3888 + }, + { + "epoch": 0.49, + "learning_rate": 5.4450478089098245e-06, + "loss": 0.5182, + "step": 3889 + }, + { + "epoch": 0.49, + "learning_rate": 5.443025591228117e-06, + "loss": 0.5292, + "step": 3890 + }, + { + "epoch": 0.49, + "learning_rate": 5.4410033005026105e-06, + "loss": 0.5666, + "step": 3891 + }, + { + "epoch": 0.49, + "learning_rate": 5.438980937066729e-06, + "loss": 0.572, + "step": 3892 + }, + { + "epoch": 0.49, + "learning_rate": 5.436958501253908e-06, + "loss": 0.5067, + "step": 3893 + }, + { + "epoch": 0.49, + "learning_rate": 5.434935993397599e-06, + "loss": 0.5441, + "step": 3894 + }, + { + "epoch": 0.49, + "learning_rate": 5.43291341383126e-06, + "loss": 0.5992, + "step": 3895 + }, + { + "epoch": 0.49, + "learning_rate": 5.430890762888367e-06, + "loss": 0.5136, + "step": 3896 + }, + { + "epoch": 0.49, + "learning_rate": 5.428868040902401e-06, + "loss": 0.5491, + "step": 3897 + }, + { + "epoch": 0.49, + "learning_rate": 5.42684524820686e-06, + "loss": 0.6113, + "step": 3898 + }, + { + "epoch": 0.49, + "learning_rate": 5.424822385135251e-06, + "loss": 0.5767, + "step": 3899 + }, + { + "epoch": 0.49, + "learning_rate": 5.422799452021094e-06, + "loss": 0.5421, + "step": 3900 + }, + { + "epoch": 0.49, + "learning_rate": 5.42077644919792e-06, + "loss": 0.5654, + "step": 3901 + }, + { + "epoch": 0.49, + "learning_rate": 5.418753376999271e-06, + "loss": 0.5204, + "step": 3902 + }, + { + "epoch": 0.49, + "learning_rate": 5.416730235758699e-06, + "loss": 0.5584, + "step": 3903 + }, + { + "epoch": 0.49, + "learning_rate": 5.414707025809772e-06, + "loss": 0.5297, + "step": 3904 + }, + { + "epoch": 0.49, + "learning_rate": 5.412683747486066e-06, + "loss": 0.5185, + "step": 3905 + }, + { + "epoch": 0.49, + "learning_rate": 5.410660401121168e-06, + "loss": 0.5454, + "step": 3906 + }, + { + "epoch": 0.49, + "learning_rate": 5.408636987048678e-06, + "loss": 0.4912, + "step": 3907 + }, + { + "epoch": 0.49, + "learning_rate": 5.406613505602204e-06, + "loss": 0.5503, + "step": 3908 + }, + { + "epoch": 0.49, + "learning_rate": 5.404589957115369e-06, + "loss": 0.5485, + "step": 3909 + }, + { + "epoch": 0.49, + "learning_rate": 5.402566341921807e-06, + "loss": 0.5452, + "step": 3910 + }, + { + "epoch": 0.49, + "learning_rate": 5.400542660355157e-06, + "loss": 0.4808, + "step": 3911 + }, + { + "epoch": 0.49, + "learning_rate": 5.398518912749078e-06, + "loss": 0.542, + "step": 3912 + }, + { + "epoch": 0.49, + "learning_rate": 5.396495099437231e-06, + "loss": 0.6043, + "step": 3913 + }, + { + "epoch": 0.49, + "learning_rate": 5.394471220753296e-06, + "loss": 0.4794, + "step": 3914 + }, + { + "epoch": 0.49, + "learning_rate": 5.392447277030956e-06, + "loss": 0.5188, + "step": 3915 + }, + { + "epoch": 0.49, + "learning_rate": 5.390423268603912e-06, + "loss": 0.5088, + "step": 3916 + }, + { + "epoch": 0.49, + "learning_rate": 5.388399195805871e-06, + "loss": 0.5619, + "step": 3917 + }, + { + "epoch": 0.49, + "learning_rate": 5.3863750589705496e-06, + "loss": 0.5298, + "step": 3918 + }, + { + "epoch": 0.49, + "learning_rate": 5.384350858431679e-06, + "loss": 0.6032, + "step": 3919 + }, + { + "epoch": 0.49, + "learning_rate": 5.382326594523001e-06, + "loss": 0.585, + "step": 3920 + }, + { + "epoch": 0.49, + "learning_rate": 5.3803022675782624e-06, + "loss": 0.5255, + "step": 3921 + }, + { + "epoch": 0.49, + "learning_rate": 5.378277877931225e-06, + "loss": 0.5129, + "step": 3922 + }, + { + "epoch": 0.49, + "learning_rate": 5.376253425915661e-06, + "loss": 0.5679, + "step": 3923 + }, + { + "epoch": 0.49, + "learning_rate": 5.37422891186535e-06, + "loss": 0.494, + "step": 3924 + }, + { + "epoch": 0.49, + "learning_rate": 5.3722043361140865e-06, + "loss": 0.4905, + "step": 3925 + }, + { + "epoch": 0.49, + "learning_rate": 5.3701796989956674e-06, + "loss": 0.5188, + "step": 3926 + }, + { + "epoch": 0.49, + "learning_rate": 5.36815500084391e-06, + "loss": 0.5515, + "step": 3927 + }, + { + "epoch": 0.49, + "learning_rate": 5.366130241992633e-06, + "loss": 0.5348, + "step": 3928 + }, + { + "epoch": 0.49, + "learning_rate": 5.3641054227756675e-06, + "loss": 0.2299, + "step": 3929 + }, + { + "epoch": 0.49, + "learning_rate": 5.362080543526857e-06, + "loss": 0.576, + "step": 3930 + }, + { + "epoch": 0.49, + "learning_rate": 5.360055604580054e-06, + "loss": 0.5502, + "step": 3931 + }, + { + "epoch": 0.49, + "learning_rate": 5.358030606269119e-06, + "loss": 0.5169, + "step": 3932 + }, + { + "epoch": 0.49, + "learning_rate": 5.356005548927923e-06, + "loss": 0.557, + "step": 3933 + }, + { + "epoch": 0.49, + "learning_rate": 5.353980432890349e-06, + "loss": 0.5363, + "step": 3934 + }, + { + "epoch": 0.49, + "learning_rate": 5.3519552584902846e-06, + "loss": 0.5224, + "step": 3935 + }, + { + "epoch": 0.49, + "learning_rate": 5.349930026061634e-06, + "loss": 0.5918, + "step": 3936 + }, + { + "epoch": 0.49, + "learning_rate": 5.347904735938303e-06, + "loss": 0.511, + "step": 3937 + }, + { + "epoch": 0.49, + "learning_rate": 5.345879388454214e-06, + "loss": 0.4891, + "step": 3938 + }, + { + "epoch": 0.49, + "learning_rate": 5.343853983943297e-06, + "loss": 0.493, + "step": 3939 + }, + { + "epoch": 0.49, + "learning_rate": 5.341828522739487e-06, + "loss": 0.5164, + "step": 3940 + }, + { + "epoch": 0.49, + "learning_rate": 5.339803005176733e-06, + "loss": 0.5323, + "step": 3941 + }, + { + "epoch": 0.49, + "learning_rate": 5.3377774315889945e-06, + "loss": 0.5323, + "step": 3942 + }, + { + "epoch": 0.49, + "learning_rate": 5.3357518023102335e-06, + "loss": 0.5096, + "step": 3943 + }, + { + "epoch": 0.49, + "learning_rate": 5.3337261176744305e-06, + "loss": 0.5339, + "step": 3944 + }, + { + "epoch": 0.49, + "learning_rate": 5.331700378015564e-06, + "loss": 0.5011, + "step": 3945 + }, + { + "epoch": 0.49, + "learning_rate": 5.329674583667632e-06, + "loss": 0.551, + "step": 3946 + }, + { + "epoch": 0.49, + "learning_rate": 5.327648734964636e-06, + "loss": 0.5332, + "step": 3947 + }, + { + "epoch": 0.49, + "learning_rate": 5.325622832240588e-06, + "loss": 0.233, + "step": 3948 + }, + { + "epoch": 0.5, + "learning_rate": 5.323596875829506e-06, + "loss": 0.5595, + "step": 3949 + }, + { + "epoch": 0.5, + "learning_rate": 5.321570866065424e-06, + "loss": 0.6219, + "step": 3950 + }, + { + "epoch": 0.5, + "learning_rate": 5.319544803282377e-06, + "loss": 0.6446, + "step": 3951 + }, + { + "epoch": 0.5, + "learning_rate": 5.317518687814413e-06, + "loss": 0.5138, + "step": 3952 + }, + { + "epoch": 0.5, + "learning_rate": 5.315492519995585e-06, + "loss": 0.5407, + "step": 3953 + }, + { + "epoch": 0.5, + "learning_rate": 5.31346630015996e-06, + "loss": 0.5743, + "step": 3954 + }, + { + "epoch": 0.5, + "learning_rate": 5.31144002864161e-06, + "loss": 0.2062, + "step": 3955 + }, + { + "epoch": 0.5, + "learning_rate": 5.309413705774616e-06, + "loss": 0.5201, + "step": 3956 + }, + { + "epoch": 0.5, + "learning_rate": 5.307387331893069e-06, + "loss": 0.5202, + "step": 3957 + }, + { + "epoch": 0.5, + "learning_rate": 5.305360907331068e-06, + "loss": 0.6003, + "step": 3958 + }, + { + "epoch": 0.5, + "learning_rate": 5.303334432422714e-06, + "loss": 0.5209, + "step": 3959 + }, + { + "epoch": 0.5, + "learning_rate": 5.301307907502129e-06, + "loss": 0.504, + "step": 3960 + }, + { + "epoch": 0.5, + "learning_rate": 5.299281332903431e-06, + "loss": 0.4613, + "step": 3961 + }, + { + "epoch": 0.5, + "learning_rate": 5.297254708960753e-06, + "loss": 0.2161, + "step": 3962 + }, + { + "epoch": 0.5, + "learning_rate": 5.295228036008237e-06, + "loss": 0.5656, + "step": 3963 + }, + { + "epoch": 0.5, + "learning_rate": 5.293201314380025e-06, + "loss": 0.5425, + "step": 3964 + }, + { + "epoch": 0.5, + "learning_rate": 5.291174544410276e-06, + "loss": 0.5178, + "step": 3965 + }, + { + "epoch": 0.5, + "learning_rate": 5.2891477264331545e-06, + "loss": 0.521, + "step": 3966 + }, + { + "epoch": 0.5, + "learning_rate": 5.287120860782829e-06, + "loss": 0.6019, + "step": 3967 + }, + { + "epoch": 0.5, + "learning_rate": 5.285093947793481e-06, + "loss": 0.564, + "step": 3968 + }, + { + "epoch": 0.5, + "learning_rate": 5.283066987799296e-06, + "loss": 0.6037, + "step": 3969 + }, + { + "epoch": 0.5, + "learning_rate": 5.2810399811344716e-06, + "loss": 0.5785, + "step": 3970 + }, + { + "epoch": 0.5, + "learning_rate": 5.279012928133206e-06, + "loss": 0.5411, + "step": 3971 + }, + { + "epoch": 0.5, + "learning_rate": 5.276985829129713e-06, + "loss": 0.6191, + "step": 3972 + }, + { + "epoch": 0.5, + "learning_rate": 5.274958684458209e-06, + "loss": 0.6212, + "step": 3973 + }, + { + "epoch": 0.5, + "learning_rate": 5.272931494452919e-06, + "loss": 0.592, + "step": 3974 + }, + { + "epoch": 0.5, + "learning_rate": 5.270904259448076e-06, + "loss": 0.5318, + "step": 3975 + }, + { + "epoch": 0.5, + "learning_rate": 5.268876979777922e-06, + "loss": 0.5603, + "step": 3976 + }, + { + "epoch": 0.5, + "learning_rate": 5.2668496557767e-06, + "loss": 0.5469, + "step": 3977 + }, + { + "epoch": 0.5, + "learning_rate": 5.264822287778668e-06, + "loss": 0.5507, + "step": 3978 + }, + { + "epoch": 0.5, + "learning_rate": 5.26279487611809e-06, + "loss": 0.5844, + "step": 3979 + }, + { + "epoch": 0.5, + "learning_rate": 5.260767421129232e-06, + "loss": 0.5362, + "step": 3980 + }, + { + "epoch": 0.5, + "learning_rate": 5.25873992314637e-06, + "loss": 0.5733, + "step": 3981 + }, + { + "epoch": 0.5, + "learning_rate": 5.256712382503792e-06, + "loss": 0.5465, + "step": 3982 + }, + { + "epoch": 0.5, + "learning_rate": 5.254684799535782e-06, + "loss": 0.2419, + "step": 3983 + }, + { + "epoch": 0.5, + "learning_rate": 5.252657174576644e-06, + "loss": 0.5053, + "step": 3984 + }, + { + "epoch": 0.5, + "learning_rate": 5.250629507960678e-06, + "loss": 0.4808, + "step": 3985 + }, + { + "epoch": 0.5, + "learning_rate": 5.248601800022197e-06, + "loss": 0.5346, + "step": 3986 + }, + { + "epoch": 0.5, + "learning_rate": 5.24657405109552e-06, + "loss": 0.5041, + "step": 3987 + }, + { + "epoch": 0.5, + "learning_rate": 5.24454626151497e-06, + "loss": 0.5703, + "step": 3988 + }, + { + "epoch": 0.5, + "learning_rate": 5.242518431614879e-06, + "loss": 0.5333, + "step": 3989 + }, + { + "epoch": 0.5, + "learning_rate": 5.240490561729588e-06, + "loss": 0.5, + "step": 3990 + }, + { + "epoch": 0.5, + "learning_rate": 5.2384626521934364e-06, + "loss": 0.593, + "step": 3991 + }, + { + "epoch": 0.5, + "learning_rate": 5.236434703340782e-06, + "loss": 0.5263, + "step": 3992 + }, + { + "epoch": 0.5, + "learning_rate": 5.234406715505978e-06, + "loss": 0.5171, + "step": 3993 + }, + { + "epoch": 0.5, + "learning_rate": 5.232378689023389e-06, + "loss": 0.4676, + "step": 3994 + }, + { + "epoch": 0.5, + "learning_rate": 5.23035062422739e-06, + "loss": 0.5631, + "step": 3995 + }, + { + "epoch": 0.5, + "learning_rate": 5.2283225214523514e-06, + "loss": 0.493, + "step": 3996 + }, + { + "epoch": 0.5, + "learning_rate": 5.226294381032662e-06, + "loss": 0.5259, + "step": 3997 + }, + { + "epoch": 0.5, + "learning_rate": 5.224266203302708e-06, + "loss": 0.5797, + "step": 3998 + }, + { + "epoch": 0.5, + "learning_rate": 5.222237988596886e-06, + "loss": 0.5957, + "step": 3999 + }, + { + "epoch": 0.5, + "learning_rate": 5.2202097372496e-06, + "loss": 0.5788, + "step": 4000 + }, + { + "epoch": 0.5, + "learning_rate": 5.2181814495952535e-06, + "loss": 0.552, + "step": 4001 + }, + { + "epoch": 0.5, + "learning_rate": 5.216153125968262e-06, + "loss": 0.548, + "step": 4002 + }, + { + "epoch": 0.5, + "learning_rate": 5.214124766703047e-06, + "loss": 0.4737, + "step": 4003 + }, + { + "epoch": 0.5, + "learning_rate": 5.2120963721340315e-06, + "loss": 0.4695, + "step": 4004 + }, + { + "epoch": 0.5, + "learning_rate": 5.2100679425956466e-06, + "loss": 0.5565, + "step": 4005 + }, + { + "epoch": 0.5, + "learning_rate": 5.208039478422333e-06, + "loss": 0.5768, + "step": 4006 + }, + { + "epoch": 0.5, + "learning_rate": 5.20601097994853e-06, + "loss": 0.5696, + "step": 4007 + }, + { + "epoch": 0.5, + "learning_rate": 5.2039824475086865e-06, + "loss": 0.5539, + "step": 4008 + }, + { + "epoch": 0.5, + "learning_rate": 5.20195388143726e-06, + "loss": 0.6191, + "step": 4009 + }, + { + "epoch": 0.5, + "learning_rate": 5.199925282068704e-06, + "loss": 0.5776, + "step": 4010 + }, + { + "epoch": 0.5, + "learning_rate": 5.197896649737489e-06, + "loss": 0.51, + "step": 4011 + }, + { + "epoch": 0.5, + "learning_rate": 5.195867984778083e-06, + "loss": 0.517, + "step": 4012 + }, + { + "epoch": 0.5, + "learning_rate": 5.193839287524963e-06, + "loss": 0.519, + "step": 4013 + }, + { + "epoch": 0.5, + "learning_rate": 5.19181055831261e-06, + "loss": 0.5829, + "step": 4014 + }, + { + "epoch": 0.5, + "learning_rate": 5.1897817974755105e-06, + "loss": 0.5284, + "step": 4015 + }, + { + "epoch": 0.5, + "learning_rate": 5.187753005348155e-06, + "loss": 0.612, + "step": 4016 + }, + { + "epoch": 0.5, + "learning_rate": 5.1857241822650426e-06, + "loss": 0.4793, + "step": 4017 + }, + { + "epoch": 0.5, + "learning_rate": 5.183695328560673e-06, + "loss": 0.5094, + "step": 4018 + }, + { + "epoch": 0.5, + "learning_rate": 5.181666444569556e-06, + "loss": 0.5127, + "step": 4019 + }, + { + "epoch": 0.5, + "learning_rate": 5.1796375306262e-06, + "loss": 0.5215, + "step": 4020 + }, + { + "epoch": 0.5, + "learning_rate": 5.177608587065125e-06, + "loss": 0.5707, + "step": 4021 + }, + { + "epoch": 0.5, + "learning_rate": 5.175579614220851e-06, + "loss": 0.4929, + "step": 4022 + }, + { + "epoch": 0.5, + "learning_rate": 5.173550612427909e-06, + "loss": 0.5476, + "step": 4023 + }, + { + "epoch": 0.5, + "learning_rate": 5.171521582020824e-06, + "loss": 0.5964, + "step": 4024 + }, + { + "epoch": 0.5, + "learning_rate": 5.169492523334135e-06, + "loss": 0.4737, + "step": 4025 + }, + { + "epoch": 0.5, + "learning_rate": 5.167463436702386e-06, + "loss": 0.544, + "step": 4026 + }, + { + "epoch": 0.5, + "learning_rate": 5.165434322460116e-06, + "loss": 0.5177, + "step": 4027 + }, + { + "epoch": 0.5, + "learning_rate": 5.1634051809418804e-06, + "loss": 0.5143, + "step": 4028 + }, + { + "epoch": 0.51, + "learning_rate": 5.161376012482232e-06, + "loss": 0.4845, + "step": 4029 + }, + { + "epoch": 0.51, + "learning_rate": 5.15934681741573e-06, + "loss": 0.5095, + "step": 4030 + }, + { + "epoch": 0.51, + "learning_rate": 5.157317596076937e-06, + "loss": 0.4802, + "step": 4031 + }, + { + "epoch": 0.51, + "learning_rate": 5.15528834880042e-06, + "loss": 0.5423, + "step": 4032 + }, + { + "epoch": 0.51, + "learning_rate": 5.153259075920755e-06, + "loss": 0.211, + "step": 4033 + }, + { + "epoch": 0.51, + "learning_rate": 5.151229777772512e-06, + "loss": 0.5614, + "step": 4034 + }, + { + "epoch": 0.51, + "learning_rate": 5.149200454690276e-06, + "loss": 0.5656, + "step": 4035 + }, + { + "epoch": 0.51, + "learning_rate": 5.1471711070086295e-06, + "loss": 0.4646, + "step": 4036 + }, + { + "epoch": 0.51, + "learning_rate": 5.145141735062162e-06, + "loss": 0.5907, + "step": 4037 + }, + { + "epoch": 0.51, + "learning_rate": 5.1431123391854645e-06, + "loss": 0.5825, + "step": 4038 + }, + { + "epoch": 0.51, + "learning_rate": 5.141082919713134e-06, + "loss": 0.5558, + "step": 4039 + }, + { + "epoch": 0.51, + "learning_rate": 5.139053476979771e-06, + "loss": 0.5402, + "step": 4040 + }, + { + "epoch": 0.51, + "learning_rate": 5.137024011319982e-06, + "loss": 0.5172, + "step": 4041 + }, + { + "epoch": 0.51, + "learning_rate": 5.134994523068372e-06, + "loss": 0.5576, + "step": 4042 + }, + { + "epoch": 0.51, + "learning_rate": 5.132965012559553e-06, + "loss": 0.5344, + "step": 4043 + }, + { + "epoch": 0.51, + "learning_rate": 5.130935480128141e-06, + "loss": 0.5396, + "step": 4044 + }, + { + "epoch": 0.51, + "learning_rate": 5.128905926108755e-06, + "loss": 0.5268, + "step": 4045 + }, + { + "epoch": 0.51, + "learning_rate": 5.126876350836016e-06, + "loss": 0.5986, + "step": 4046 + }, + { + "epoch": 0.51, + "learning_rate": 5.124846754644553e-06, + "loss": 0.5412, + "step": 4047 + }, + { + "epoch": 0.51, + "learning_rate": 5.122817137868992e-06, + "loss": 0.216, + "step": 4048 + }, + { + "epoch": 0.51, + "learning_rate": 5.12078750084397e-06, + "loss": 0.6011, + "step": 4049 + }, + { + "epoch": 0.51, + "learning_rate": 5.118757843904118e-06, + "loss": 0.5647, + "step": 4050 + }, + { + "epoch": 0.51, + "learning_rate": 5.116728167384082e-06, + "loss": 0.5676, + "step": 4051 + }, + { + "epoch": 0.51, + "learning_rate": 5.114698471618498e-06, + "loss": 0.5958, + "step": 4052 + }, + { + "epoch": 0.51, + "learning_rate": 5.112668756942015e-06, + "loss": 0.5901, + "step": 4053 + }, + { + "epoch": 0.51, + "learning_rate": 5.1106390236892845e-06, + "loss": 0.5793, + "step": 4054 + }, + { + "epoch": 0.51, + "learning_rate": 5.108609272194954e-06, + "loss": 0.5239, + "step": 4055 + }, + { + "epoch": 0.51, + "learning_rate": 5.106579502793681e-06, + "loss": 0.552, + "step": 4056 + }, + { + "epoch": 0.51, + "learning_rate": 5.104549715820123e-06, + "loss": 0.5047, + "step": 4057 + }, + { + "epoch": 0.51, + "learning_rate": 5.102519911608941e-06, + "loss": 0.4812, + "step": 4058 + }, + { + "epoch": 0.51, + "learning_rate": 5.100490090494799e-06, + "loss": 0.6119, + "step": 4059 + }, + { + "epoch": 0.51, + "learning_rate": 5.098460252812363e-06, + "loss": 0.5451, + "step": 4060 + }, + { + "epoch": 0.51, + "learning_rate": 5.096430398896303e-06, + "loss": 0.5733, + "step": 4061 + }, + { + "epoch": 0.51, + "learning_rate": 5.09440052908129e-06, + "loss": 0.6102, + "step": 4062 + }, + { + "epoch": 0.51, + "learning_rate": 5.092370643702001e-06, + "loss": 0.5486, + "step": 4063 + }, + { + "epoch": 0.51, + "learning_rate": 5.090340743093109e-06, + "loss": 0.5722, + "step": 4064 + }, + { + "epoch": 0.51, + "learning_rate": 5.088310827589298e-06, + "loss": 0.5547, + "step": 4065 + }, + { + "epoch": 0.51, + "learning_rate": 5.086280897525246e-06, + "loss": 0.5037, + "step": 4066 + }, + { + "epoch": 0.51, + "learning_rate": 5.084250953235642e-06, + "loss": 0.6072, + "step": 4067 + }, + { + "epoch": 0.51, + "learning_rate": 5.082220995055168e-06, + "loss": 0.5202, + "step": 4068 + }, + { + "epoch": 0.51, + "learning_rate": 5.080191023318517e-06, + "loss": 0.5214, + "step": 4069 + }, + { + "epoch": 0.51, + "learning_rate": 5.07816103836038e-06, + "loss": 0.5282, + "step": 4070 + }, + { + "epoch": 0.51, + "learning_rate": 5.076131040515447e-06, + "loss": 0.496, + "step": 4071 + }, + { + "epoch": 0.51, + "learning_rate": 5.074101030118418e-06, + "loss": 0.2295, + "step": 4072 + }, + { + "epoch": 0.51, + "learning_rate": 5.07207100750399e-06, + "loss": 0.5473, + "step": 4073 + }, + { + "epoch": 0.51, + "learning_rate": 5.07004097300686e-06, + "loss": 0.5243, + "step": 4074 + }, + { + "epoch": 0.51, + "learning_rate": 5.068010926961732e-06, + "loss": 0.5058, + "step": 4075 + }, + { + "epoch": 0.51, + "learning_rate": 5.0659808697033095e-06, + "loss": 0.56, + "step": 4076 + }, + { + "epoch": 0.51, + "learning_rate": 5.063950801566297e-06, + "loss": 0.5325, + "step": 4077 + }, + { + "epoch": 0.51, + "learning_rate": 5.061920722885404e-06, + "loss": 0.601, + "step": 4078 + }, + { + "epoch": 0.51, + "learning_rate": 5.059890633995337e-06, + "loss": 0.5708, + "step": 4079 + }, + { + "epoch": 0.51, + "learning_rate": 5.057860535230807e-06, + "loss": 0.578, + "step": 4080 + }, + { + "epoch": 0.51, + "learning_rate": 5.055830426926528e-06, + "loss": 0.5283, + "step": 4081 + }, + { + "epoch": 0.51, + "learning_rate": 5.053800309417213e-06, + "loss": 0.5016, + "step": 4082 + }, + { + "epoch": 0.51, + "learning_rate": 5.051770183037577e-06, + "loss": 0.5833, + "step": 4083 + }, + { + "epoch": 0.51, + "learning_rate": 5.049740048122338e-06, + "loss": 0.539, + "step": 4084 + }, + { + "epoch": 0.51, + "learning_rate": 5.047709905006212e-06, + "loss": 0.572, + "step": 4085 + }, + { + "epoch": 0.51, + "learning_rate": 5.045679754023922e-06, + "loss": 0.5575, + "step": 4086 + }, + { + "epoch": 0.51, + "learning_rate": 5.0436495955101865e-06, + "loss": 0.5398, + "step": 4087 + }, + { + "epoch": 0.51, + "learning_rate": 5.041619429799726e-06, + "loss": 0.2212, + "step": 4088 + }, + { + "epoch": 0.51, + "learning_rate": 5.039589257227269e-06, + "loss": 0.577, + "step": 4089 + }, + { + "epoch": 0.51, + "learning_rate": 5.037559078127536e-06, + "loss": 0.4923, + "step": 4090 + }, + { + "epoch": 0.51, + "learning_rate": 5.035528892835254e-06, + "loss": 0.5287, + "step": 4091 + }, + { + "epoch": 0.51, + "learning_rate": 5.03349870168515e-06, + "loss": 0.5063, + "step": 4092 + }, + { + "epoch": 0.51, + "learning_rate": 5.03146850501195e-06, + "loss": 0.5442, + "step": 4093 + }, + { + "epoch": 0.51, + "learning_rate": 5.029438303150385e-06, + "loss": 0.5293, + "step": 4094 + }, + { + "epoch": 0.51, + "learning_rate": 5.02740809643518e-06, + "loss": 0.5585, + "step": 4095 + }, + { + "epoch": 0.51, + "learning_rate": 5.025377885201069e-06, + "loss": 0.4702, + "step": 4096 + }, + { + "epoch": 0.51, + "learning_rate": 5.023347669782783e-06, + "loss": 0.5446, + "step": 4097 + }, + { + "epoch": 0.51, + "learning_rate": 5.021317450515051e-06, + "loss": 0.5011, + "step": 4098 + }, + { + "epoch": 0.51, + "learning_rate": 5.0192872277326055e-06, + "loss": 0.5527, + "step": 4099 + }, + { + "epoch": 0.51, + "learning_rate": 5.017257001770183e-06, + "loss": 0.5111, + "step": 4100 + }, + { + "epoch": 0.51, + "learning_rate": 5.0152267729625115e-06, + "loss": 0.5404, + "step": 4101 + }, + { + "epoch": 0.51, + "learning_rate": 5.013196541644329e-06, + "loss": 0.5052, + "step": 4102 + }, + { + "epoch": 0.51, + "learning_rate": 5.011166308150367e-06, + "loss": 0.5492, + "step": 4103 + }, + { + "epoch": 0.51, + "learning_rate": 5.009136072815361e-06, + "loss": 0.4966, + "step": 4104 + }, + { + "epoch": 0.51, + "learning_rate": 5.007105835974046e-06, + "loss": 0.2644, + "step": 4105 + }, + { + "epoch": 0.51, + "learning_rate": 5.005075597961158e-06, + "loss": 0.5406, + "step": 4106 + }, + { + "epoch": 0.51, + "learning_rate": 5.003045359111431e-06, + "loss": 0.555, + "step": 4107 + }, + { + "epoch": 0.51, + "learning_rate": 5.0010151197596e-06, + "loss": 0.5239, + "step": 4108 + }, + { + "epoch": 0.52, + "learning_rate": 4.998984880240401e-06, + "loss": 0.5815, + "step": 4109 + }, + { + "epoch": 0.52, + "learning_rate": 4.99695464088857e-06, + "loss": 0.4723, + "step": 4110 + }, + { + "epoch": 0.52, + "learning_rate": 4.994924402038842e-06, + "loss": 0.5564, + "step": 4111 + }, + { + "epoch": 0.52, + "learning_rate": 4.992894164025955e-06, + "loss": 0.5457, + "step": 4112 + }, + { + "epoch": 0.52, + "learning_rate": 4.9908639271846405e-06, + "loss": 0.6309, + "step": 4113 + }, + { + "epoch": 0.52, + "learning_rate": 4.988833691849634e-06, + "loss": 0.5441, + "step": 4114 + }, + { + "epoch": 0.52, + "learning_rate": 4.986803458355672e-06, + "loss": 0.5011, + "step": 4115 + }, + { + "epoch": 0.52, + "learning_rate": 4.9847732270374885e-06, + "loss": 0.4838, + "step": 4116 + }, + { + "epoch": 0.52, + "learning_rate": 4.98274299822982e-06, + "loss": 0.5398, + "step": 4117 + }, + { + "epoch": 0.52, + "learning_rate": 4.980712772267395e-06, + "loss": 0.5797, + "step": 4118 + }, + { + "epoch": 0.52, + "learning_rate": 4.9786825494849505e-06, + "loss": 0.6339, + "step": 4119 + }, + { + "epoch": 0.52, + "learning_rate": 4.976652330217219e-06, + "loss": 0.5883, + "step": 4120 + }, + { + "epoch": 0.52, + "learning_rate": 4.974622114798931e-06, + "loss": 0.5034, + "step": 4121 + }, + { + "epoch": 0.52, + "learning_rate": 4.972591903564822e-06, + "loss": 0.5899, + "step": 4122 + }, + { + "epoch": 0.52, + "learning_rate": 4.9705616968496175e-06, + "loss": 0.5318, + "step": 4123 + }, + { + "epoch": 0.52, + "learning_rate": 4.968531494988051e-06, + "loss": 0.5058, + "step": 4124 + }, + { + "epoch": 0.52, + "learning_rate": 4.9665012983148515e-06, + "loss": 0.5177, + "step": 4125 + }, + { + "epoch": 0.52, + "learning_rate": 4.964471107164746e-06, + "loss": 0.5398, + "step": 4126 + }, + { + "epoch": 0.52, + "learning_rate": 4.962440921872464e-06, + "loss": 0.5839, + "step": 4127 + }, + { + "epoch": 0.52, + "learning_rate": 4.960410742772732e-06, + "loss": 0.5639, + "step": 4128 + }, + { + "epoch": 0.52, + "learning_rate": 4.9583805702002744e-06, + "loss": 0.548, + "step": 4129 + }, + { + "epoch": 0.52, + "learning_rate": 4.956350404489816e-06, + "loss": 0.5174, + "step": 4130 + }, + { + "epoch": 0.52, + "learning_rate": 4.95432024597608e-06, + "loss": 0.5153, + "step": 4131 + }, + { + "epoch": 0.52, + "learning_rate": 4.952290094993789e-06, + "loss": 0.5504, + "step": 4132 + }, + { + "epoch": 0.52, + "learning_rate": 4.950259951877664e-06, + "loss": 0.5163, + "step": 4133 + }, + { + "epoch": 0.52, + "learning_rate": 4.9482298169624235e-06, + "loss": 0.5333, + "step": 4134 + }, + { + "epoch": 0.52, + "learning_rate": 4.946199690582789e-06, + "loss": 0.5367, + "step": 4135 + }, + { + "epoch": 0.52, + "learning_rate": 4.944169573073473e-06, + "loss": 0.5172, + "step": 4136 + }, + { + "epoch": 0.52, + "learning_rate": 4.942139464769194e-06, + "loss": 0.6341, + "step": 4137 + }, + { + "epoch": 0.52, + "learning_rate": 4.940109366004665e-06, + "loss": 0.5276, + "step": 4138 + }, + { + "epoch": 0.52, + "learning_rate": 4.938079277114598e-06, + "loss": 0.5362, + "step": 4139 + }, + { + "epoch": 0.52, + "learning_rate": 4.9360491984337035e-06, + "loss": 0.5604, + "step": 4140 + }, + { + "epoch": 0.52, + "learning_rate": 4.934019130296691e-06, + "loss": 0.5481, + "step": 4141 + }, + { + "epoch": 0.52, + "learning_rate": 4.93198907303827e-06, + "loss": 0.2358, + "step": 4142 + }, + { + "epoch": 0.52, + "learning_rate": 4.9299590269931415e-06, + "loss": 0.5671, + "step": 4143 + }, + { + "epoch": 0.52, + "learning_rate": 4.927928992496013e-06, + "loss": 0.5773, + "step": 4144 + }, + { + "epoch": 0.52, + "learning_rate": 4.925898969881583e-06, + "loss": 0.559, + "step": 4145 + }, + { + "epoch": 0.52, + "learning_rate": 4.923868959484554e-06, + "loss": 0.473, + "step": 4146 + }, + { + "epoch": 0.52, + "learning_rate": 4.921838961639622e-06, + "loss": 0.5797, + "step": 4147 + }, + { + "epoch": 0.52, + "learning_rate": 4.919808976681483e-06, + "loss": 0.5146, + "step": 4148 + }, + { + "epoch": 0.52, + "learning_rate": 4.917779004944834e-06, + "loss": 0.538, + "step": 4149 + }, + { + "epoch": 0.52, + "learning_rate": 4.915749046764361e-06, + "loss": 0.5813, + "step": 4150 + }, + { + "epoch": 0.52, + "learning_rate": 4.913719102474755e-06, + "loss": 0.5763, + "step": 4151 + }, + { + "epoch": 0.52, + "learning_rate": 4.911689172410704e-06, + "loss": 0.5431, + "step": 4152 + }, + { + "epoch": 0.52, + "learning_rate": 4.909659256906892e-06, + "loss": 0.5693, + "step": 4153 + }, + { + "epoch": 0.52, + "learning_rate": 4.907629356298e-06, + "loss": 0.5187, + "step": 4154 + }, + { + "epoch": 0.52, + "learning_rate": 4.9055994709187115e-06, + "loss": 0.4992, + "step": 4155 + }, + { + "epoch": 0.52, + "learning_rate": 4.903569601103699e-06, + "loss": 0.4956, + "step": 4156 + }, + { + "epoch": 0.52, + "learning_rate": 4.901539747187638e-06, + "loss": 0.5787, + "step": 4157 + }, + { + "epoch": 0.52, + "learning_rate": 4.899509909505202e-06, + "loss": 0.5229, + "step": 4158 + }, + { + "epoch": 0.52, + "learning_rate": 4.89748008839106e-06, + "loss": 0.5996, + "step": 4159 + }, + { + "epoch": 0.52, + "learning_rate": 4.89545028417988e-06, + "loss": 0.4807, + "step": 4160 + }, + { + "epoch": 0.52, + "learning_rate": 4.893420497206321e-06, + "loss": 0.5343, + "step": 4161 + }, + { + "epoch": 0.52, + "learning_rate": 4.8913907278050474e-06, + "loss": 0.5211, + "step": 4162 + }, + { + "epoch": 0.52, + "learning_rate": 4.889360976310717e-06, + "loss": 0.5233, + "step": 4163 + }, + { + "epoch": 0.52, + "learning_rate": 4.887331243057984e-06, + "loss": 0.5571, + "step": 4164 + }, + { + "epoch": 0.52, + "learning_rate": 4.8853015283815046e-06, + "loss": 0.5917, + "step": 4165 + }, + { + "epoch": 0.52, + "learning_rate": 4.883271832615921e-06, + "loss": 0.4976, + "step": 4166 + }, + { + "epoch": 0.52, + "learning_rate": 4.881242156095882e-06, + "loss": 0.2276, + "step": 4167 + }, + { + "epoch": 0.52, + "learning_rate": 4.8792124991560314e-06, + "loss": 0.5358, + "step": 4168 + }, + { + "epoch": 0.52, + "learning_rate": 4.877182862131008e-06, + "loss": 0.4881, + "step": 4169 + }, + { + "epoch": 0.52, + "learning_rate": 4.875153245355447e-06, + "loss": 0.5821, + "step": 4170 + }, + { + "epoch": 0.52, + "learning_rate": 4.873123649163985e-06, + "loss": 0.5669, + "step": 4171 + }, + { + "epoch": 0.52, + "learning_rate": 4.871094073891247e-06, + "loss": 0.5469, + "step": 4172 + }, + { + "epoch": 0.52, + "learning_rate": 4.869064519871861e-06, + "loss": 0.5031, + "step": 4173 + }, + { + "epoch": 0.52, + "learning_rate": 4.867034987440448e-06, + "loss": 0.5876, + "step": 4174 + }, + { + "epoch": 0.52, + "learning_rate": 4.865005476931629e-06, + "loss": 0.5186, + "step": 4175 + }, + { + "epoch": 0.52, + "learning_rate": 4.86297598868002e-06, + "loss": 0.5971, + "step": 4176 + }, + { + "epoch": 0.52, + "learning_rate": 4.8609465230202295e-06, + "loss": 0.5287, + "step": 4177 + }, + { + "epoch": 0.52, + "learning_rate": 4.858917080286867e-06, + "loss": 0.5307, + "step": 4178 + }, + { + "epoch": 0.52, + "learning_rate": 4.856887660814536e-06, + "loss": 0.5693, + "step": 4179 + }, + { + "epoch": 0.52, + "learning_rate": 4.854858264937839e-06, + "loss": 0.5633, + "step": 4180 + }, + { + "epoch": 0.52, + "learning_rate": 4.852828892991373e-06, + "loss": 0.536, + "step": 4181 + }, + { + "epoch": 0.52, + "learning_rate": 4.850799545309726e-06, + "loss": 0.5607, + "step": 4182 + }, + { + "epoch": 0.52, + "learning_rate": 4.84877022222749e-06, + "loss": 0.517, + "step": 4183 + }, + { + "epoch": 0.52, + "learning_rate": 4.846740924079247e-06, + "loss": 0.5481, + "step": 4184 + }, + { + "epoch": 0.52, + "learning_rate": 4.844711651199579e-06, + "loss": 0.5588, + "step": 4185 + }, + { + "epoch": 0.52, + "learning_rate": 4.8426824039230635e-06, + "loss": 0.5346, + "step": 4186 + }, + { + "epoch": 0.52, + "learning_rate": 4.840653182584271e-06, + "loss": 0.5584, + "step": 4187 + }, + { + "epoch": 0.52, + "learning_rate": 4.83862398751777e-06, + "loss": 0.2348, + "step": 4188 + }, + { + "epoch": 0.53, + "learning_rate": 4.83659481905812e-06, + "loss": 0.487, + "step": 4189 + }, + { + "epoch": 0.53, + "learning_rate": 4.8345656775398846e-06, + "loss": 0.4837, + "step": 4190 + }, + { + "epoch": 0.53, + "learning_rate": 4.832536563297616e-06, + "loss": 0.5352, + "step": 4191 + }, + { + "epoch": 0.53, + "learning_rate": 4.830507476665866e-06, + "loss": 0.5458, + "step": 4192 + }, + { + "epoch": 0.53, + "learning_rate": 4.828478417979178e-06, + "loss": 0.5263, + "step": 4193 + }, + { + "epoch": 0.53, + "learning_rate": 4.8264493875720934e-06, + "loss": 0.4827, + "step": 4194 + }, + { + "epoch": 0.53, + "learning_rate": 4.82442038577915e-06, + "loss": 0.5255, + "step": 4195 + }, + { + "epoch": 0.53, + "learning_rate": 4.822391412934876e-06, + "loss": 0.5767, + "step": 4196 + }, + { + "epoch": 0.53, + "learning_rate": 4.820362469373802e-06, + "loss": 0.5649, + "step": 4197 + }, + { + "epoch": 0.53, + "learning_rate": 4.818333555430447e-06, + "loss": 0.5651, + "step": 4198 + }, + { + "epoch": 0.53, + "learning_rate": 4.8163046714393285e-06, + "loss": 0.5597, + "step": 4199 + }, + { + "epoch": 0.53, + "learning_rate": 4.814275817734959e-06, + "loss": 0.5851, + "step": 4200 + }, + { + "epoch": 0.53, + "learning_rate": 4.8122469946518455e-06, + "loss": 0.5791, + "step": 4201 + }, + { + "epoch": 0.53, + "learning_rate": 4.810218202524491e-06, + "loss": 0.5198, + "step": 4202 + }, + { + "epoch": 0.53, + "learning_rate": 4.808189441687392e-06, + "loss": 0.5709, + "step": 4203 + }, + { + "epoch": 0.53, + "learning_rate": 4.8061607124750386e-06, + "loss": 0.5895, + "step": 4204 + }, + { + "epoch": 0.53, + "learning_rate": 4.804132015221918e-06, + "loss": 0.5655, + "step": 4205 + }, + { + "epoch": 0.53, + "learning_rate": 4.802103350262512e-06, + "loss": 0.5105, + "step": 4206 + }, + { + "epoch": 0.53, + "learning_rate": 4.800074717931296e-06, + "loss": 0.5209, + "step": 4207 + }, + { + "epoch": 0.53, + "learning_rate": 4.7980461185627436e-06, + "loss": 0.5478, + "step": 4208 + }, + { + "epoch": 0.53, + "learning_rate": 4.796017552491314e-06, + "loss": 0.5177, + "step": 4209 + }, + { + "epoch": 0.53, + "learning_rate": 4.793989020051471e-06, + "loss": 0.5372, + "step": 4210 + }, + { + "epoch": 0.53, + "learning_rate": 4.791960521577668e-06, + "loss": 0.593, + "step": 4211 + }, + { + "epoch": 0.53, + "learning_rate": 4.7899320574043526e-06, + "loss": 0.5334, + "step": 4212 + }, + { + "epoch": 0.53, + "learning_rate": 4.787903627865971e-06, + "loss": 0.5868, + "step": 4213 + }, + { + "epoch": 0.53, + "learning_rate": 4.785875233296956e-06, + "loss": 0.5264, + "step": 4214 + }, + { + "epoch": 0.53, + "learning_rate": 4.783846874031739e-06, + "loss": 0.4951, + "step": 4215 + }, + { + "epoch": 0.53, + "learning_rate": 4.781818550404748e-06, + "loss": 0.4841, + "step": 4216 + }, + { + "epoch": 0.53, + "learning_rate": 4.779790262750402e-06, + "loss": 0.5339, + "step": 4217 + }, + { + "epoch": 0.53, + "learning_rate": 4.777762011403114e-06, + "loss": 0.5393, + "step": 4218 + }, + { + "epoch": 0.53, + "learning_rate": 4.775733796697294e-06, + "loss": 0.4942, + "step": 4219 + }, + { + "epoch": 0.53, + "learning_rate": 4.77370561896734e-06, + "loss": 0.4985, + "step": 4220 + }, + { + "epoch": 0.53, + "learning_rate": 4.771677478547649e-06, + "loss": 0.4764, + "step": 4221 + }, + { + "epoch": 0.53, + "learning_rate": 4.769649375772612e-06, + "loss": 0.5069, + "step": 4222 + }, + { + "epoch": 0.53, + "learning_rate": 4.767621310976611e-06, + "loss": 0.5277, + "step": 4223 + }, + { + "epoch": 0.53, + "learning_rate": 4.765593284494025e-06, + "loss": 0.5091, + "step": 4224 + }, + { + "epoch": 0.53, + "learning_rate": 4.76356529665922e-06, + "loss": 0.5177, + "step": 4225 + }, + { + "epoch": 0.53, + "learning_rate": 4.761537347806564e-06, + "loss": 0.5562, + "step": 4226 + }, + { + "epoch": 0.53, + "learning_rate": 4.759509438270414e-06, + "loss": 0.5408, + "step": 4227 + }, + { + "epoch": 0.53, + "learning_rate": 4.757481568385121e-06, + "loss": 0.5808, + "step": 4228 + }, + { + "epoch": 0.53, + "learning_rate": 4.755453738485031e-06, + "loss": 0.5131, + "step": 4229 + }, + { + "epoch": 0.53, + "learning_rate": 4.753425948904483e-06, + "loss": 0.5048, + "step": 4230 + }, + { + "epoch": 0.53, + "learning_rate": 4.7513981999778045e-06, + "loss": 0.5074, + "step": 4231 + }, + { + "epoch": 0.53, + "learning_rate": 4.749370492039323e-06, + "loss": 0.526, + "step": 4232 + }, + { + "epoch": 0.53, + "learning_rate": 4.747342825423357e-06, + "loss": 0.5032, + "step": 4233 + }, + { + "epoch": 0.53, + "learning_rate": 4.7453152004642175e-06, + "loss": 0.575, + "step": 4234 + }, + { + "epoch": 0.53, + "learning_rate": 4.743287617496212e-06, + "loss": 0.5157, + "step": 4235 + }, + { + "epoch": 0.53, + "learning_rate": 4.741260076853631e-06, + "loss": 0.5742, + "step": 4236 + }, + { + "epoch": 0.53, + "learning_rate": 4.73923257887077e-06, + "loss": 0.5935, + "step": 4237 + }, + { + "epoch": 0.53, + "learning_rate": 4.7372051238819114e-06, + "loss": 0.5494, + "step": 4238 + }, + { + "epoch": 0.53, + "learning_rate": 4.735177712221332e-06, + "loss": 0.5467, + "step": 4239 + }, + { + "epoch": 0.53, + "learning_rate": 4.733150344223301e-06, + "loss": 0.4823, + "step": 4240 + }, + { + "epoch": 0.53, + "learning_rate": 4.731123020222081e-06, + "loss": 0.5639, + "step": 4241 + }, + { + "epoch": 0.53, + "learning_rate": 4.729095740551925e-06, + "loss": 0.5973, + "step": 4242 + }, + { + "epoch": 0.53, + "learning_rate": 4.727068505547082e-06, + "loss": 0.5437, + "step": 4243 + }, + { + "epoch": 0.53, + "learning_rate": 4.7250413155417925e-06, + "loss": 0.2478, + "step": 4244 + }, + { + "epoch": 0.53, + "learning_rate": 4.723014170870287e-06, + "loss": 0.5229, + "step": 4245 + }, + { + "epoch": 0.53, + "learning_rate": 4.7209870718667955e-06, + "loss": 0.567, + "step": 4246 + }, + { + "epoch": 0.53, + "learning_rate": 4.71896001886553e-06, + "loss": 0.5463, + "step": 4247 + }, + { + "epoch": 0.53, + "learning_rate": 4.716933012200706e-06, + "loss": 0.4972, + "step": 4248 + }, + { + "epoch": 0.53, + "learning_rate": 4.714906052206521e-06, + "loss": 0.5728, + "step": 4249 + }, + { + "epoch": 0.53, + "learning_rate": 4.7128791392171725e-06, + "loss": 0.5513, + "step": 4250 + }, + { + "epoch": 0.53, + "learning_rate": 4.710852273566848e-06, + "loss": 0.575, + "step": 4251 + }, + { + "epoch": 0.53, + "learning_rate": 4.708825455589726e-06, + "loss": 0.5097, + "step": 4252 + }, + { + "epoch": 0.53, + "learning_rate": 4.706798685619976e-06, + "loss": 0.5558, + "step": 4253 + }, + { + "epoch": 0.53, + "learning_rate": 4.704771963991766e-06, + "loss": 0.547, + "step": 4254 + }, + { + "epoch": 0.53, + "learning_rate": 4.702745291039248e-06, + "loss": 0.2161, + "step": 4255 + }, + { + "epoch": 0.53, + "learning_rate": 4.700718667096572e-06, + "loss": 0.6261, + "step": 4256 + }, + { + "epoch": 0.53, + "learning_rate": 4.698692092497873e-06, + "loss": 0.4601, + "step": 4257 + }, + { + "epoch": 0.53, + "learning_rate": 4.696665567577287e-06, + "loss": 0.5259, + "step": 4258 + }, + { + "epoch": 0.53, + "learning_rate": 4.694639092668935e-06, + "loss": 0.5493, + "step": 4259 + }, + { + "epoch": 0.53, + "learning_rate": 4.6926126681069315e-06, + "loss": 0.2263, + "step": 4260 + }, + { + "epoch": 0.53, + "learning_rate": 4.690586294225383e-06, + "loss": 0.5413, + "step": 4261 + }, + { + "epoch": 0.53, + "learning_rate": 4.6885599713583915e-06, + "loss": 0.5232, + "step": 4262 + }, + { + "epoch": 0.53, + "learning_rate": 4.686533699840042e-06, + "loss": 0.5821, + "step": 4263 + }, + { + "epoch": 0.53, + "learning_rate": 4.684507480004416e-06, + "loss": 0.5455, + "step": 4264 + }, + { + "epoch": 0.53, + "learning_rate": 4.682481312185589e-06, + "loss": 0.4837, + "step": 4265 + }, + { + "epoch": 0.53, + "learning_rate": 4.680455196717624e-06, + "loss": 0.5223, + "step": 4266 + }, + { + "epoch": 0.53, + "learning_rate": 4.678429133934578e-06, + "loss": 0.2323, + "step": 4267 + }, + { + "epoch": 0.53, + "learning_rate": 4.676403124170495e-06, + "loss": 0.4967, + "step": 4268 + }, + { + "epoch": 0.54, + "learning_rate": 4.674377167759414e-06, + "loss": 0.5621, + "step": 4269 + }, + { + "epoch": 0.54, + "learning_rate": 4.6723512650353645e-06, + "loss": 0.5183, + "step": 4270 + }, + { + "epoch": 0.54, + "learning_rate": 4.670325416332369e-06, + "loss": 0.5672, + "step": 4271 + }, + { + "epoch": 0.54, + "learning_rate": 4.668299621984439e-06, + "loss": 0.5461, + "step": 4272 + }, + { + "epoch": 0.54, + "learning_rate": 4.666273882325573e-06, + "loss": 0.5702, + "step": 4273 + }, + { + "epoch": 0.54, + "learning_rate": 4.664248197689767e-06, + "loss": 0.5556, + "step": 4274 + }, + { + "epoch": 0.54, + "learning_rate": 4.662222568411006e-06, + "loss": 0.2159, + "step": 4275 + }, + { + "epoch": 0.54, + "learning_rate": 4.6601969948232675e-06, + "loss": 0.5294, + "step": 4276 + }, + { + "epoch": 0.54, + "learning_rate": 4.658171477260514e-06, + "loss": 0.5302, + "step": 4277 + }, + { + "epoch": 0.54, + "learning_rate": 4.6561460160567066e-06, + "loss": 0.5479, + "step": 4278 + }, + { + "epoch": 0.54, + "learning_rate": 4.654120611545787e-06, + "loss": 0.5523, + "step": 4279 + }, + { + "epoch": 0.54, + "learning_rate": 4.652095264061698e-06, + "loss": 0.567, + "step": 4280 + }, + { + "epoch": 0.54, + "learning_rate": 4.6500699739383685e-06, + "loss": 0.5134, + "step": 4281 + }, + { + "epoch": 0.54, + "learning_rate": 4.648044741509715e-06, + "loss": 0.5693, + "step": 4282 + }, + { + "epoch": 0.54, + "learning_rate": 4.646019567109654e-06, + "loss": 0.5926, + "step": 4283 + }, + { + "epoch": 0.54, + "learning_rate": 4.6439944510720785e-06, + "loss": 0.5933, + "step": 4284 + }, + { + "epoch": 0.54, + "learning_rate": 4.641969393730883e-06, + "loss": 0.5188, + "step": 4285 + }, + { + "epoch": 0.54, + "learning_rate": 4.639944395419947e-06, + "loss": 0.5199, + "step": 4286 + }, + { + "epoch": 0.54, + "learning_rate": 4.637919456473144e-06, + "loss": 0.5377, + "step": 4287 + }, + { + "epoch": 0.54, + "learning_rate": 4.635894577224335e-06, + "loss": 0.4983, + "step": 4288 + }, + { + "epoch": 0.54, + "learning_rate": 4.63386975800737e-06, + "loss": 0.498, + "step": 4289 + }, + { + "epoch": 0.54, + "learning_rate": 4.631844999156092e-06, + "loss": 0.5373, + "step": 4290 + }, + { + "epoch": 0.54, + "learning_rate": 4.629820301004333e-06, + "loss": 0.6057, + "step": 4291 + }, + { + "epoch": 0.54, + "learning_rate": 4.627795663885915e-06, + "loss": 0.4806, + "step": 4292 + }, + { + "epoch": 0.54, + "learning_rate": 4.62577108813465e-06, + "loss": 0.5517, + "step": 4293 + }, + { + "epoch": 0.54, + "learning_rate": 4.623746574084342e-06, + "loss": 0.6164, + "step": 4294 + }, + { + "epoch": 0.54, + "learning_rate": 4.621722122068777e-06, + "loss": 0.5155, + "step": 4295 + }, + { + "epoch": 0.54, + "learning_rate": 4.619697732421739e-06, + "loss": 0.546, + "step": 4296 + }, + { + "epoch": 0.54, + "learning_rate": 4.6176734054770005e-06, + "loss": 0.5847, + "step": 4297 + }, + { + "epoch": 0.54, + "learning_rate": 4.615649141568321e-06, + "loss": 0.4995, + "step": 4298 + }, + { + "epoch": 0.54, + "learning_rate": 4.613624941029452e-06, + "loss": 0.5481, + "step": 4299 + }, + { + "epoch": 0.54, + "learning_rate": 4.611600804194131e-06, + "loss": 0.4873, + "step": 4300 + }, + { + "epoch": 0.54, + "learning_rate": 4.609576731396089e-06, + "loss": 0.579, + "step": 4301 + }, + { + "epoch": 0.54, + "learning_rate": 4.607552722969045e-06, + "loss": 0.5414, + "step": 4302 + }, + { + "epoch": 0.54, + "learning_rate": 4.605528779246705e-06, + "loss": 0.5171, + "step": 4303 + }, + { + "epoch": 0.54, + "learning_rate": 4.603504900562769e-06, + "loss": 0.5992, + "step": 4304 + }, + { + "epoch": 0.54, + "learning_rate": 4.601481087250924e-06, + "loss": 0.5463, + "step": 4305 + }, + { + "epoch": 0.54, + "learning_rate": 4.599457339644844e-06, + "loss": 0.5906, + "step": 4306 + }, + { + "epoch": 0.54, + "learning_rate": 4.597433658078195e-06, + "loss": 0.2322, + "step": 4307 + }, + { + "epoch": 0.54, + "learning_rate": 4.595410042884633e-06, + "loss": 0.5791, + "step": 4308 + }, + { + "epoch": 0.54, + "learning_rate": 4.593386494397798e-06, + "loss": 0.6095, + "step": 4309 + }, + { + "epoch": 0.54, + "learning_rate": 4.5913630129513254e-06, + "loss": 0.5231, + "step": 4310 + }, + { + "epoch": 0.54, + "learning_rate": 4.589339598878834e-06, + "loss": 0.5934, + "step": 4311 + }, + { + "epoch": 0.54, + "learning_rate": 4.587316252513935e-06, + "loss": 0.5089, + "step": 4312 + }, + { + "epoch": 0.54, + "learning_rate": 4.5852929741902284e-06, + "loss": 0.5335, + "step": 4313 + }, + { + "epoch": 0.54, + "learning_rate": 4.583269764241302e-06, + "loss": 0.5495, + "step": 4314 + }, + { + "epoch": 0.54, + "learning_rate": 4.581246623000732e-06, + "loss": 0.569, + "step": 4315 + }, + { + "epoch": 0.54, + "learning_rate": 4.579223550802082e-06, + "loss": 0.5791, + "step": 4316 + }, + { + "epoch": 0.54, + "learning_rate": 4.577200547978907e-06, + "loss": 0.5363, + "step": 4317 + }, + { + "epoch": 0.54, + "learning_rate": 4.57517761486475e-06, + "loss": 0.5431, + "step": 4318 + }, + { + "epoch": 0.54, + "learning_rate": 4.573154751793141e-06, + "loss": 0.2139, + "step": 4319 + }, + { + "epoch": 0.54, + "learning_rate": 4.571131959097599e-06, + "loss": 0.5093, + "step": 4320 + }, + { + "epoch": 0.54, + "learning_rate": 4.569109237111636e-06, + "loss": 0.5074, + "step": 4321 + }, + { + "epoch": 0.54, + "learning_rate": 4.567086586168741e-06, + "loss": 0.214, + "step": 4322 + }, + { + "epoch": 0.54, + "learning_rate": 4.565064006602403e-06, + "loss": 0.5446, + "step": 4323 + }, + { + "epoch": 0.54, + "learning_rate": 4.563041498746093e-06, + "loss": 0.6078, + "step": 4324 + }, + { + "epoch": 0.54, + "learning_rate": 4.561019062933272e-06, + "loss": 0.5953, + "step": 4325 + }, + { + "epoch": 0.54, + "learning_rate": 4.558996699497392e-06, + "loss": 0.5023, + "step": 4326 + }, + { + "epoch": 0.54, + "learning_rate": 4.556974408771884e-06, + "loss": 0.4478, + "step": 4327 + }, + { + "epoch": 0.54, + "learning_rate": 4.554952191090176e-06, + "loss": 0.2594, + "step": 4328 + }, + { + "epoch": 0.54, + "learning_rate": 4.552930046785683e-06, + "loss": 0.5822, + "step": 4329 + }, + { + "epoch": 0.54, + "learning_rate": 4.550907976191802e-06, + "loss": 0.5614, + "step": 4330 + }, + { + "epoch": 0.54, + "learning_rate": 4.548885979641927e-06, + "loss": 0.5478, + "step": 4331 + }, + { + "epoch": 0.54, + "learning_rate": 4.546864057469428e-06, + "loss": 0.5174, + "step": 4332 + }, + { + "epoch": 0.54, + "learning_rate": 4.544842210007673e-06, + "loss": 0.5253, + "step": 4333 + }, + { + "epoch": 0.54, + "learning_rate": 4.542820437590013e-06, + "loss": 0.5995, + "step": 4334 + }, + { + "epoch": 0.54, + "learning_rate": 4.540798740549789e-06, + "loss": 0.5319, + "step": 4335 + }, + { + "epoch": 0.54, + "learning_rate": 4.5387771192203245e-06, + "loss": 0.5601, + "step": 4336 + }, + { + "epoch": 0.54, + "learning_rate": 4.53675557393494e-06, + "loss": 0.5439, + "step": 4337 + }, + { + "epoch": 0.54, + "learning_rate": 4.534734105026931e-06, + "loss": 0.5164, + "step": 4338 + }, + { + "epoch": 0.54, + "learning_rate": 4.532712712829591e-06, + "loss": 0.5236, + "step": 4339 + }, + { + "epoch": 0.54, + "learning_rate": 4.530691397676195e-06, + "loss": 0.6213, + "step": 4340 + }, + { + "epoch": 0.54, + "learning_rate": 4.528670159900009e-06, + "loss": 0.5324, + "step": 4341 + }, + { + "epoch": 0.54, + "learning_rate": 4.526648999834285e-06, + "loss": 0.5447, + "step": 4342 + }, + { + "epoch": 0.54, + "learning_rate": 4.524627917812257e-06, + "loss": 0.5441, + "step": 4343 + }, + { + "epoch": 0.54, + "learning_rate": 4.522606914167154e-06, + "loss": 0.4343, + "step": 4344 + }, + { + "epoch": 0.54, + "learning_rate": 4.520585989232189e-06, + "loss": 0.5166, + "step": 4345 + }, + { + "epoch": 0.54, + "learning_rate": 4.518565143340561e-06, + "loss": 0.5016, + "step": 4346 + }, + { + "epoch": 0.54, + "learning_rate": 4.516544376825459e-06, + "loss": 0.5362, + "step": 4347 + }, + { + "epoch": 0.55, + "learning_rate": 4.5145236900200526e-06, + "loss": 0.5388, + "step": 4348 + }, + { + "epoch": 0.55, + "learning_rate": 4.512503083257504e-06, + "loss": 0.4759, + "step": 4349 + }, + { + "epoch": 0.55, + "learning_rate": 4.510482556870961e-06, + "loss": 0.5858, + "step": 4350 + }, + { + "epoch": 0.55, + "learning_rate": 4.508462111193558e-06, + "loss": 0.5506, + "step": 4351 + }, + { + "epoch": 0.55, + "learning_rate": 4.506441746558414e-06, + "loss": 0.5107, + "step": 4352 + }, + { + "epoch": 0.55, + "learning_rate": 4.50442146329864e-06, + "loss": 0.5833, + "step": 4353 + }, + { + "epoch": 0.55, + "learning_rate": 4.5024012617473275e-06, + "loss": 0.5937, + "step": 4354 + }, + { + "epoch": 0.55, + "learning_rate": 4.500381142237557e-06, + "loss": 0.587, + "step": 4355 + }, + { + "epoch": 0.55, + "learning_rate": 4.498361105102395e-06, + "loss": 0.5157, + "step": 4356 + }, + { + "epoch": 0.55, + "learning_rate": 4.496341150674896e-06, + "loss": 0.525, + "step": 4357 + }, + { + "epoch": 0.55, + "learning_rate": 4.494321279288101e-06, + "loss": 0.5259, + "step": 4358 + }, + { + "epoch": 0.55, + "learning_rate": 4.492301491275033e-06, + "loss": 0.5631, + "step": 4359 + }, + { + "epoch": 0.55, + "learning_rate": 4.490281786968706e-06, + "loss": 0.5624, + "step": 4360 + }, + { + "epoch": 0.55, + "learning_rate": 4.488262166702119e-06, + "loss": 0.5285, + "step": 4361 + }, + { + "epoch": 0.55, + "learning_rate": 4.486242630808256e-06, + "loss": 0.618, + "step": 4362 + }, + { + "epoch": 0.55, + "learning_rate": 4.484223179620088e-06, + "loss": 0.4984, + "step": 4363 + }, + { + "epoch": 0.55, + "learning_rate": 4.482203813470573e-06, + "loss": 0.5174, + "step": 4364 + }, + { + "epoch": 0.55, + "learning_rate": 4.480184532692651e-06, + "loss": 0.5607, + "step": 4365 + }, + { + "epoch": 0.55, + "learning_rate": 4.478165337619253e-06, + "loss": 0.5249, + "step": 4366 + }, + { + "epoch": 0.55, + "learning_rate": 4.476146228583291e-06, + "loss": 0.5465, + "step": 4367 + }, + { + "epoch": 0.55, + "learning_rate": 4.47412720591767e-06, + "loss": 0.4771, + "step": 4368 + }, + { + "epoch": 0.55, + "learning_rate": 4.472108269955273e-06, + "loss": 0.558, + "step": 4369 + }, + { + "epoch": 0.55, + "learning_rate": 4.470089421028972e-06, + "loss": 0.5381, + "step": 4370 + }, + { + "epoch": 0.55, + "learning_rate": 4.468070659471624e-06, + "loss": 0.5515, + "step": 4371 + }, + { + "epoch": 0.55, + "learning_rate": 4.466051985616073e-06, + "loss": 0.6043, + "step": 4372 + }, + { + "epoch": 0.55, + "learning_rate": 4.464033399795148e-06, + "loss": 0.228, + "step": 4373 + }, + { + "epoch": 0.55, + "learning_rate": 4.462014902341664e-06, + "loss": 0.5432, + "step": 4374 + }, + { + "epoch": 0.55, + "learning_rate": 4.459996493588418e-06, + "loss": 0.5405, + "step": 4375 + }, + { + "epoch": 0.55, + "learning_rate": 4.457978173868196e-06, + "loss": 0.499, + "step": 4376 + }, + { + "epoch": 0.55, + "learning_rate": 4.455959943513768e-06, + "loss": 0.5641, + "step": 4377 + }, + { + "epoch": 0.55, + "learning_rate": 4.4539418028578915e-06, + "loss": 0.4707, + "step": 4378 + }, + { + "epoch": 0.55, + "learning_rate": 4.4519237522333045e-06, + "loss": 0.5537, + "step": 4379 + }, + { + "epoch": 0.55, + "learning_rate": 4.449905791972736e-06, + "loss": 0.5237, + "step": 4380 + }, + { + "epoch": 0.55, + "learning_rate": 4.4478879224088935e-06, + "loss": 0.5067, + "step": 4381 + }, + { + "epoch": 0.55, + "learning_rate": 4.445870143874476e-06, + "loss": 0.6143, + "step": 4382 + }, + { + "epoch": 0.55, + "learning_rate": 4.443852456702161e-06, + "loss": 0.5235, + "step": 4383 + }, + { + "epoch": 0.55, + "learning_rate": 4.4418348612246184e-06, + "loss": 0.6316, + "step": 4384 + }, + { + "epoch": 0.55, + "learning_rate": 4.4398173577744986e-06, + "loss": 0.5397, + "step": 4385 + }, + { + "epoch": 0.55, + "learning_rate": 4.437799946684434e-06, + "loss": 0.6015, + "step": 4386 + }, + { + "epoch": 0.55, + "learning_rate": 4.435782628287047e-06, + "loss": 0.4628, + "step": 4387 + }, + { + "epoch": 0.55, + "learning_rate": 4.433765402914943e-06, + "loss": 0.4807, + "step": 4388 + }, + { + "epoch": 0.55, + "learning_rate": 4.431748270900712e-06, + "loss": 0.4938, + "step": 4389 + }, + { + "epoch": 0.55, + "learning_rate": 4.42973123257693e-06, + "loss": 0.549, + "step": 4390 + }, + { + "epoch": 0.55, + "learning_rate": 4.427714288276152e-06, + "loss": 0.5472, + "step": 4391 + }, + { + "epoch": 0.55, + "learning_rate": 4.425697438330924e-06, + "loss": 0.4546, + "step": 4392 + }, + { + "epoch": 0.55, + "learning_rate": 4.423680683073772e-06, + "loss": 0.4795, + "step": 4393 + }, + { + "epoch": 0.55, + "learning_rate": 4.421664022837212e-06, + "loss": 0.4936, + "step": 4394 + }, + { + "epoch": 0.55, + "learning_rate": 4.419647457953738e-06, + "loss": 0.5282, + "step": 4395 + }, + { + "epoch": 0.55, + "learning_rate": 4.417630988755833e-06, + "loss": 0.4567, + "step": 4396 + }, + { + "epoch": 0.55, + "learning_rate": 4.4156146155759586e-06, + "loss": 0.5088, + "step": 4397 + }, + { + "epoch": 0.55, + "learning_rate": 4.413598338746566e-06, + "loss": 0.5296, + "step": 4398 + }, + { + "epoch": 0.55, + "learning_rate": 4.411582158600089e-06, + "loss": 0.5187, + "step": 4399 + }, + { + "epoch": 0.55, + "learning_rate": 4.4095660754689445e-06, + "loss": 0.5017, + "step": 4400 + }, + { + "epoch": 0.55, + "learning_rate": 4.407550089685537e-06, + "loss": 0.4873, + "step": 4401 + }, + { + "epoch": 0.55, + "learning_rate": 4.405534201582247e-06, + "loss": 0.5195, + "step": 4402 + }, + { + "epoch": 0.55, + "learning_rate": 4.403518411491446e-06, + "loss": 0.535, + "step": 4403 + }, + { + "epoch": 0.55, + "learning_rate": 4.401502719745488e-06, + "loss": 0.6092, + "step": 4404 + }, + { + "epoch": 0.55, + "learning_rate": 4.399487126676709e-06, + "loss": 0.5039, + "step": 4405 + }, + { + "epoch": 0.55, + "learning_rate": 4.397471632617431e-06, + "loss": 0.5033, + "step": 4406 + }, + { + "epoch": 0.55, + "learning_rate": 4.395456237899957e-06, + "loss": 0.5321, + "step": 4407 + }, + { + "epoch": 0.55, + "learning_rate": 4.393440942856576e-06, + "loss": 0.496, + "step": 4408 + }, + { + "epoch": 0.55, + "learning_rate": 4.391425747819558e-06, + "loss": 0.4983, + "step": 4409 + }, + { + "epoch": 0.55, + "learning_rate": 4.389410653121159e-06, + "loss": 0.4793, + "step": 4410 + }, + { + "epoch": 0.55, + "learning_rate": 4.387395659093618e-06, + "loss": 0.551, + "step": 4411 + }, + { + "epoch": 0.55, + "learning_rate": 4.385380766069157e-06, + "loss": 0.5629, + "step": 4412 + }, + { + "epoch": 0.55, + "learning_rate": 4.3833659743799805e-06, + "loss": 0.4836, + "step": 4413 + }, + { + "epoch": 0.55, + "learning_rate": 4.381351284358278e-06, + "loss": 0.5335, + "step": 4414 + }, + { + "epoch": 0.55, + "learning_rate": 4.37933669633622e-06, + "loss": 0.5717, + "step": 4415 + }, + { + "epoch": 0.55, + "learning_rate": 4.377322210645962e-06, + "loss": 0.5245, + "step": 4416 + }, + { + "epoch": 0.55, + "learning_rate": 4.3753078276196435e-06, + "loss": 0.561, + "step": 4417 + }, + { + "epoch": 0.55, + "learning_rate": 4.373293547589383e-06, + "loss": 0.2332, + "step": 4418 + }, + { + "epoch": 0.55, + "learning_rate": 4.371279370887288e-06, + "loss": 0.5314, + "step": 4419 + }, + { + "epoch": 0.55, + "learning_rate": 4.3692652978454435e-06, + "loss": 0.2177, + "step": 4420 + }, + { + "epoch": 0.55, + "learning_rate": 4.36725132879592e-06, + "loss": 0.5263, + "step": 4421 + }, + { + "epoch": 0.55, + "learning_rate": 4.365237464070771e-06, + "loss": 0.5144, + "step": 4422 + }, + { + "epoch": 0.55, + "learning_rate": 4.36322370400203e-06, + "loss": 0.5714, + "step": 4423 + }, + { + "epoch": 0.55, + "learning_rate": 4.361210048921718e-06, + "loss": 0.5739, + "step": 4424 + }, + { + "epoch": 0.55, + "learning_rate": 4.359196499161836e-06, + "loss": 0.5235, + "step": 4425 + }, + { + "epoch": 0.55, + "learning_rate": 4.357183055054366e-06, + "loss": 0.5267, + "step": 4426 + }, + { + "epoch": 0.55, + "learning_rate": 4.355169716931278e-06, + "loss": 0.5055, + "step": 4427 + }, + { + "epoch": 0.56, + "learning_rate": 4.353156485124518e-06, + "loss": 0.5548, + "step": 4428 + }, + { + "epoch": 0.56, + "learning_rate": 4.3511433599660165e-06, + "loss": 0.551, + "step": 4429 + }, + { + "epoch": 0.56, + "learning_rate": 4.349130341787689e-06, + "loss": 0.5574, + "step": 4430 + }, + { + "epoch": 0.56, + "learning_rate": 4.347117430921432e-06, + "loss": 0.5812, + "step": 4431 + }, + { + "epoch": 0.56, + "learning_rate": 4.345104627699124e-06, + "loss": 0.5215, + "step": 4432 + }, + { + "epoch": 0.56, + "learning_rate": 4.343091932452626e-06, + "loss": 0.5018, + "step": 4433 + }, + { + "epoch": 0.56, + "learning_rate": 4.341079345513779e-06, + "loss": 0.5202, + "step": 4434 + }, + { + "epoch": 0.56, + "learning_rate": 4.339066867214408e-06, + "loss": 0.5035, + "step": 4435 + }, + { + "epoch": 0.56, + "learning_rate": 4.337054497886322e-06, + "loss": 0.5472, + "step": 4436 + }, + { + "epoch": 0.56, + "learning_rate": 4.335042237861309e-06, + "loss": 0.1979, + "step": 4437 + }, + { + "epoch": 0.56, + "learning_rate": 4.33303008747114e-06, + "loss": 0.5358, + "step": 4438 + }, + { + "epoch": 0.56, + "learning_rate": 4.331018047047572e-06, + "loss": 0.1996, + "step": 4439 + }, + { + "epoch": 0.56, + "learning_rate": 4.329006116922335e-06, + "loss": 0.5229, + "step": 4440 + }, + { + "epoch": 0.56, + "learning_rate": 4.326994297427147e-06, + "loss": 0.5935, + "step": 4441 + }, + { + "epoch": 0.56, + "learning_rate": 4.324982588893707e-06, + "loss": 0.5493, + "step": 4442 + }, + { + "epoch": 0.56, + "learning_rate": 4.322970991653695e-06, + "loss": 0.5339, + "step": 4443 + }, + { + "epoch": 0.56, + "learning_rate": 4.320959506038776e-06, + "loss": 0.4847, + "step": 4444 + }, + { + "epoch": 0.56, + "learning_rate": 4.318948132380588e-06, + "loss": 0.239, + "step": 4445 + }, + { + "epoch": 0.56, + "learning_rate": 4.31693687101076e-06, + "loss": 0.5912, + "step": 4446 + }, + { + "epoch": 0.56, + "learning_rate": 4.314925722260895e-06, + "loss": 0.5572, + "step": 4447 + }, + { + "epoch": 0.56, + "learning_rate": 4.312914686462585e-06, + "loss": 0.5402, + "step": 4448 + }, + { + "epoch": 0.56, + "learning_rate": 4.310903763947399e-06, + "loss": 0.5546, + "step": 4449 + }, + { + "epoch": 0.56, + "learning_rate": 4.308892955046884e-06, + "loss": 0.4944, + "step": 4450 + }, + { + "epoch": 0.56, + "learning_rate": 4.306882260092574e-06, + "loss": 0.4806, + "step": 4451 + }, + { + "epoch": 0.56, + "learning_rate": 4.3048716794159825e-06, + "loss": 0.5234, + "step": 4452 + }, + { + "epoch": 0.56, + "learning_rate": 4.302861213348603e-06, + "loss": 0.5835, + "step": 4453 + }, + { + "epoch": 0.56, + "learning_rate": 4.300850862221912e-06, + "loss": 0.5634, + "step": 4454 + }, + { + "epoch": 0.56, + "learning_rate": 4.298840626367367e-06, + "loss": 0.5673, + "step": 4455 + }, + { + "epoch": 0.56, + "learning_rate": 4.296830506116401e-06, + "loss": 0.543, + "step": 4456 + }, + { + "epoch": 0.56, + "learning_rate": 4.2948205018004365e-06, + "loss": 0.4699, + "step": 4457 + }, + { + "epoch": 0.56, + "learning_rate": 4.292810613750871e-06, + "loss": 0.5975, + "step": 4458 + }, + { + "epoch": 0.56, + "learning_rate": 4.290800842299085e-06, + "loss": 0.554, + "step": 4459 + }, + { + "epoch": 0.56, + "learning_rate": 4.288791187776441e-06, + "loss": 0.5246, + "step": 4460 + }, + { + "epoch": 0.56, + "learning_rate": 4.286781650514277e-06, + "loss": 0.5645, + "step": 4461 + }, + { + "epoch": 0.56, + "learning_rate": 4.284772230843918e-06, + "loss": 0.5319, + "step": 4462 + }, + { + "epoch": 0.56, + "learning_rate": 4.282762929096665e-06, + "loss": 0.5361, + "step": 4463 + }, + { + "epoch": 0.56, + "learning_rate": 4.280753745603803e-06, + "loss": 0.5438, + "step": 4464 + }, + { + "epoch": 0.56, + "learning_rate": 4.278744680696596e-06, + "loss": 0.5292, + "step": 4465 + }, + { + "epoch": 0.56, + "learning_rate": 4.2767357347062886e-06, + "loss": 0.4971, + "step": 4466 + }, + { + "epoch": 0.56, + "learning_rate": 4.274726907964103e-06, + "loss": 0.4887, + "step": 4467 + }, + { + "epoch": 0.56, + "learning_rate": 4.2727182008012455e-06, + "loss": 0.4671, + "step": 4468 + }, + { + "epoch": 0.56, + "learning_rate": 4.270709613548903e-06, + "loss": 0.5521, + "step": 4469 + }, + { + "epoch": 0.56, + "learning_rate": 4.26870114653824e-06, + "loss": 0.4997, + "step": 4470 + }, + { + "epoch": 0.56, + "learning_rate": 4.266692800100404e-06, + "loss": 0.4922, + "step": 4471 + }, + { + "epoch": 0.56, + "learning_rate": 4.264684574566518e-06, + "loss": 0.5547, + "step": 4472 + }, + { + "epoch": 0.56, + "learning_rate": 4.262676470267691e-06, + "loss": 0.5601, + "step": 4473 + }, + { + "epoch": 0.56, + "learning_rate": 4.260668487535005e-06, + "loss": 0.5416, + "step": 4474 + }, + { + "epoch": 0.56, + "learning_rate": 4.2586606266995295e-06, + "loss": 0.5719, + "step": 4475 + }, + { + "epoch": 0.56, + "learning_rate": 4.25665288809231e-06, + "loss": 0.5566, + "step": 4476 + }, + { + "epoch": 0.56, + "learning_rate": 4.25464527204437e-06, + "loss": 0.5123, + "step": 4477 + }, + { + "epoch": 0.56, + "learning_rate": 4.252637778886717e-06, + "loss": 0.4944, + "step": 4478 + }, + { + "epoch": 0.56, + "learning_rate": 4.250630408950334e-06, + "loss": 0.5424, + "step": 4479 + }, + { + "epoch": 0.56, + "learning_rate": 4.248623162566191e-06, + "loss": 0.5124, + "step": 4480 + }, + { + "epoch": 0.56, + "learning_rate": 4.246616040065227e-06, + "loss": 0.4489, + "step": 4481 + }, + { + "epoch": 0.56, + "learning_rate": 4.244609041778368e-06, + "loss": 0.5094, + "step": 4482 + }, + { + "epoch": 0.56, + "learning_rate": 4.242602168036518e-06, + "loss": 0.5266, + "step": 4483 + }, + { + "epoch": 0.56, + "learning_rate": 4.24059541917056e-06, + "loss": 0.5834, + "step": 4484 + }, + { + "epoch": 0.56, + "learning_rate": 4.238588795511357e-06, + "loss": 0.5619, + "step": 4485 + }, + { + "epoch": 0.56, + "learning_rate": 4.236582297389749e-06, + "loss": 0.5113, + "step": 4486 + }, + { + "epoch": 0.56, + "learning_rate": 4.234575925136562e-06, + "loss": 0.5382, + "step": 4487 + }, + { + "epoch": 0.56, + "learning_rate": 4.23256967908259e-06, + "loss": 0.5096, + "step": 4488 + }, + { + "epoch": 0.56, + "learning_rate": 4.230563559558616e-06, + "loss": 0.548, + "step": 4489 + }, + { + "epoch": 0.56, + "learning_rate": 4.228557566895399e-06, + "loss": 0.5659, + "step": 4490 + }, + { + "epoch": 0.56, + "learning_rate": 4.226551701423674e-06, + "loss": 0.5582, + "step": 4491 + }, + { + "epoch": 0.56, + "learning_rate": 4.224545963474164e-06, + "loss": 0.5096, + "step": 4492 + }, + { + "epoch": 0.56, + "learning_rate": 4.222540353377558e-06, + "loss": 0.5211, + "step": 4493 + }, + { + "epoch": 0.56, + "learning_rate": 4.2205348714645345e-06, + "loss": 0.5162, + "step": 4494 + }, + { + "epoch": 0.56, + "learning_rate": 4.218529518065745e-06, + "loss": 0.5406, + "step": 4495 + }, + { + "epoch": 0.56, + "learning_rate": 4.216524293511825e-06, + "loss": 0.4781, + "step": 4496 + }, + { + "epoch": 0.56, + "learning_rate": 4.214519198133384e-06, + "loss": 0.5104, + "step": 4497 + }, + { + "epoch": 0.56, + "learning_rate": 4.212514232261011e-06, + "loss": 0.4968, + "step": 4498 + }, + { + "epoch": 0.56, + "learning_rate": 4.2105093962252756e-06, + "loss": 0.5325, + "step": 4499 + }, + { + "epoch": 0.56, + "learning_rate": 4.208504690356724e-06, + "loss": 0.5354, + "step": 4500 + }, + { + "epoch": 0.56, + "learning_rate": 4.2065001149858825e-06, + "loss": 0.5437, + "step": 4501 + }, + { + "epoch": 0.56, + "learning_rate": 4.204495670443256e-06, + "loss": 0.5226, + "step": 4502 + }, + { + "epoch": 0.56, + "learning_rate": 4.202491357059328e-06, + "loss": 0.4977, + "step": 4503 + }, + { + "epoch": 0.56, + "learning_rate": 4.200487175164556e-06, + "loss": 0.5883, + "step": 4504 + }, + { + "epoch": 0.56, + "learning_rate": 4.198483125089381e-06, + "loss": 0.4944, + "step": 4505 + }, + { + "epoch": 0.56, + "learning_rate": 4.196479207164221e-06, + "loss": 0.4986, + "step": 4506 + }, + { + "epoch": 0.56, + "learning_rate": 4.194475421719471e-06, + "loss": 0.5664, + "step": 4507 + }, + { + "epoch": 0.57, + "learning_rate": 4.1924717690855085e-06, + "loss": 0.5062, + "step": 4508 + }, + { + "epoch": 0.57, + "learning_rate": 4.190468249592681e-06, + "loss": 0.553, + "step": 4509 + }, + { + "epoch": 0.57, + "learning_rate": 4.1884648635713186e-06, + "loss": 0.5193, + "step": 4510 + }, + { + "epoch": 0.57, + "learning_rate": 4.1864616113517315e-06, + "loss": 0.5045, + "step": 4511 + }, + { + "epoch": 0.57, + "learning_rate": 4.184458493264204e-06, + "loss": 0.4979, + "step": 4512 + }, + { + "epoch": 0.57, + "learning_rate": 4.182455509639002e-06, + "loss": 0.5696, + "step": 4513 + }, + { + "epoch": 0.57, + "learning_rate": 4.180452660806369e-06, + "loss": 0.5412, + "step": 4514 + }, + { + "epoch": 0.57, + "learning_rate": 4.178449947096519e-06, + "loss": 0.516, + "step": 4515 + }, + { + "epoch": 0.57, + "learning_rate": 4.176447368839652e-06, + "loss": 0.4936, + "step": 4516 + }, + { + "epoch": 0.57, + "learning_rate": 4.174444926365943e-06, + "loss": 0.5663, + "step": 4517 + }, + { + "epoch": 0.57, + "learning_rate": 4.172442620005543e-06, + "loss": 0.5794, + "step": 4518 + }, + { + "epoch": 0.57, + "learning_rate": 4.170440450088588e-06, + "loss": 0.5517, + "step": 4519 + }, + { + "epoch": 0.57, + "learning_rate": 4.168438416945177e-06, + "loss": 0.5289, + "step": 4520 + }, + { + "epoch": 0.57, + "learning_rate": 4.166436520905399e-06, + "loss": 0.5898, + "step": 4521 + }, + { + "epoch": 0.57, + "learning_rate": 4.164434762299318e-06, + "loss": 0.53, + "step": 4522 + }, + { + "epoch": 0.57, + "learning_rate": 4.162433141456971e-06, + "loss": 0.4777, + "step": 4523 + }, + { + "epoch": 0.57, + "learning_rate": 4.160431658708378e-06, + "loss": 0.5632, + "step": 4524 + }, + { + "epoch": 0.57, + "learning_rate": 4.15843031438353e-06, + "loss": 0.5138, + "step": 4525 + }, + { + "epoch": 0.57, + "learning_rate": 4.156429108812401e-06, + "loss": 0.5895, + "step": 4526 + }, + { + "epoch": 0.57, + "learning_rate": 4.154428042324937e-06, + "loss": 0.4805, + "step": 4527 + }, + { + "epoch": 0.57, + "learning_rate": 4.152427115251066e-06, + "loss": 0.5185, + "step": 4528 + }, + { + "epoch": 0.57, + "learning_rate": 4.150426327920689e-06, + "loss": 0.5629, + "step": 4529 + }, + { + "epoch": 0.57, + "learning_rate": 4.148425680663688e-06, + "loss": 0.5436, + "step": 4530 + }, + { + "epoch": 0.57, + "learning_rate": 4.146425173809917e-06, + "loss": 0.5549, + "step": 4531 + }, + { + "epoch": 0.57, + "learning_rate": 4.14442480768921e-06, + "loss": 0.4941, + "step": 4532 + }, + { + "epoch": 0.57, + "learning_rate": 4.142424582631378e-06, + "loss": 0.5035, + "step": 4533 + }, + { + "epoch": 0.57, + "learning_rate": 4.140424498966207e-06, + "loss": 0.4576, + "step": 4534 + }, + { + "epoch": 0.57, + "learning_rate": 4.138424557023462e-06, + "loss": 0.5483, + "step": 4535 + }, + { + "epoch": 0.57, + "learning_rate": 4.1364247571328815e-06, + "loss": 0.5862, + "step": 4536 + }, + { + "epoch": 0.57, + "learning_rate": 4.134425099624182e-06, + "loss": 0.5227, + "step": 4537 + }, + { + "epoch": 0.57, + "learning_rate": 4.132425584827059e-06, + "loss": 0.5009, + "step": 4538 + }, + { + "epoch": 0.57, + "learning_rate": 4.13042621307118e-06, + "loss": 0.5661, + "step": 4539 + }, + { + "epoch": 0.57, + "learning_rate": 4.1284269846861956e-06, + "loss": 0.5347, + "step": 4540 + }, + { + "epoch": 0.57, + "learning_rate": 4.126427900001721e-06, + "loss": 0.2202, + "step": 4541 + }, + { + "epoch": 0.57, + "learning_rate": 4.124428959347361e-06, + "loss": 0.5236, + "step": 4542 + }, + { + "epoch": 0.57, + "learning_rate": 4.122430163052689e-06, + "loss": 0.6241, + "step": 4543 + }, + { + "epoch": 0.57, + "learning_rate": 4.1204315114472555e-06, + "loss": 0.4819, + "step": 4544 + }, + { + "epoch": 0.57, + "learning_rate": 4.118433004860589e-06, + "loss": 0.5252, + "step": 4545 + }, + { + "epoch": 0.57, + "learning_rate": 4.116434643622195e-06, + "loss": 0.5474, + "step": 4546 + }, + { + "epoch": 0.57, + "learning_rate": 4.114436428061549e-06, + "loss": 0.5663, + "step": 4547 + }, + { + "epoch": 0.57, + "learning_rate": 4.112438358508108e-06, + "loss": 0.5623, + "step": 4548 + }, + { + "epoch": 0.57, + "learning_rate": 4.1104404352913055e-06, + "loss": 0.5232, + "step": 4549 + }, + { + "epoch": 0.57, + "learning_rate": 4.1084426587405455e-06, + "loss": 0.2239, + "step": 4550 + }, + { + "epoch": 0.57, + "learning_rate": 4.106445029185216e-06, + "loss": 0.5761, + "step": 4551 + }, + { + "epoch": 0.57, + "learning_rate": 4.104447546954672e-06, + "loss": 0.5466, + "step": 4552 + }, + { + "epoch": 0.57, + "learning_rate": 4.102450212378248e-06, + "loss": 0.4865, + "step": 4553 + }, + { + "epoch": 0.57, + "learning_rate": 4.100453025785257e-06, + "loss": 0.5429, + "step": 4554 + }, + { + "epoch": 0.57, + "learning_rate": 4.098455987504983e-06, + "loss": 0.5892, + "step": 4555 + }, + { + "epoch": 0.57, + "learning_rate": 4.09645909786669e-06, + "loss": 0.5282, + "step": 4556 + }, + { + "epoch": 0.57, + "learning_rate": 4.094462357199611e-06, + "loss": 0.5638, + "step": 4557 + }, + { + "epoch": 0.57, + "learning_rate": 4.092465765832961e-06, + "loss": 0.5639, + "step": 4558 + }, + { + "epoch": 0.57, + "learning_rate": 4.090469324095927e-06, + "loss": 0.5397, + "step": 4559 + }, + { + "epoch": 0.57, + "learning_rate": 4.0884730323176716e-06, + "loss": 0.5351, + "step": 4560 + }, + { + "epoch": 0.57, + "learning_rate": 4.086476890827335e-06, + "loss": 0.5823, + "step": 4561 + }, + { + "epoch": 0.57, + "learning_rate": 4.084480899954032e-06, + "loss": 0.468, + "step": 4562 + }, + { + "epoch": 0.57, + "learning_rate": 4.082485060026847e-06, + "loss": 0.5704, + "step": 4563 + }, + { + "epoch": 0.57, + "learning_rate": 4.080489371374846e-06, + "loss": 0.6298, + "step": 4564 + }, + { + "epoch": 0.57, + "learning_rate": 4.078493834327068e-06, + "loss": 0.5128, + "step": 4565 + }, + { + "epoch": 0.57, + "learning_rate": 4.076498449212527e-06, + "loss": 0.4915, + "step": 4566 + }, + { + "epoch": 0.57, + "learning_rate": 4.074503216360215e-06, + "loss": 0.2081, + "step": 4567 + }, + { + "epoch": 0.57, + "learning_rate": 4.0725081360990896e-06, + "loss": 0.5379, + "step": 4568 + }, + { + "epoch": 0.57, + "learning_rate": 4.0705132087580935e-06, + "loss": 0.5412, + "step": 4569 + }, + { + "epoch": 0.57, + "learning_rate": 4.068518434666139e-06, + "loss": 0.5252, + "step": 4570 + }, + { + "epoch": 0.57, + "learning_rate": 4.0665238141521146e-06, + "loss": 0.5167, + "step": 4571 + }, + { + "epoch": 0.57, + "learning_rate": 4.064529347544886e-06, + "loss": 0.525, + "step": 4572 + }, + { + "epoch": 0.57, + "learning_rate": 4.062535035173285e-06, + "loss": 0.5089, + "step": 4573 + }, + { + "epoch": 0.57, + "learning_rate": 4.060540877366127e-06, + "loss": 0.5382, + "step": 4574 + }, + { + "epoch": 0.57, + "learning_rate": 4.058546874452198e-06, + "loss": 0.4821, + "step": 4575 + }, + { + "epoch": 0.57, + "learning_rate": 4.0565530267602586e-06, + "loss": 0.5445, + "step": 4576 + }, + { + "epoch": 0.57, + "learning_rate": 4.054559334619046e-06, + "loss": 0.4635, + "step": 4577 + }, + { + "epoch": 0.57, + "learning_rate": 4.052565798357269e-06, + "loss": 0.5439, + "step": 4578 + }, + { + "epoch": 0.57, + "learning_rate": 4.050572418303611e-06, + "loss": 0.581, + "step": 4579 + }, + { + "epoch": 0.57, + "learning_rate": 4.0485791947867295e-06, + "loss": 0.5385, + "step": 4580 + }, + { + "epoch": 0.57, + "learning_rate": 4.046586128135259e-06, + "loss": 0.558, + "step": 4581 + }, + { + "epoch": 0.57, + "learning_rate": 4.044593218677806e-06, + "loss": 0.5213, + "step": 4582 + }, + { + "epoch": 0.57, + "learning_rate": 4.042600466742951e-06, + "loss": 0.4682, + "step": 4583 + }, + { + "epoch": 0.57, + "learning_rate": 4.040607872659248e-06, + "loss": 0.5101, + "step": 4584 + }, + { + "epoch": 0.57, + "learning_rate": 4.038615436755226e-06, + "loss": 0.556, + "step": 4585 + }, + { + "epoch": 0.57, + "learning_rate": 4.036623159359389e-06, + "loss": 0.5497, + "step": 4586 + }, + { + "epoch": 0.57, + "learning_rate": 4.034631040800211e-06, + "loss": 0.598, + "step": 4587 + }, + { + "epoch": 0.58, + "learning_rate": 4.032639081406145e-06, + "loss": 0.5975, + "step": 4588 + }, + { + "epoch": 0.58, + "learning_rate": 4.030647281505613e-06, + "loss": 0.522, + "step": 4589 + }, + { + "epoch": 0.58, + "learning_rate": 4.028655641427014e-06, + "loss": 0.2498, + "step": 4590 + }, + { + "epoch": 0.58, + "learning_rate": 4.026664161498719e-06, + "loss": 0.5876, + "step": 4591 + }, + { + "epoch": 0.58, + "learning_rate": 4.024672842049074e-06, + "loss": 0.543, + "step": 4592 + }, + { + "epoch": 0.58, + "learning_rate": 4.022681683406397e-06, + "loss": 0.5177, + "step": 4593 + }, + { + "epoch": 0.58, + "learning_rate": 4.02069068589898e-06, + "loss": 0.5446, + "step": 4594 + }, + { + "epoch": 0.58, + "learning_rate": 4.018699849855088e-06, + "loss": 0.529, + "step": 4595 + }, + { + "epoch": 0.58, + "learning_rate": 4.0167091756029595e-06, + "loss": 0.5337, + "step": 4596 + }, + { + "epoch": 0.58, + "learning_rate": 4.014718663470808e-06, + "loss": 0.5403, + "step": 4597 + }, + { + "epoch": 0.58, + "learning_rate": 4.0127283137868185e-06, + "loss": 0.5071, + "step": 4598 + }, + { + "epoch": 0.58, + "learning_rate": 4.010738126879152e-06, + "loss": 0.5607, + "step": 4599 + }, + { + "epoch": 0.58, + "learning_rate": 4.008748103075936e-06, + "loss": 0.5249, + "step": 4600 + }, + { + "epoch": 0.58, + "learning_rate": 4.006758242705277e-06, + "loss": 0.5411, + "step": 4601 + }, + { + "epoch": 0.58, + "learning_rate": 4.004768546095254e-06, + "loss": 0.539, + "step": 4602 + }, + { + "epoch": 0.58, + "learning_rate": 4.002779013573917e-06, + "loss": 0.5091, + "step": 4603 + }, + { + "epoch": 0.58, + "learning_rate": 4.000789645469292e-06, + "loss": 0.5627, + "step": 4604 + }, + { + "epoch": 0.58, + "learning_rate": 3.998800442109376e-06, + "loss": 0.528, + "step": 4605 + }, + { + "epoch": 0.58, + "learning_rate": 3.996811403822135e-06, + "loss": 0.5123, + "step": 4606 + }, + { + "epoch": 0.58, + "learning_rate": 3.994822530935513e-06, + "loss": 0.5663, + "step": 4607 + }, + { + "epoch": 0.58, + "learning_rate": 3.992833823777427e-06, + "loss": 0.4853, + "step": 4608 + }, + { + "epoch": 0.58, + "learning_rate": 3.9908452826757634e-06, + "loss": 0.2279, + "step": 4609 + }, + { + "epoch": 0.58, + "learning_rate": 3.988856907958385e-06, + "loss": 0.5462, + "step": 4610 + }, + { + "epoch": 0.58, + "learning_rate": 3.986868699953121e-06, + "loss": 0.5045, + "step": 4611 + }, + { + "epoch": 0.58, + "learning_rate": 3.984880658987779e-06, + "loss": 0.6173, + "step": 4612 + }, + { + "epoch": 0.58, + "learning_rate": 3.9828927853901374e-06, + "loss": 0.5289, + "step": 4613 + }, + { + "epoch": 0.58, + "learning_rate": 3.980905079487946e-06, + "loss": 0.5406, + "step": 4614 + }, + { + "epoch": 0.58, + "learning_rate": 3.9789175416089304e-06, + "loss": 0.5482, + "step": 4615 + }, + { + "epoch": 0.58, + "learning_rate": 3.9769301720807804e-06, + "loss": 0.2356, + "step": 4616 + }, + { + "epoch": 0.58, + "learning_rate": 3.974942971231167e-06, + "loss": 0.6211, + "step": 4617 + }, + { + "epoch": 0.58, + "learning_rate": 3.972955939387728e-06, + "loss": 0.5855, + "step": 4618 + }, + { + "epoch": 0.58, + "learning_rate": 3.9709690768780765e-06, + "loss": 0.485, + "step": 4619 + }, + { + "epoch": 0.58, + "learning_rate": 3.968982384029796e-06, + "loss": 0.5768, + "step": 4620 + }, + { + "epoch": 0.58, + "learning_rate": 3.966995861170444e-06, + "loss": 0.5233, + "step": 4621 + }, + { + "epoch": 0.58, + "learning_rate": 3.965009508627544e-06, + "loss": 0.2516, + "step": 4622 + }, + { + "epoch": 0.58, + "learning_rate": 3.963023326728598e-06, + "loss": 0.5553, + "step": 4623 + }, + { + "epoch": 0.58, + "learning_rate": 3.961037315801079e-06, + "loss": 0.5181, + "step": 4624 + }, + { + "epoch": 0.58, + "learning_rate": 3.959051476172427e-06, + "loss": 0.5341, + "step": 4625 + }, + { + "epoch": 0.58, + "learning_rate": 3.957065808170062e-06, + "loss": 0.5271, + "step": 4626 + }, + { + "epoch": 0.58, + "learning_rate": 3.955080312121365e-06, + "loss": 0.5203, + "step": 4627 + }, + { + "epoch": 0.58, + "learning_rate": 3.953094988353698e-06, + "loss": 0.4949, + "step": 4628 + }, + { + "epoch": 0.58, + "learning_rate": 3.951109837194389e-06, + "loss": 0.524, + "step": 4629 + }, + { + "epoch": 0.58, + "learning_rate": 3.949124858970742e-06, + "loss": 0.6105, + "step": 4630 + }, + { + "epoch": 0.58, + "learning_rate": 3.94714005401003e-06, + "loss": 0.4758, + "step": 4631 + }, + { + "epoch": 0.58, + "learning_rate": 3.9451554226394965e-06, + "loss": 0.4623, + "step": 4632 + }, + { + "epoch": 0.58, + "learning_rate": 3.943170965186357e-06, + "loss": 0.5088, + "step": 4633 + }, + { + "epoch": 0.58, + "learning_rate": 3.941186681977799e-06, + "loss": 0.5154, + "step": 4634 + }, + { + "epoch": 0.58, + "learning_rate": 3.93920257334098e-06, + "loss": 0.5363, + "step": 4635 + }, + { + "epoch": 0.58, + "learning_rate": 3.937218639603031e-06, + "loss": 0.5196, + "step": 4636 + }, + { + "epoch": 0.58, + "learning_rate": 3.935234881091054e-06, + "loss": 0.5445, + "step": 4637 + }, + { + "epoch": 0.58, + "learning_rate": 3.93325129813212e-06, + "loss": 0.5365, + "step": 4638 + }, + { + "epoch": 0.58, + "learning_rate": 3.931267891053272e-06, + "loss": 0.543, + "step": 4639 + }, + { + "epoch": 0.58, + "learning_rate": 3.929284660181522e-06, + "loss": 0.5252, + "step": 4640 + }, + { + "epoch": 0.58, + "learning_rate": 3.927301605843858e-06, + "loss": 0.5325, + "step": 4641 + }, + { + "epoch": 0.58, + "learning_rate": 3.925318728367235e-06, + "loss": 0.2383, + "step": 4642 + }, + { + "epoch": 0.58, + "learning_rate": 3.923336028078578e-06, + "loss": 0.5643, + "step": 4643 + }, + { + "epoch": 0.58, + "learning_rate": 3.921353505304787e-06, + "loss": 0.4834, + "step": 4644 + }, + { + "epoch": 0.58, + "learning_rate": 3.919371160372729e-06, + "loss": 0.563, + "step": 4645 + }, + { + "epoch": 0.58, + "learning_rate": 3.917388993609244e-06, + "loss": 0.5397, + "step": 4646 + }, + { + "epoch": 0.58, + "learning_rate": 3.91540700534114e-06, + "loss": 0.543, + "step": 4647 + }, + { + "epoch": 0.58, + "learning_rate": 3.913425195895197e-06, + "loss": 0.5038, + "step": 4648 + }, + { + "epoch": 0.58, + "learning_rate": 3.911443565598166e-06, + "loss": 0.5342, + "step": 4649 + }, + { + "epoch": 0.58, + "learning_rate": 3.90946211477677e-06, + "loss": 0.6098, + "step": 4650 + }, + { + "epoch": 0.58, + "learning_rate": 3.907480843757698e-06, + "loss": 0.5335, + "step": 4651 + }, + { + "epoch": 0.58, + "learning_rate": 3.9054997528676125e-06, + "loss": 0.5338, + "step": 4652 + }, + { + "epoch": 0.58, + "learning_rate": 3.9035188424331484e-06, + "loss": 0.5151, + "step": 4653 + }, + { + "epoch": 0.58, + "learning_rate": 3.901538112780903e-06, + "loss": 0.5455, + "step": 4654 + }, + { + "epoch": 0.58, + "learning_rate": 3.8995575642374535e-06, + "loss": 0.5066, + "step": 4655 + }, + { + "epoch": 0.58, + "learning_rate": 3.897577197129339e-06, + "loss": 0.4705, + "step": 4656 + }, + { + "epoch": 0.58, + "learning_rate": 3.895597011783075e-06, + "loss": 0.5367, + "step": 4657 + }, + { + "epoch": 0.58, + "learning_rate": 3.893617008525145e-06, + "loss": 0.5869, + "step": 4658 + }, + { + "epoch": 0.58, + "learning_rate": 3.891637187682e-06, + "loss": 0.5576, + "step": 4659 + }, + { + "epoch": 0.58, + "learning_rate": 3.889657549580061e-06, + "loss": 0.5545, + "step": 4660 + }, + { + "epoch": 0.58, + "learning_rate": 3.8876780945457234e-06, + "loss": 0.5861, + "step": 4661 + }, + { + "epoch": 0.58, + "learning_rate": 3.8856988229053496e-06, + "loss": 0.4854, + "step": 4662 + }, + { + "epoch": 0.58, + "learning_rate": 3.88371973498527e-06, + "loss": 0.5066, + "step": 4663 + }, + { + "epoch": 0.58, + "learning_rate": 3.881740831111791e-06, + "loss": 0.545, + "step": 4664 + }, + { + "epoch": 0.58, + "learning_rate": 3.8797621116111775e-06, + "loss": 0.487, + "step": 4665 + }, + { + "epoch": 0.58, + "learning_rate": 3.877783576809675e-06, + "loss": 0.5633, + "step": 4666 + }, + { + "epoch": 0.59, + "learning_rate": 3.875805227033493e-06, + "loss": 0.4797, + "step": 4667 + }, + { + "epoch": 0.59, + "learning_rate": 3.873827062608812e-06, + "loss": 0.2125, + "step": 4668 + }, + { + "epoch": 0.59, + "learning_rate": 3.871849083861784e-06, + "loss": 0.5664, + "step": 4669 + }, + { + "epoch": 0.59, + "learning_rate": 3.869871291118523e-06, + "loss": 0.4898, + "step": 4670 + }, + { + "epoch": 0.59, + "learning_rate": 3.8678936847051204e-06, + "loss": 0.5342, + "step": 4671 + }, + { + "epoch": 0.59, + "learning_rate": 3.865916264947634e-06, + "loss": 0.5655, + "step": 4672 + }, + { + "epoch": 0.59, + "learning_rate": 3.86393903217209e-06, + "loss": 0.5514, + "step": 4673 + }, + { + "epoch": 0.59, + "learning_rate": 3.861961986704487e-06, + "loss": 0.5586, + "step": 4674 + }, + { + "epoch": 0.59, + "learning_rate": 3.859985128870785e-06, + "loss": 0.5149, + "step": 4675 + }, + { + "epoch": 0.59, + "learning_rate": 3.8580084589969215e-06, + "loss": 0.5745, + "step": 4676 + }, + { + "epoch": 0.59, + "learning_rate": 3.8560319774088e-06, + "loss": 0.4521, + "step": 4677 + }, + { + "epoch": 0.59, + "learning_rate": 3.8540556844322916e-06, + "loss": 0.4648, + "step": 4678 + }, + { + "epoch": 0.59, + "learning_rate": 3.8520795803932376e-06, + "loss": 0.5839, + "step": 4679 + }, + { + "epoch": 0.59, + "learning_rate": 3.8501036656174504e-06, + "loss": 0.5796, + "step": 4680 + }, + { + "epoch": 0.59, + "learning_rate": 3.848127940430705e-06, + "loss": 0.5132, + "step": 4681 + }, + { + "epoch": 0.59, + "learning_rate": 3.84615240515875e-06, + "loss": 0.5203, + "step": 4682 + }, + { + "epoch": 0.59, + "learning_rate": 3.844177060127303e-06, + "loss": 0.5587, + "step": 4683 + }, + { + "epoch": 0.59, + "learning_rate": 3.842201905662047e-06, + "loss": 0.4814, + "step": 4684 + }, + { + "epoch": 0.59, + "learning_rate": 3.8402269420886375e-06, + "loss": 0.5368, + "step": 4685 + }, + { + "epoch": 0.59, + "learning_rate": 3.838252169732694e-06, + "loss": 0.5809, + "step": 4686 + }, + { + "epoch": 0.59, + "learning_rate": 3.836277588919809e-06, + "loss": 0.5488, + "step": 4687 + }, + { + "epoch": 0.59, + "learning_rate": 3.834303199975539e-06, + "loss": 0.5266, + "step": 4688 + }, + { + "epoch": 0.59, + "learning_rate": 3.832329003225413e-06, + "loss": 0.5794, + "step": 4689 + }, + { + "epoch": 0.59, + "learning_rate": 3.830354998994928e-06, + "loss": 0.5029, + "step": 4690 + }, + { + "epoch": 0.59, + "learning_rate": 3.828381187609544e-06, + "loss": 0.5602, + "step": 4691 + }, + { + "epoch": 0.59, + "learning_rate": 3.8264075693946955e-06, + "loss": 0.5421, + "step": 4692 + }, + { + "epoch": 0.59, + "learning_rate": 3.824434144675782e-06, + "loss": 0.5844, + "step": 4693 + }, + { + "epoch": 0.59, + "learning_rate": 3.82246091377817e-06, + "loss": 0.5108, + "step": 4694 + }, + { + "epoch": 0.59, + "learning_rate": 3.820487877027198e-06, + "loss": 0.4976, + "step": 4695 + }, + { + "epoch": 0.59, + "learning_rate": 3.818515034748171e-06, + "loss": 0.5006, + "step": 4696 + }, + { + "epoch": 0.59, + "learning_rate": 3.816542387266358e-06, + "loss": 0.5071, + "step": 4697 + }, + { + "epoch": 0.59, + "learning_rate": 3.8145699349070015e-06, + "loss": 0.5373, + "step": 4698 + }, + { + "epoch": 0.59, + "learning_rate": 3.8125976779953095e-06, + "loss": 0.5112, + "step": 4699 + }, + { + "epoch": 0.59, + "learning_rate": 3.810625616856456e-06, + "loss": 0.4424, + "step": 4700 + }, + { + "epoch": 0.59, + "learning_rate": 3.8086537518155857e-06, + "loss": 0.5643, + "step": 4701 + }, + { + "epoch": 0.59, + "learning_rate": 3.806682083197809e-06, + "loss": 0.5535, + "step": 4702 + }, + { + "epoch": 0.59, + "learning_rate": 3.804710611328204e-06, + "loss": 0.5257, + "step": 4703 + }, + { + "epoch": 0.59, + "learning_rate": 3.8027393365318177e-06, + "loss": 0.5562, + "step": 4704 + }, + { + "epoch": 0.59, + "learning_rate": 3.8007682591336654e-06, + "loss": 0.2124, + "step": 4705 + }, + { + "epoch": 0.59, + "learning_rate": 3.7987973794587262e-06, + "loss": 0.5064, + "step": 4706 + }, + { + "epoch": 0.59, + "learning_rate": 3.796826697831949e-06, + "loss": 0.5169, + "step": 4707 + }, + { + "epoch": 0.59, + "learning_rate": 3.794856214578249e-06, + "loss": 0.5217, + "step": 4708 + }, + { + "epoch": 0.59, + "learning_rate": 3.792885930022511e-06, + "loss": 0.6012, + "step": 4709 + }, + { + "epoch": 0.59, + "learning_rate": 3.7909158444895845e-06, + "loss": 0.6084, + "step": 4710 + }, + { + "epoch": 0.59, + "learning_rate": 3.788945958304287e-06, + "loss": 0.5488, + "step": 4711 + }, + { + "epoch": 0.59, + "learning_rate": 3.7869762717914065e-06, + "loss": 0.5511, + "step": 4712 + }, + { + "epoch": 0.59, + "learning_rate": 3.78500678527569e-06, + "loss": 0.557, + "step": 4713 + }, + { + "epoch": 0.59, + "learning_rate": 3.7830374990818588e-06, + "loss": 0.512, + "step": 4714 + }, + { + "epoch": 0.59, + "learning_rate": 3.781068413534598e-06, + "loss": 0.5122, + "step": 4715 + }, + { + "epoch": 0.59, + "learning_rate": 3.779099528958561e-06, + "loss": 0.4698, + "step": 4716 + }, + { + "epoch": 0.59, + "learning_rate": 3.7771308456783694e-06, + "loss": 0.6007, + "step": 4717 + }, + { + "epoch": 0.59, + "learning_rate": 3.7751623640186052e-06, + "loss": 0.5444, + "step": 4718 + }, + { + "epoch": 0.59, + "learning_rate": 3.7731940843038246e-06, + "loss": 0.543, + "step": 4719 + }, + { + "epoch": 0.59, + "learning_rate": 3.7712260068585468e-06, + "loss": 0.5539, + "step": 4720 + }, + { + "epoch": 0.59, + "learning_rate": 3.7692581320072585e-06, + "loss": 0.4757, + "step": 4721 + }, + { + "epoch": 0.59, + "learning_rate": 3.767290460074415e-06, + "loss": 0.4999, + "step": 4722 + }, + { + "epoch": 0.59, + "learning_rate": 3.7653229913844312e-06, + "loss": 0.5387, + "step": 4723 + }, + { + "epoch": 0.59, + "learning_rate": 3.7633557262616967e-06, + "loss": 0.5062, + "step": 4724 + }, + { + "epoch": 0.59, + "learning_rate": 3.761388665030563e-06, + "loss": 0.5511, + "step": 4725 + }, + { + "epoch": 0.59, + "learning_rate": 3.75942180801535e-06, + "loss": 0.5008, + "step": 4726 + }, + { + "epoch": 0.59, + "learning_rate": 3.7574551555403423e-06, + "loss": 0.5065, + "step": 4727 + }, + { + "epoch": 0.59, + "learning_rate": 3.755488707929794e-06, + "loss": 0.496, + "step": 4728 + }, + { + "epoch": 0.59, + "learning_rate": 3.7535224655079184e-06, + "loss": 0.5822, + "step": 4729 + }, + { + "epoch": 0.59, + "learning_rate": 3.7515564285989024e-06, + "loss": 0.5574, + "step": 4730 + }, + { + "epoch": 0.59, + "learning_rate": 3.7495905975268953e-06, + "loss": 0.5757, + "step": 4731 + }, + { + "epoch": 0.59, + "learning_rate": 3.747624972616014e-06, + "loss": 0.4949, + "step": 4732 + }, + { + "epoch": 0.59, + "learning_rate": 3.7456595541903423e-06, + "loss": 0.4908, + "step": 4733 + }, + { + "epoch": 0.59, + "learning_rate": 3.743694342573925e-06, + "loss": 0.5123, + "step": 4734 + }, + { + "epoch": 0.59, + "learning_rate": 3.741729338090776e-06, + "loss": 0.6108, + "step": 4735 + }, + { + "epoch": 0.59, + "learning_rate": 3.7397645410648783e-06, + "loss": 0.5392, + "step": 4736 + }, + { + "epoch": 0.59, + "learning_rate": 3.737799951820175e-06, + "loss": 0.5349, + "step": 4737 + }, + { + "epoch": 0.59, + "learning_rate": 3.7358355706805795e-06, + "loss": 0.582, + "step": 4738 + }, + { + "epoch": 0.59, + "learning_rate": 3.7338713979699703e-06, + "loss": 0.5735, + "step": 4739 + }, + { + "epoch": 0.59, + "learning_rate": 3.7319074340121853e-06, + "loss": 0.6002, + "step": 4740 + }, + { + "epoch": 0.59, + "learning_rate": 3.7299436791310362e-06, + "loss": 0.5625, + "step": 4741 + }, + { + "epoch": 0.59, + "learning_rate": 3.727980133650296e-06, + "loss": 0.5126, + "step": 4742 + }, + { + "epoch": 0.59, + "learning_rate": 3.726016797893704e-06, + "loss": 0.5932, + "step": 4743 + }, + { + "epoch": 0.59, + "learning_rate": 3.7240536721849667e-06, + "loss": 0.5095, + "step": 4744 + }, + { + "epoch": 0.59, + "learning_rate": 3.7220907568477528e-06, + "loss": 0.5681, + "step": 4745 + }, + { + "epoch": 0.59, + "learning_rate": 3.720128052205697e-06, + "loss": 0.5489, + "step": 4746 + }, + { + "epoch": 0.6, + "learning_rate": 3.7181655585824004e-06, + "loss": 0.5718, + "step": 4747 + }, + { + "epoch": 0.6, + "learning_rate": 3.71620327630143e-06, + "loss": 0.5317, + "step": 4748 + }, + { + "epoch": 0.6, + "learning_rate": 3.7142412056863176e-06, + "loss": 0.5089, + "step": 4749 + }, + { + "epoch": 0.6, + "learning_rate": 3.7122793470605562e-06, + "loss": 0.508, + "step": 4750 + }, + { + "epoch": 0.6, + "learning_rate": 3.71031770074761e-06, + "loss": 0.5206, + "step": 4751 + }, + { + "epoch": 0.6, + "learning_rate": 3.708356267070904e-06, + "loss": 0.5002, + "step": 4752 + }, + { + "epoch": 0.6, + "learning_rate": 3.7063950463538288e-06, + "loss": 0.5606, + "step": 4753 + }, + { + "epoch": 0.6, + "learning_rate": 3.704434038919742e-06, + "loss": 0.5388, + "step": 4754 + }, + { + "epoch": 0.6, + "learning_rate": 3.7024732450919644e-06, + "loss": 0.56, + "step": 4755 + }, + { + "epoch": 0.6, + "learning_rate": 3.7005126651937796e-06, + "loss": 0.5947, + "step": 4756 + }, + { + "epoch": 0.6, + "learning_rate": 3.6985522995484398e-06, + "loss": 0.5124, + "step": 4757 + }, + { + "epoch": 0.6, + "learning_rate": 3.6965921484791604e-06, + "loss": 0.5831, + "step": 4758 + }, + { + "epoch": 0.6, + "learning_rate": 3.69463221230912e-06, + "loss": 0.5543, + "step": 4759 + }, + { + "epoch": 0.6, + "learning_rate": 3.692672491361464e-06, + "loss": 0.4837, + "step": 4760 + }, + { + "epoch": 0.6, + "learning_rate": 3.6907129859593e-06, + "loss": 0.5475, + "step": 4761 + }, + { + "epoch": 0.6, + "learning_rate": 3.688753696425701e-06, + "loss": 0.557, + "step": 4762 + }, + { + "epoch": 0.6, + "learning_rate": 3.686794623083706e-06, + "loss": 0.5535, + "step": 4763 + }, + { + "epoch": 0.6, + "learning_rate": 3.6848357662563166e-06, + "loss": 0.5216, + "step": 4764 + }, + { + "epoch": 0.6, + "learning_rate": 3.682877126266501e-06, + "loss": 0.5577, + "step": 4765 + }, + { + "epoch": 0.6, + "learning_rate": 3.680918703437185e-06, + "loss": 0.4946, + "step": 4766 + }, + { + "epoch": 0.6, + "learning_rate": 3.6789604980912673e-06, + "loss": 0.5245, + "step": 4767 + }, + { + "epoch": 0.6, + "learning_rate": 3.677002510551605e-06, + "loss": 0.5452, + "step": 4768 + }, + { + "epoch": 0.6, + "learning_rate": 3.6750447411410216e-06, + "loss": 0.5435, + "step": 4769 + }, + { + "epoch": 0.6, + "learning_rate": 3.6730871901823047e-06, + "loss": 0.5609, + "step": 4770 + }, + { + "epoch": 0.6, + "learning_rate": 3.671129857998207e-06, + "loss": 0.5551, + "step": 4771 + }, + { + "epoch": 0.6, + "learning_rate": 3.669172744911439e-06, + "loss": 0.5349, + "step": 4772 + }, + { + "epoch": 0.6, + "learning_rate": 3.6672158512446822e-06, + "loss": 0.5204, + "step": 4773 + }, + { + "epoch": 0.6, + "learning_rate": 3.665259177320578e-06, + "loss": 0.5143, + "step": 4774 + }, + { + "epoch": 0.6, + "learning_rate": 3.6633027234617346e-06, + "loss": 0.5611, + "step": 4775 + }, + { + "epoch": 0.6, + "learning_rate": 3.6613464899907236e-06, + "loss": 0.4969, + "step": 4776 + }, + { + "epoch": 0.6, + "learning_rate": 3.6593904772300738e-06, + "loss": 0.5365, + "step": 4777 + }, + { + "epoch": 0.6, + "learning_rate": 3.6574346855022853e-06, + "loss": 0.5542, + "step": 4778 + }, + { + "epoch": 0.6, + "learning_rate": 3.655479115129819e-06, + "loss": 0.5256, + "step": 4779 + }, + { + "epoch": 0.6, + "learning_rate": 3.6535237664351e-06, + "loss": 0.5562, + "step": 4780 + }, + { + "epoch": 0.6, + "learning_rate": 3.651568639740517e-06, + "loss": 0.5547, + "step": 4781 + }, + { + "epoch": 0.6, + "learning_rate": 3.6496137353684184e-06, + "loss": 0.5381, + "step": 4782 + }, + { + "epoch": 0.6, + "learning_rate": 3.6476590536411203e-06, + "loss": 0.5114, + "step": 4783 + }, + { + "epoch": 0.6, + "learning_rate": 3.645704594880901e-06, + "loss": 0.5402, + "step": 4784 + }, + { + "epoch": 0.6, + "learning_rate": 3.643750359410001e-06, + "loss": 0.504, + "step": 4785 + }, + { + "epoch": 0.6, + "learning_rate": 3.6417963475506256e-06, + "loss": 0.5708, + "step": 4786 + }, + { + "epoch": 0.6, + "learning_rate": 3.639842559624944e-06, + "loss": 0.4834, + "step": 4787 + }, + { + "epoch": 0.6, + "learning_rate": 3.637888995955082e-06, + "loss": 0.4784, + "step": 4788 + }, + { + "epoch": 0.6, + "learning_rate": 3.6359356568631367e-06, + "loss": 0.5504, + "step": 4789 + }, + { + "epoch": 0.6, + "learning_rate": 3.633982542671163e-06, + "loss": 0.6195, + "step": 4790 + }, + { + "epoch": 0.6, + "learning_rate": 3.632029653701182e-06, + "loss": 0.4968, + "step": 4791 + }, + { + "epoch": 0.6, + "learning_rate": 3.6300769902751764e-06, + "loss": 0.5137, + "step": 4792 + }, + { + "epoch": 0.6, + "learning_rate": 3.6281245527150895e-06, + "loss": 0.4904, + "step": 4793 + }, + { + "epoch": 0.6, + "learning_rate": 3.6261723413428294e-06, + "loss": 0.5786, + "step": 4794 + }, + { + "epoch": 0.6, + "learning_rate": 3.6242203564802674e-06, + "loss": 0.5429, + "step": 4795 + }, + { + "epoch": 0.6, + "learning_rate": 3.622268598449237e-06, + "loss": 0.4646, + "step": 4796 + }, + { + "epoch": 0.6, + "learning_rate": 3.620317067571534e-06, + "loss": 0.535, + "step": 4797 + }, + { + "epoch": 0.6, + "learning_rate": 3.6183657641689185e-06, + "loss": 0.5708, + "step": 4798 + }, + { + "epoch": 0.6, + "learning_rate": 3.616414688563108e-06, + "loss": 0.5082, + "step": 4799 + }, + { + "epoch": 0.6, + "learning_rate": 3.614463841075788e-06, + "loss": 0.5274, + "step": 4800 + }, + { + "epoch": 0.6, + "learning_rate": 3.612513222028603e-06, + "loss": 0.2125, + "step": 4801 + }, + { + "epoch": 0.6, + "learning_rate": 3.6105628317431627e-06, + "loss": 0.4984, + "step": 4802 + }, + { + "epoch": 0.6, + "learning_rate": 3.6086126705410386e-06, + "loss": 0.4585, + "step": 4803 + }, + { + "epoch": 0.6, + "learning_rate": 3.6066627387437604e-06, + "loss": 0.4524, + "step": 4804 + }, + { + "epoch": 0.6, + "learning_rate": 3.6047130366728235e-06, + "loss": 0.5483, + "step": 4805 + }, + { + "epoch": 0.6, + "learning_rate": 3.6027635646496857e-06, + "loss": 0.4929, + "step": 4806 + }, + { + "epoch": 0.6, + "learning_rate": 3.6008143229957655e-06, + "loss": 0.5555, + "step": 4807 + }, + { + "epoch": 0.6, + "learning_rate": 3.598865312032445e-06, + "loss": 0.5842, + "step": 4808 + }, + { + "epoch": 0.6, + "learning_rate": 3.5969165320810658e-06, + "loss": 0.5563, + "step": 4809 + }, + { + "epoch": 0.6, + "learning_rate": 3.594967983462933e-06, + "loss": 0.5115, + "step": 4810 + }, + { + "epoch": 0.6, + "learning_rate": 3.593019666499315e-06, + "loss": 0.5434, + "step": 4811 + }, + { + "epoch": 0.6, + "learning_rate": 3.5910715815114376e-06, + "loss": 0.5192, + "step": 4812 + }, + { + "epoch": 0.6, + "learning_rate": 3.5891237288204935e-06, + "loss": 0.5331, + "step": 4813 + }, + { + "epoch": 0.6, + "learning_rate": 3.5871761087476344e-06, + "loss": 0.5338, + "step": 4814 + }, + { + "epoch": 0.6, + "learning_rate": 3.585228721613972e-06, + "loss": 0.4681, + "step": 4815 + }, + { + "epoch": 0.6, + "learning_rate": 3.583281567740583e-06, + "loss": 0.4313, + "step": 4816 + }, + { + "epoch": 0.6, + "learning_rate": 3.5813346474485043e-06, + "loss": 0.4994, + "step": 4817 + }, + { + "epoch": 0.6, + "learning_rate": 3.5793879610587356e-06, + "loss": 0.5153, + "step": 4818 + }, + { + "epoch": 0.6, + "learning_rate": 3.5774415088922345e-06, + "loss": 0.5362, + "step": 4819 + }, + { + "epoch": 0.6, + "learning_rate": 3.5754952912699213e-06, + "loss": 0.5228, + "step": 4820 + }, + { + "epoch": 0.6, + "learning_rate": 3.5735493085126806e-06, + "loss": 0.5784, + "step": 4821 + }, + { + "epoch": 0.6, + "learning_rate": 3.5716035609413548e-06, + "loss": 0.2445, + "step": 4822 + }, + { + "epoch": 0.6, + "learning_rate": 3.5696580488767496e-06, + "loss": 0.5756, + "step": 4823 + }, + { + "epoch": 0.6, + "learning_rate": 3.5677127726396326e-06, + "loss": 0.5617, + "step": 4824 + }, + { + "epoch": 0.6, + "learning_rate": 3.5657677325507278e-06, + "loss": 0.5187, + "step": 4825 + }, + { + "epoch": 0.6, + "learning_rate": 3.5638229289307247e-06, + "loss": 0.5148, + "step": 4826 + }, + { + "epoch": 0.61, + "learning_rate": 3.5618783621002724e-06, + "loss": 0.4837, + "step": 4827 + }, + { + "epoch": 0.61, + "learning_rate": 3.5599340323799817e-06, + "loss": 0.4706, + "step": 4828 + }, + { + "epoch": 0.61, + "learning_rate": 3.5579899400904246e-06, + "loss": 0.5578, + "step": 4829 + }, + { + "epoch": 0.61, + "learning_rate": 3.556046085552134e-06, + "loss": 0.4907, + "step": 4830 + }, + { + "epoch": 0.61, + "learning_rate": 3.5541024690855986e-06, + "loss": 0.5388, + "step": 4831 + }, + { + "epoch": 0.61, + "learning_rate": 3.5521590910112736e-06, + "loss": 0.5065, + "step": 4832 + }, + { + "epoch": 0.61, + "learning_rate": 3.550215951649575e-06, + "loss": 0.4809, + "step": 4833 + }, + { + "epoch": 0.61, + "learning_rate": 3.548273051320876e-06, + "loss": 0.5332, + "step": 4834 + }, + { + "epoch": 0.61, + "learning_rate": 3.546330390345516e-06, + "loss": 0.5238, + "step": 4835 + }, + { + "epoch": 0.61, + "learning_rate": 3.544387969043785e-06, + "loss": 0.5688, + "step": 4836 + }, + { + "epoch": 0.61, + "learning_rate": 3.5424457877359424e-06, + "loss": 0.538, + "step": 4837 + }, + { + "epoch": 0.61, + "learning_rate": 3.540503846742205e-06, + "loss": 0.5678, + "step": 4838 + }, + { + "epoch": 0.61, + "learning_rate": 3.53856214638275e-06, + "loss": 0.5072, + "step": 4839 + }, + { + "epoch": 0.61, + "learning_rate": 3.5366206869777187e-06, + "loss": 0.5415, + "step": 4840 + }, + { + "epoch": 0.61, + "learning_rate": 3.534679468847202e-06, + "loss": 0.6065, + "step": 4841 + }, + { + "epoch": 0.61, + "learning_rate": 3.532738492311262e-06, + "loss": 0.5294, + "step": 4842 + }, + { + "epoch": 0.61, + "learning_rate": 3.530797757689917e-06, + "loss": 0.6059, + "step": 4843 + }, + { + "epoch": 0.61, + "learning_rate": 3.528857265303145e-06, + "loss": 0.4771, + "step": 4844 + }, + { + "epoch": 0.61, + "learning_rate": 3.5269170154708847e-06, + "loss": 0.5239, + "step": 4845 + }, + { + "epoch": 0.61, + "learning_rate": 3.524977008513036e-06, + "loss": 0.5208, + "step": 4846 + }, + { + "epoch": 0.61, + "learning_rate": 3.5230372447494547e-06, + "loss": 0.5103, + "step": 4847 + }, + { + "epoch": 0.61, + "learning_rate": 3.52109772449996e-06, + "loss": 0.5337, + "step": 4848 + }, + { + "epoch": 0.61, + "learning_rate": 3.51915844808433e-06, + "loss": 0.5739, + "step": 4849 + }, + { + "epoch": 0.61, + "learning_rate": 3.517219415822303e-06, + "loss": 0.5446, + "step": 4850 + }, + { + "epoch": 0.61, + "learning_rate": 3.5152806280335794e-06, + "loss": 0.4613, + "step": 4851 + }, + { + "epoch": 0.61, + "learning_rate": 3.513342085037812e-06, + "loss": 0.51, + "step": 4852 + }, + { + "epoch": 0.61, + "learning_rate": 3.5114037871546198e-06, + "loss": 0.5192, + "step": 4853 + }, + { + "epoch": 0.61, + "learning_rate": 3.50946573470358e-06, + "loss": 0.5092, + "step": 4854 + }, + { + "epoch": 0.61, + "learning_rate": 3.5075279280042275e-06, + "loss": 0.554, + "step": 4855 + }, + { + "epoch": 0.61, + "learning_rate": 3.5055903673760604e-06, + "loss": 0.5061, + "step": 4856 + }, + { + "epoch": 0.61, + "learning_rate": 3.5036530531385325e-06, + "loss": 0.5028, + "step": 4857 + }, + { + "epoch": 0.61, + "learning_rate": 3.5017159856110573e-06, + "loss": 0.5427, + "step": 4858 + }, + { + "epoch": 0.61, + "learning_rate": 3.49977916511301e-06, + "loss": 0.5123, + "step": 4859 + }, + { + "epoch": 0.61, + "learning_rate": 3.4978425919637217e-06, + "loss": 0.5464, + "step": 4860 + }, + { + "epoch": 0.61, + "learning_rate": 3.4959062664824873e-06, + "loss": 0.4923, + "step": 4861 + }, + { + "epoch": 0.61, + "learning_rate": 3.4939701889885575e-06, + "loss": 0.5468, + "step": 4862 + }, + { + "epoch": 0.61, + "learning_rate": 3.492034359801142e-06, + "loss": 0.498, + "step": 4863 + }, + { + "epoch": 0.61, + "learning_rate": 3.4900987792394115e-06, + "loss": 0.5289, + "step": 4864 + }, + { + "epoch": 0.61, + "learning_rate": 3.4881634476224934e-06, + "loss": 0.5435, + "step": 4865 + }, + { + "epoch": 0.61, + "learning_rate": 3.486228365269476e-06, + "loss": 0.4953, + "step": 4866 + }, + { + "epoch": 0.61, + "learning_rate": 3.4842935324994065e-06, + "loss": 0.5612, + "step": 4867 + }, + { + "epoch": 0.61, + "learning_rate": 3.4823589496312894e-06, + "loss": 0.5255, + "step": 4868 + }, + { + "epoch": 0.61, + "learning_rate": 3.4804246169840885e-06, + "loss": 0.5868, + "step": 4869 + }, + { + "epoch": 0.61, + "learning_rate": 3.4784905348767265e-06, + "loss": 0.5203, + "step": 4870 + }, + { + "epoch": 0.61, + "learning_rate": 3.4765567036280877e-06, + "loss": 0.4698, + "step": 4871 + }, + { + "epoch": 0.61, + "learning_rate": 3.4746231235570096e-06, + "loss": 0.5153, + "step": 4872 + }, + { + "epoch": 0.61, + "learning_rate": 3.4726897949822912e-06, + "loss": 0.5231, + "step": 4873 + }, + { + "epoch": 0.61, + "learning_rate": 3.470756718222691e-06, + "loss": 0.2135, + "step": 4874 + }, + { + "epoch": 0.61, + "learning_rate": 3.468823893596924e-06, + "loss": 0.4734, + "step": 4875 + }, + { + "epoch": 0.61, + "learning_rate": 3.4668913214236642e-06, + "loss": 0.2545, + "step": 4876 + }, + { + "epoch": 0.61, + "learning_rate": 3.464959002021545e-06, + "loss": 0.5191, + "step": 4877 + }, + { + "epoch": 0.61, + "learning_rate": 3.4630269357091594e-06, + "loss": 0.5319, + "step": 4878 + }, + { + "epoch": 0.61, + "learning_rate": 3.461095122805053e-06, + "loss": 0.5021, + "step": 4879 + }, + { + "epoch": 0.61, + "learning_rate": 3.459163563627734e-06, + "loss": 0.5152, + "step": 4880 + }, + { + "epoch": 0.61, + "learning_rate": 3.4572322584956685e-06, + "loss": 0.5319, + "step": 4881 + }, + { + "epoch": 0.61, + "learning_rate": 3.4553012077272807e-06, + "loss": 0.4928, + "step": 4882 + }, + { + "epoch": 0.61, + "learning_rate": 3.453370411640954e-06, + "loss": 0.6075, + "step": 4883 + }, + { + "epoch": 0.61, + "learning_rate": 3.4514398705550244e-06, + "loss": 0.4834, + "step": 4884 + }, + { + "epoch": 0.61, + "learning_rate": 3.4495095847877913e-06, + "loss": 0.5144, + "step": 4885 + }, + { + "epoch": 0.61, + "learning_rate": 3.447579554657511e-06, + "loss": 0.2172, + "step": 4886 + }, + { + "epoch": 0.61, + "learning_rate": 3.445649780482397e-06, + "loss": 0.5754, + "step": 4887 + }, + { + "epoch": 0.61, + "learning_rate": 3.4437202625806197e-06, + "loss": 0.5936, + "step": 4888 + }, + { + "epoch": 0.61, + "learning_rate": 3.4417910012703116e-06, + "loss": 0.4927, + "step": 4889 + }, + { + "epoch": 0.61, + "learning_rate": 3.439861996869554e-06, + "loss": 0.548, + "step": 4890 + }, + { + "epoch": 0.61, + "learning_rate": 3.4379332496963944e-06, + "loss": 0.527, + "step": 4891 + }, + { + "epoch": 0.61, + "learning_rate": 3.436004760068834e-06, + "loss": 0.5611, + "step": 4892 + }, + { + "epoch": 0.61, + "learning_rate": 3.4340765283048326e-06, + "loss": 0.4265, + "step": 4893 + }, + { + "epoch": 0.61, + "learning_rate": 3.4321485547223098e-06, + "loss": 0.5635, + "step": 4894 + }, + { + "epoch": 0.61, + "learning_rate": 3.4302208396391356e-06, + "loss": 0.5052, + "step": 4895 + }, + { + "epoch": 0.61, + "learning_rate": 3.4282933833731435e-06, + "loss": 0.4713, + "step": 4896 + }, + { + "epoch": 0.61, + "learning_rate": 3.426366186242124e-06, + "loss": 0.514, + "step": 4897 + }, + { + "epoch": 0.61, + "learning_rate": 3.424439248563821e-06, + "loss": 0.5156, + "step": 4898 + }, + { + "epoch": 0.61, + "learning_rate": 3.4225125706559426e-06, + "loss": 0.5021, + "step": 4899 + }, + { + "epoch": 0.61, + "learning_rate": 3.4205861528361444e-06, + "loss": 0.4716, + "step": 4900 + }, + { + "epoch": 0.61, + "learning_rate": 3.4186599954220467e-06, + "loss": 0.6026, + "step": 4901 + }, + { + "epoch": 0.61, + "learning_rate": 3.4167340987312246e-06, + "loss": 0.5192, + "step": 4902 + }, + { + "epoch": 0.61, + "learning_rate": 3.4148084630812095e-06, + "loss": 0.5524, + "step": 4903 + }, + { + "epoch": 0.61, + "learning_rate": 3.4128830887894915e-06, + "loss": 0.5174, + "step": 4904 + }, + { + "epoch": 0.61, + "learning_rate": 3.4109579761735173e-06, + "loss": 0.5067, + "step": 4905 + }, + { + "epoch": 0.61, + "learning_rate": 3.4090331255506863e-06, + "loss": 0.5508, + "step": 4906 + }, + { + "epoch": 0.62, + "learning_rate": 3.4071085372383605e-06, + "loss": 0.4993, + "step": 4907 + }, + { + "epoch": 0.62, + "learning_rate": 3.4051842115538554e-06, + "loss": 0.4906, + "step": 4908 + }, + { + "epoch": 0.62, + "learning_rate": 3.403260148814444e-06, + "loss": 0.4846, + "step": 4909 + }, + { + "epoch": 0.62, + "learning_rate": 3.4013363493373584e-06, + "loss": 0.5161, + "step": 4910 + }, + { + "epoch": 0.62, + "learning_rate": 3.399412813439781e-06, + "loss": 0.5515, + "step": 4911 + }, + { + "epoch": 0.62, + "learning_rate": 3.3974895414388555e-06, + "loss": 0.4783, + "step": 4912 + }, + { + "epoch": 0.62, + "learning_rate": 3.3955665336516824e-06, + "loss": 0.559, + "step": 4913 + }, + { + "epoch": 0.62, + "learning_rate": 3.393643790395317e-06, + "loss": 0.4788, + "step": 4914 + }, + { + "epoch": 0.62, + "learning_rate": 3.391721311986772e-06, + "loss": 0.5711, + "step": 4915 + }, + { + "epoch": 0.62, + "learning_rate": 3.3897990987430152e-06, + "loss": 0.5385, + "step": 4916 + }, + { + "epoch": 0.62, + "learning_rate": 3.3878771509809714e-06, + "loss": 0.5596, + "step": 4917 + }, + { + "epoch": 0.62, + "learning_rate": 3.3859554690175204e-06, + "loss": 0.5462, + "step": 4918 + }, + { + "epoch": 0.62, + "learning_rate": 3.384034053169501e-06, + "loss": 0.5753, + "step": 4919 + }, + { + "epoch": 0.62, + "learning_rate": 3.3821129037537053e-06, + "loss": 0.536, + "step": 4920 + }, + { + "epoch": 0.62, + "learning_rate": 3.3801920210868844e-06, + "loss": 0.4789, + "step": 4921 + }, + { + "epoch": 0.62, + "learning_rate": 3.3782714054857415e-06, + "loss": 0.4775, + "step": 4922 + }, + { + "epoch": 0.62, + "learning_rate": 3.376351057266939e-06, + "loss": 0.4995, + "step": 4923 + }, + { + "epoch": 0.62, + "learning_rate": 3.374430976747094e-06, + "loss": 0.5534, + "step": 4924 + }, + { + "epoch": 0.62, + "learning_rate": 3.3725111642427793e-06, + "loss": 0.5555, + "step": 4925 + }, + { + "epoch": 0.62, + "learning_rate": 3.3705916200705244e-06, + "loss": 0.5161, + "step": 4926 + }, + { + "epoch": 0.62, + "learning_rate": 3.368672344546813e-06, + "loss": 0.5354, + "step": 4927 + }, + { + "epoch": 0.62, + "learning_rate": 3.3667533379880857e-06, + "loss": 0.5337, + "step": 4928 + }, + { + "epoch": 0.62, + "learning_rate": 3.364834600710739e-06, + "loss": 0.5209, + "step": 4929 + }, + { + "epoch": 0.62, + "learning_rate": 3.3629161330311242e-06, + "loss": 0.5307, + "step": 4930 + }, + { + "epoch": 0.62, + "learning_rate": 3.3609979352655515e-06, + "loss": 0.5501, + "step": 4931 + }, + { + "epoch": 0.62, + "learning_rate": 3.3590800077302777e-06, + "loss": 0.5391, + "step": 4932 + }, + { + "epoch": 0.62, + "learning_rate": 3.3571623507415242e-06, + "loss": 0.5108, + "step": 4933 + }, + { + "epoch": 0.62, + "learning_rate": 3.355244964615464e-06, + "loss": 0.4718, + "step": 4934 + }, + { + "epoch": 0.62, + "learning_rate": 3.3533278496682263e-06, + "loss": 0.5385, + "step": 4935 + }, + { + "epoch": 0.62, + "learning_rate": 3.351411006215895e-06, + "loss": 0.5488, + "step": 4936 + }, + { + "epoch": 0.62, + "learning_rate": 3.3494944345745117e-06, + "loss": 0.5223, + "step": 4937 + }, + { + "epoch": 0.62, + "learning_rate": 3.347578135060067e-06, + "loss": 0.545, + "step": 4938 + }, + { + "epoch": 0.62, + "learning_rate": 3.3456621079885115e-06, + "loss": 0.5536, + "step": 4939 + }, + { + "epoch": 0.62, + "learning_rate": 3.343746353675752e-06, + "loss": 0.5543, + "step": 4940 + }, + { + "epoch": 0.62, + "learning_rate": 3.341830872437646e-06, + "loss": 0.5574, + "step": 4941 + }, + { + "epoch": 0.62, + "learning_rate": 3.3399156645900123e-06, + "loss": 0.5552, + "step": 4942 + }, + { + "epoch": 0.62, + "learning_rate": 3.338000730448615e-06, + "loss": 0.4624, + "step": 4943 + }, + { + "epoch": 0.62, + "learning_rate": 3.3360860703291818e-06, + "loss": 0.4191, + "step": 4944 + }, + { + "epoch": 0.62, + "learning_rate": 3.3341716845473925e-06, + "loss": 0.4139, + "step": 4945 + }, + { + "epoch": 0.62, + "learning_rate": 3.3322575734188794e-06, + "loss": 0.5418, + "step": 4946 + }, + { + "epoch": 0.62, + "learning_rate": 3.330343737259233e-06, + "loss": 0.5178, + "step": 4947 + }, + { + "epoch": 0.62, + "learning_rate": 3.328430176383998e-06, + "loss": 0.5469, + "step": 4948 + }, + { + "epoch": 0.62, + "learning_rate": 3.3265168911086686e-06, + "loss": 0.5179, + "step": 4949 + }, + { + "epoch": 0.62, + "learning_rate": 3.3246038817487004e-06, + "loss": 0.5307, + "step": 4950 + }, + { + "epoch": 0.62, + "learning_rate": 3.3226911486195e-06, + "loss": 0.5491, + "step": 4951 + }, + { + "epoch": 0.62, + "learning_rate": 3.3207786920364294e-06, + "loss": 0.5235, + "step": 4952 + }, + { + "epoch": 0.62, + "learning_rate": 3.3188665123148055e-06, + "loss": 0.5203, + "step": 4953 + }, + { + "epoch": 0.62, + "learning_rate": 3.3169546097698964e-06, + "loss": 0.5473, + "step": 4954 + }, + { + "epoch": 0.62, + "learning_rate": 3.3150429847169275e-06, + "loss": 0.5788, + "step": 4955 + }, + { + "epoch": 0.62, + "learning_rate": 3.3131316374710786e-06, + "loss": 0.4819, + "step": 4956 + }, + { + "epoch": 0.62, + "learning_rate": 3.3112205683474824e-06, + "loss": 0.513, + "step": 4957 + }, + { + "epoch": 0.62, + "learning_rate": 3.3093097776612287e-06, + "loss": 0.5431, + "step": 4958 + }, + { + "epoch": 0.62, + "learning_rate": 3.3073992657273545e-06, + "loss": 0.5018, + "step": 4959 + }, + { + "epoch": 0.62, + "learning_rate": 3.305489032860858e-06, + "loss": 0.5713, + "step": 4960 + }, + { + "epoch": 0.62, + "learning_rate": 3.3035790793766874e-06, + "loss": 0.4759, + "step": 4961 + }, + { + "epoch": 0.62, + "learning_rate": 3.301669405589747e-06, + "loss": 0.5469, + "step": 4962 + }, + { + "epoch": 0.62, + "learning_rate": 3.2997600118148944e-06, + "loss": 0.4862, + "step": 4963 + }, + { + "epoch": 0.62, + "learning_rate": 3.297850898366942e-06, + "loss": 0.5346, + "step": 4964 + }, + { + "epoch": 0.62, + "learning_rate": 3.2959420655606504e-06, + "loss": 0.5736, + "step": 4965 + }, + { + "epoch": 0.62, + "learning_rate": 3.294033513710741e-06, + "loss": 0.5156, + "step": 4966 + }, + { + "epoch": 0.62, + "learning_rate": 3.2921252431318864e-06, + "loss": 0.5197, + "step": 4967 + }, + { + "epoch": 0.62, + "learning_rate": 3.2902172541387114e-06, + "loss": 0.5159, + "step": 4968 + }, + { + "epoch": 0.62, + "learning_rate": 3.288309547045796e-06, + "loss": 0.518, + "step": 4969 + }, + { + "epoch": 0.62, + "learning_rate": 3.2864021221676747e-06, + "loss": 0.5042, + "step": 4970 + }, + { + "epoch": 0.62, + "learning_rate": 3.2844949798188307e-06, + "loss": 0.5436, + "step": 4971 + }, + { + "epoch": 0.62, + "learning_rate": 3.282588120313706e-06, + "loss": 0.5215, + "step": 4972 + }, + { + "epoch": 0.62, + "learning_rate": 3.280681543966694e-06, + "loss": 0.5563, + "step": 4973 + }, + { + "epoch": 0.62, + "learning_rate": 3.278775251092141e-06, + "loss": 0.4978, + "step": 4974 + }, + { + "epoch": 0.62, + "learning_rate": 3.2768692420043456e-06, + "loss": 0.5138, + "step": 4975 + }, + { + "epoch": 0.62, + "learning_rate": 3.274963517017563e-06, + "loss": 0.4696, + "step": 4976 + }, + { + "epoch": 0.62, + "learning_rate": 3.273058076445999e-06, + "loss": 0.54, + "step": 4977 + }, + { + "epoch": 0.62, + "learning_rate": 3.2711529206038113e-06, + "loss": 0.5481, + "step": 4978 + }, + { + "epoch": 0.62, + "learning_rate": 3.2692480498051134e-06, + "loss": 0.5356, + "step": 4979 + }, + { + "epoch": 0.62, + "learning_rate": 3.267343464363971e-06, + "loss": 0.5358, + "step": 4980 + }, + { + "epoch": 0.62, + "learning_rate": 3.265439164594402e-06, + "loss": 0.2355, + "step": 4981 + }, + { + "epoch": 0.62, + "learning_rate": 3.2635351508103767e-06, + "loss": 0.4948, + "step": 4982 + }, + { + "epoch": 0.62, + "learning_rate": 3.2616314233258205e-06, + "loss": 0.4847, + "step": 4983 + }, + { + "epoch": 0.62, + "learning_rate": 3.2597279824546114e-06, + "loss": 0.5608, + "step": 4984 + }, + { + "epoch": 0.62, + "learning_rate": 3.257824828510577e-06, + "loss": 0.5036, + "step": 4985 + }, + { + "epoch": 0.62, + "learning_rate": 3.255921961807499e-06, + "loss": 0.5271, + "step": 4986 + }, + { + "epoch": 0.63, + "learning_rate": 3.254019382659114e-06, + "loss": 0.5391, + "step": 4987 + }, + { + "epoch": 0.63, + "learning_rate": 3.2521170913791082e-06, + "loss": 0.5214, + "step": 4988 + }, + { + "epoch": 0.63, + "learning_rate": 3.2502150882811234e-06, + "loss": 0.5356, + "step": 4989 + }, + { + "epoch": 0.63, + "learning_rate": 3.248313373678753e-06, + "loss": 0.5439, + "step": 4990 + }, + { + "epoch": 0.63, + "learning_rate": 3.2464119478855384e-06, + "loss": 0.2314, + "step": 4991 + }, + { + "epoch": 0.63, + "learning_rate": 3.244510811214979e-06, + "loss": 0.5422, + "step": 4992 + }, + { + "epoch": 0.63, + "learning_rate": 3.242609963980524e-06, + "loss": 0.5121, + "step": 4993 + }, + { + "epoch": 0.63, + "learning_rate": 3.2407094064955763e-06, + "loss": 0.5069, + "step": 4994 + }, + { + "epoch": 0.63, + "learning_rate": 3.2388091390734895e-06, + "loss": 0.5136, + "step": 4995 + }, + { + "epoch": 0.63, + "learning_rate": 3.2369091620275716e-06, + "loss": 0.4988, + "step": 4996 + }, + { + "epoch": 0.63, + "learning_rate": 3.235009475671079e-06, + "loss": 0.544, + "step": 4997 + }, + { + "epoch": 0.63, + "learning_rate": 3.233110080317222e-06, + "loss": 0.5763, + "step": 4998 + }, + { + "epoch": 0.63, + "learning_rate": 3.2312109762791644e-06, + "loss": 0.5679, + "step": 4999 + }, + { + "epoch": 0.63, + "learning_rate": 3.2293121638700205e-06, + "loss": 0.5164, + "step": 5000 + }, + { + "epoch": 0.63, + "learning_rate": 3.2274136434028593e-06, + "loss": 0.5405, + "step": 5001 + }, + { + "epoch": 0.63, + "learning_rate": 3.225515415190694e-06, + "loss": 0.4788, + "step": 5002 + }, + { + "epoch": 0.63, + "learning_rate": 3.223617479546498e-06, + "loss": 0.5494, + "step": 5003 + }, + { + "epoch": 0.63, + "learning_rate": 3.2217198367831925e-06, + "loss": 0.5096, + "step": 5004 + }, + { + "epoch": 0.63, + "learning_rate": 3.219822487213652e-06, + "loss": 0.5162, + "step": 5005 + }, + { + "epoch": 0.63, + "learning_rate": 3.2179254311507026e-06, + "loss": 0.4528, + "step": 5006 + }, + { + "epoch": 0.63, + "learning_rate": 3.2160286689071185e-06, + "loss": 0.585, + "step": 5007 + }, + { + "epoch": 0.63, + "learning_rate": 3.2141322007956293e-06, + "loss": 0.5702, + "step": 5008 + }, + { + "epoch": 0.63, + "learning_rate": 3.2122360271289153e-06, + "loss": 0.57, + "step": 5009 + }, + { + "epoch": 0.63, + "learning_rate": 3.2103401482196085e-06, + "loss": 0.5674, + "step": 5010 + }, + { + "epoch": 0.63, + "learning_rate": 3.20844456438029e-06, + "loss": 0.5092, + "step": 5011 + }, + { + "epoch": 0.63, + "learning_rate": 3.2065492759234983e-06, + "loss": 0.5045, + "step": 5012 + }, + { + "epoch": 0.63, + "learning_rate": 3.2046542831617133e-06, + "loss": 0.5501, + "step": 5013 + }, + { + "epoch": 0.63, + "learning_rate": 3.202759586407374e-06, + "loss": 0.4946, + "step": 5014 + }, + { + "epoch": 0.63, + "learning_rate": 3.200865185972869e-06, + "loss": 0.5113, + "step": 5015 + }, + { + "epoch": 0.63, + "learning_rate": 3.1989710821705367e-06, + "loss": 0.4889, + "step": 5016 + }, + { + "epoch": 0.63, + "learning_rate": 3.1970772753126696e-06, + "loss": 0.5718, + "step": 5017 + }, + { + "epoch": 0.63, + "learning_rate": 3.195183765711505e-06, + "loss": 0.52, + "step": 5018 + }, + { + "epoch": 0.63, + "learning_rate": 3.1932905536792365e-06, + "loss": 0.4776, + "step": 5019 + }, + { + "epoch": 0.63, + "learning_rate": 3.191397639528008e-06, + "loss": 0.5131, + "step": 5020 + }, + { + "epoch": 0.63, + "learning_rate": 3.189505023569913e-06, + "loss": 0.5006, + "step": 5021 + }, + { + "epoch": 0.63, + "learning_rate": 3.1876127061169963e-06, + "loss": 0.4982, + "step": 5022 + }, + { + "epoch": 0.63, + "learning_rate": 3.185720687481256e-06, + "loss": 0.5502, + "step": 5023 + }, + { + "epoch": 0.63, + "learning_rate": 3.1838289679746347e-06, + "loss": 0.5001, + "step": 5024 + }, + { + "epoch": 0.63, + "learning_rate": 3.181937547909031e-06, + "loss": 0.5062, + "step": 5025 + }, + { + "epoch": 0.63, + "learning_rate": 3.180046427596293e-06, + "loss": 0.5354, + "step": 5026 + }, + { + "epoch": 0.63, + "learning_rate": 3.1781556073482177e-06, + "loss": 0.5339, + "step": 5027 + }, + { + "epoch": 0.63, + "learning_rate": 3.1762650874765565e-06, + "loss": 0.5152, + "step": 5028 + }, + { + "epoch": 0.63, + "learning_rate": 3.174374868293005e-06, + "loss": 0.5579, + "step": 5029 + }, + { + "epoch": 0.63, + "learning_rate": 3.172484950109217e-06, + "loss": 0.526, + "step": 5030 + }, + { + "epoch": 0.63, + "learning_rate": 3.1705953332367894e-06, + "loss": 0.5499, + "step": 5031 + }, + { + "epoch": 0.63, + "learning_rate": 3.1687060179872736e-06, + "loss": 0.5437, + "step": 5032 + }, + { + "epoch": 0.63, + "learning_rate": 3.1668170046721712e-06, + "loss": 0.5211, + "step": 5033 + }, + { + "epoch": 0.63, + "learning_rate": 3.164928293602931e-06, + "loss": 0.4885, + "step": 5034 + }, + { + "epoch": 0.63, + "learning_rate": 3.163039885090956e-06, + "loss": 0.5404, + "step": 5035 + }, + { + "epoch": 0.63, + "learning_rate": 3.161151779447597e-06, + "loss": 0.5204, + "step": 5036 + }, + { + "epoch": 0.63, + "learning_rate": 3.1592639769841538e-06, + "loss": 0.5025, + "step": 5037 + }, + { + "epoch": 0.63, + "learning_rate": 3.1573764780118793e-06, + "loss": 0.5023, + "step": 5038 + }, + { + "epoch": 0.63, + "learning_rate": 3.155489282841975e-06, + "loss": 0.5417, + "step": 5039 + }, + { + "epoch": 0.63, + "learning_rate": 3.1536023917855897e-06, + "loss": 0.5163, + "step": 5040 + }, + { + "epoch": 0.63, + "learning_rate": 3.151715805153826e-06, + "loss": 0.5369, + "step": 5041 + }, + { + "epoch": 0.63, + "learning_rate": 3.1498295232577347e-06, + "loss": 0.4939, + "step": 5042 + }, + { + "epoch": 0.63, + "learning_rate": 3.147943546408317e-06, + "loss": 0.5252, + "step": 5043 + }, + { + "epoch": 0.63, + "learning_rate": 3.146057874916522e-06, + "loss": 0.5211, + "step": 5044 + }, + { + "epoch": 0.63, + "learning_rate": 3.144172509093249e-06, + "loss": 0.4825, + "step": 5045 + }, + { + "epoch": 0.63, + "learning_rate": 3.1422874492493484e-06, + "loss": 0.532, + "step": 5046 + }, + { + "epoch": 0.63, + "learning_rate": 3.140402695695619e-06, + "loss": 0.4972, + "step": 5047 + }, + { + "epoch": 0.63, + "learning_rate": 3.13851824874281e-06, + "loss": 0.5144, + "step": 5048 + }, + { + "epoch": 0.63, + "learning_rate": 3.1366341087016218e-06, + "loss": 0.5416, + "step": 5049 + }, + { + "epoch": 0.63, + "learning_rate": 3.1347502758826953e-06, + "loss": 0.5777, + "step": 5050 + }, + { + "epoch": 0.63, + "learning_rate": 3.1328667505966314e-06, + "loss": 0.4925, + "step": 5051 + }, + { + "epoch": 0.63, + "learning_rate": 3.1309835331539762e-06, + "loss": 0.5169, + "step": 5052 + }, + { + "epoch": 0.63, + "learning_rate": 3.1291006238652243e-06, + "loss": 0.503, + "step": 5053 + }, + { + "epoch": 0.63, + "learning_rate": 3.1272180230408204e-06, + "loss": 0.5699, + "step": 5054 + }, + { + "epoch": 0.63, + "learning_rate": 3.1253357309911604e-06, + "loss": 0.488, + "step": 5055 + }, + { + "epoch": 0.63, + "learning_rate": 3.1234537480265818e-06, + "loss": 0.4845, + "step": 5056 + }, + { + "epoch": 0.63, + "learning_rate": 3.1215720744573796e-06, + "loss": 0.5287, + "step": 5057 + }, + { + "epoch": 0.63, + "learning_rate": 3.1196907105937946e-06, + "loss": 0.5051, + "step": 5058 + }, + { + "epoch": 0.63, + "learning_rate": 3.1178096567460157e-06, + "loss": 0.2271, + "step": 5059 + }, + { + "epoch": 0.63, + "learning_rate": 3.1159289132241836e-06, + "loss": 0.4823, + "step": 5060 + }, + { + "epoch": 0.63, + "learning_rate": 3.114048480338382e-06, + "loss": 0.5289, + "step": 5061 + }, + { + "epoch": 0.63, + "learning_rate": 3.1121683583986495e-06, + "loss": 0.576, + "step": 5062 + }, + { + "epoch": 0.63, + "learning_rate": 3.11028854771497e-06, + "loss": 0.514, + "step": 5063 + }, + { + "epoch": 0.63, + "learning_rate": 3.108409048597278e-06, + "loss": 0.5071, + "step": 5064 + }, + { + "epoch": 0.63, + "learning_rate": 3.1065298613554574e-06, + "loss": 0.5163, + "step": 5065 + }, + { + "epoch": 0.64, + "learning_rate": 3.1046509862993346e-06, + "loss": 0.4825, + "step": 5066 + }, + { + "epoch": 0.64, + "learning_rate": 3.1027724237386913e-06, + "loss": 0.4488, + "step": 5067 + }, + { + "epoch": 0.64, + "learning_rate": 3.1008941739832555e-06, + "loss": 0.489, + "step": 5068 + }, + { + "epoch": 0.64, + "learning_rate": 3.0990162373427034e-06, + "loss": 0.5996, + "step": 5069 + }, + { + "epoch": 0.64, + "learning_rate": 3.097138614126659e-06, + "loss": 0.5761, + "step": 5070 + }, + { + "epoch": 0.64, + "learning_rate": 3.0952613046446984e-06, + "loss": 0.5477, + "step": 5071 + }, + { + "epoch": 0.64, + "learning_rate": 3.0933843092063377e-06, + "loss": 0.5607, + "step": 5072 + }, + { + "epoch": 0.64, + "learning_rate": 3.0915076281210487e-06, + "loss": 0.5613, + "step": 5073 + }, + { + "epoch": 0.64, + "learning_rate": 3.0896312616982493e-06, + "loss": 0.4791, + "step": 5074 + }, + { + "epoch": 0.64, + "learning_rate": 3.0877552102473047e-06, + "loss": 0.5469, + "step": 5075 + }, + { + "epoch": 0.64, + "learning_rate": 3.085879474077531e-06, + "loss": 0.5103, + "step": 5076 + }, + { + "epoch": 0.64, + "learning_rate": 3.0840040534981865e-06, + "loss": 0.4522, + "step": 5077 + }, + { + "epoch": 0.64, + "learning_rate": 3.082128948818482e-06, + "loss": 0.5252, + "step": 5078 + }, + { + "epoch": 0.64, + "learning_rate": 3.0802541603475754e-06, + "loss": 0.5085, + "step": 5079 + }, + { + "epoch": 0.64, + "learning_rate": 3.078379688394574e-06, + "loss": 0.5549, + "step": 5080 + }, + { + "epoch": 0.64, + "learning_rate": 3.076505533268529e-06, + "loss": 0.5234, + "step": 5081 + }, + { + "epoch": 0.64, + "learning_rate": 3.0746316952784435e-06, + "loss": 0.5228, + "step": 5082 + }, + { + "epoch": 0.64, + "learning_rate": 3.0727581747332634e-06, + "loss": 0.6049, + "step": 5083 + }, + { + "epoch": 0.64, + "learning_rate": 3.070884971941887e-06, + "loss": 0.5268, + "step": 5084 + }, + { + "epoch": 0.64, + "learning_rate": 3.0690120872131585e-06, + "loss": 0.5091, + "step": 5085 + }, + { + "epoch": 0.64, + "learning_rate": 3.0671395208558697e-06, + "loss": 0.5732, + "step": 5086 + }, + { + "epoch": 0.64, + "learning_rate": 3.06526727317876e-06, + "loss": 0.5415, + "step": 5087 + }, + { + "epoch": 0.64, + "learning_rate": 3.0633953444905145e-06, + "loss": 0.5462, + "step": 5088 + }, + { + "epoch": 0.64, + "learning_rate": 3.0615237350997695e-06, + "loss": 0.4963, + "step": 5089 + }, + { + "epoch": 0.64, + "learning_rate": 3.0596524453151046e-06, + "loss": 0.4745, + "step": 5090 + }, + { + "epoch": 0.64, + "learning_rate": 3.0577814754450485e-06, + "loss": 0.5259, + "step": 5091 + }, + { + "epoch": 0.64, + "learning_rate": 3.0559108257980796e-06, + "loss": 0.4716, + "step": 5092 + }, + { + "epoch": 0.64, + "learning_rate": 3.054040496682618e-06, + "loss": 0.5504, + "step": 5093 + }, + { + "epoch": 0.64, + "learning_rate": 3.052170488407035e-06, + "loss": 0.5717, + "step": 5094 + }, + { + "epoch": 0.64, + "learning_rate": 3.050300801279649e-06, + "loss": 0.5399, + "step": 5095 + }, + { + "epoch": 0.64, + "learning_rate": 3.048431435608724e-06, + "loss": 0.5097, + "step": 5096 + }, + { + "epoch": 0.64, + "learning_rate": 3.0465623917024713e-06, + "loss": 0.5289, + "step": 5097 + }, + { + "epoch": 0.64, + "learning_rate": 3.04469366986905e-06, + "loss": 0.5049, + "step": 5098 + }, + { + "epoch": 0.64, + "learning_rate": 3.0428252704165635e-06, + "loss": 0.5528, + "step": 5099 + }, + { + "epoch": 0.64, + "learning_rate": 3.040957193653065e-06, + "loss": 0.491, + "step": 5100 + }, + { + "epoch": 0.64, + "learning_rate": 3.0390894398865544e-06, + "loss": 0.596, + "step": 5101 + }, + { + "epoch": 0.64, + "learning_rate": 3.037222009424976e-06, + "loss": 0.5412, + "step": 5102 + }, + { + "epoch": 0.64, + "learning_rate": 3.0353549025762254e-06, + "loss": 0.5241, + "step": 5103 + }, + { + "epoch": 0.64, + "learning_rate": 3.033488119648137e-06, + "loss": 0.4952, + "step": 5104 + }, + { + "epoch": 0.64, + "learning_rate": 3.0316216609484987e-06, + "loss": 0.5627, + "step": 5105 + }, + { + "epoch": 0.64, + "learning_rate": 3.0297555267850424e-06, + "loss": 0.5282, + "step": 5106 + }, + { + "epoch": 0.64, + "learning_rate": 3.0278897174654477e-06, + "loss": 0.5917, + "step": 5107 + }, + { + "epoch": 0.64, + "learning_rate": 3.0260242332973404e-06, + "loss": 0.5103, + "step": 5108 + }, + { + "epoch": 0.64, + "learning_rate": 3.0241590745882887e-06, + "loss": 0.6106, + "step": 5109 + }, + { + "epoch": 0.64, + "learning_rate": 3.0222942416458127e-06, + "loss": 0.5254, + "step": 5110 + }, + { + "epoch": 0.64, + "learning_rate": 3.020429734777376e-06, + "loss": 0.5118, + "step": 5111 + }, + { + "epoch": 0.64, + "learning_rate": 3.0185655542903896e-06, + "loss": 0.6124, + "step": 5112 + }, + { + "epoch": 0.64, + "learning_rate": 3.0167017004922097e-06, + "loss": 0.5153, + "step": 5113 + }, + { + "epoch": 0.64, + "learning_rate": 3.014838173690141e-06, + "loss": 0.5017, + "step": 5114 + }, + { + "epoch": 0.64, + "learning_rate": 3.0129749741914276e-06, + "loss": 0.6347, + "step": 5115 + }, + { + "epoch": 0.64, + "learning_rate": 3.0111121023032674e-06, + "loss": 0.5555, + "step": 5116 + }, + { + "epoch": 0.64, + "learning_rate": 3.0092495583328017e-06, + "loss": 0.4891, + "step": 5117 + }, + { + "epoch": 0.64, + "learning_rate": 3.007387342587116e-06, + "loss": 0.545, + "step": 5118 + }, + { + "epoch": 0.64, + "learning_rate": 3.005525455373245e-06, + "loss": 0.5353, + "step": 5119 + }, + { + "epoch": 0.64, + "learning_rate": 3.0036638969981636e-06, + "loss": 0.5347, + "step": 5120 + }, + { + "epoch": 0.64, + "learning_rate": 3.001802667768798e-06, + "loss": 0.4392, + "step": 5121 + }, + { + "epoch": 0.64, + "learning_rate": 2.9999417679920177e-06, + "loss": 0.505, + "step": 5122 + }, + { + "epoch": 0.64, + "learning_rate": 2.9980811979746393e-06, + "loss": 0.4913, + "step": 5123 + }, + { + "epoch": 0.64, + "learning_rate": 2.9962209580234258e-06, + "loss": 0.5118, + "step": 5124 + }, + { + "epoch": 0.64, + "learning_rate": 2.9943610484450796e-06, + "loss": 0.2154, + "step": 5125 + }, + { + "epoch": 0.64, + "learning_rate": 2.9925014695462556e-06, + "loss": 0.4535, + "step": 5126 + }, + { + "epoch": 0.64, + "learning_rate": 2.990642221633552e-06, + "loss": 0.5562, + "step": 5127 + }, + { + "epoch": 0.64, + "learning_rate": 2.9887833050135106e-06, + "loss": 0.4909, + "step": 5128 + }, + { + "epoch": 0.64, + "learning_rate": 2.986924719992621e-06, + "loss": 0.4755, + "step": 5129 + }, + { + "epoch": 0.64, + "learning_rate": 2.98506646687732e-06, + "loss": 0.55, + "step": 5130 + }, + { + "epoch": 0.64, + "learning_rate": 2.9832085459739825e-06, + "loss": 0.5307, + "step": 5131 + }, + { + "epoch": 0.64, + "learning_rate": 2.9813509575889338e-06, + "loss": 0.5028, + "step": 5132 + }, + { + "epoch": 0.64, + "learning_rate": 2.9794937020284453e-06, + "loss": 0.5052, + "step": 5133 + }, + { + "epoch": 0.64, + "learning_rate": 2.9776367795987305e-06, + "loss": 0.5152, + "step": 5134 + }, + { + "epoch": 0.64, + "learning_rate": 2.9757801906059524e-06, + "loss": 0.4741, + "step": 5135 + }, + { + "epoch": 0.64, + "learning_rate": 2.973923935356211e-06, + "loss": 0.4949, + "step": 5136 + }, + { + "epoch": 0.64, + "learning_rate": 2.972068014155558e-06, + "loss": 0.5137, + "step": 5137 + }, + { + "epoch": 0.64, + "learning_rate": 2.9702124273099897e-06, + "loss": 0.5391, + "step": 5138 + }, + { + "epoch": 0.64, + "learning_rate": 2.9683571751254447e-06, + "loss": 0.5032, + "step": 5139 + }, + { + "epoch": 0.64, + "learning_rate": 2.9665022579078083e-06, + "loss": 0.509, + "step": 5140 + }, + { + "epoch": 0.64, + "learning_rate": 2.964647675962909e-06, + "loss": 0.4971, + "step": 5141 + }, + { + "epoch": 0.64, + "learning_rate": 2.9627934295965217e-06, + "loss": 0.1953, + "step": 5142 + }, + { + "epoch": 0.64, + "learning_rate": 2.960939519114363e-06, + "loss": 0.5582, + "step": 5143 + }, + { + "epoch": 0.64, + "learning_rate": 2.959085944822098e-06, + "loss": 0.576, + "step": 5144 + }, + { + "epoch": 0.64, + "learning_rate": 2.9572327070253343e-06, + "loss": 0.5633, + "step": 5145 + }, + { + "epoch": 0.65, + "learning_rate": 2.9553798060296245e-06, + "loss": 0.4962, + "step": 5146 + }, + { + "epoch": 0.65, + "learning_rate": 2.953527242140465e-06, + "loss": 0.5436, + "step": 5147 + }, + { + "epoch": 0.65, + "learning_rate": 2.951675015663297e-06, + "loss": 0.5881, + "step": 5148 + }, + { + "epoch": 0.65, + "learning_rate": 2.949823126903507e-06, + "loss": 0.4838, + "step": 5149 + }, + { + "epoch": 0.65, + "learning_rate": 2.947971576166424e-06, + "loss": 0.5306, + "step": 5150 + }, + { + "epoch": 0.65, + "learning_rate": 2.946120363757323e-06, + "loss": 0.521, + "step": 5151 + }, + { + "epoch": 0.65, + "learning_rate": 2.9442694899814216e-06, + "loss": 0.503, + "step": 5152 + }, + { + "epoch": 0.65, + "learning_rate": 2.942418955143883e-06, + "loss": 0.5237, + "step": 5153 + }, + { + "epoch": 0.65, + "learning_rate": 2.9405687595498135e-06, + "loss": 0.5929, + "step": 5154 + }, + { + "epoch": 0.65, + "learning_rate": 2.938718903504264e-06, + "loss": 0.5318, + "step": 5155 + }, + { + "epoch": 0.65, + "learning_rate": 2.936869387312232e-06, + "loss": 0.5167, + "step": 5156 + }, + { + "epoch": 0.65, + "learning_rate": 2.935020211278652e-06, + "loss": 0.5749, + "step": 5157 + }, + { + "epoch": 0.65, + "learning_rate": 2.933171375708408e-06, + "loss": 0.5073, + "step": 5158 + }, + { + "epoch": 0.65, + "learning_rate": 2.9313228809063276e-06, + "loss": 0.5931, + "step": 5159 + }, + { + "epoch": 0.65, + "learning_rate": 2.9294747271771806e-06, + "loss": 0.5761, + "step": 5160 + }, + { + "epoch": 0.65, + "learning_rate": 2.9276269148256815e-06, + "loss": 0.5229, + "step": 5161 + }, + { + "epoch": 0.65, + "learning_rate": 2.92577944415649e-06, + "loss": 0.4554, + "step": 5162 + }, + { + "epoch": 0.65, + "learning_rate": 2.923932315474204e-06, + "loss": 0.5136, + "step": 5163 + }, + { + "epoch": 0.65, + "learning_rate": 2.9220855290833695e-06, + "loss": 0.5016, + "step": 5164 + }, + { + "epoch": 0.65, + "learning_rate": 2.9202390852884764e-06, + "loss": 0.5044, + "step": 5165 + }, + { + "epoch": 0.65, + "learning_rate": 2.9183929843939574e-06, + "loss": 0.5044, + "step": 5166 + }, + { + "epoch": 0.65, + "learning_rate": 2.9165472267041884e-06, + "loss": 0.4729, + "step": 5167 + }, + { + "epoch": 0.65, + "learning_rate": 2.9147018125234854e-06, + "loss": 0.5809, + "step": 5168 + }, + { + "epoch": 0.65, + "learning_rate": 2.9128567421561138e-06, + "loss": 0.523, + "step": 5169 + }, + { + "epoch": 0.65, + "learning_rate": 2.911012015906278e-06, + "loss": 0.5553, + "step": 5170 + }, + { + "epoch": 0.65, + "learning_rate": 2.909167634078128e-06, + "loss": 0.2202, + "step": 5171 + }, + { + "epoch": 0.65, + "learning_rate": 2.907323596975755e-06, + "loss": 0.5379, + "step": 5172 + }, + { + "epoch": 0.65, + "learning_rate": 2.905479904903198e-06, + "loss": 0.5264, + "step": 5173 + }, + { + "epoch": 0.65, + "learning_rate": 2.9036365581644297e-06, + "loss": 0.4837, + "step": 5174 + }, + { + "epoch": 0.65, + "learning_rate": 2.9017935570633756e-06, + "loss": 0.487, + "step": 5175 + }, + { + "epoch": 0.65, + "learning_rate": 2.8999509019038984e-06, + "loss": 0.5357, + "step": 5176 + }, + { + "epoch": 0.65, + "learning_rate": 2.898108592989808e-06, + "loss": 0.553, + "step": 5177 + }, + { + "epoch": 0.65, + "learning_rate": 2.8962666306248545e-06, + "loss": 0.519, + "step": 5178 + }, + { + "epoch": 0.65, + "learning_rate": 2.8944250151127286e-06, + "loss": 0.5007, + "step": 5179 + }, + { + "epoch": 0.65, + "learning_rate": 2.8925837467570685e-06, + "loss": 0.491, + "step": 5180 + }, + { + "epoch": 0.65, + "learning_rate": 2.8907428258614528e-06, + "loss": 0.5139, + "step": 5181 + }, + { + "epoch": 0.65, + "learning_rate": 2.888902252729403e-06, + "loss": 0.532, + "step": 5182 + }, + { + "epoch": 0.65, + "learning_rate": 2.887062027664386e-06, + "loss": 0.483, + "step": 5183 + }, + { + "epoch": 0.65, + "learning_rate": 2.885222150969804e-06, + "loss": 0.5431, + "step": 5184 + }, + { + "epoch": 0.65, + "learning_rate": 2.883382622949009e-06, + "loss": 0.5023, + "step": 5185 + }, + { + "epoch": 0.65, + "learning_rate": 2.8815434439052923e-06, + "loss": 0.5664, + "step": 5186 + }, + { + "epoch": 0.65, + "learning_rate": 2.8797046141418896e-06, + "loss": 0.5154, + "step": 5187 + }, + { + "epoch": 0.65, + "learning_rate": 2.8778661339619773e-06, + "loss": 0.5566, + "step": 5188 + }, + { + "epoch": 0.65, + "learning_rate": 2.8760280036686755e-06, + "loss": 0.5372, + "step": 5189 + }, + { + "epoch": 0.65, + "learning_rate": 2.874190223565043e-06, + "loss": 0.5604, + "step": 5190 + }, + { + "epoch": 0.65, + "learning_rate": 2.872352793954085e-06, + "loss": 0.5134, + "step": 5191 + }, + { + "epoch": 0.65, + "learning_rate": 2.8705157151387485e-06, + "loss": 0.4992, + "step": 5192 + }, + { + "epoch": 0.65, + "learning_rate": 2.8686789874219205e-06, + "loss": 0.5029, + "step": 5193 + }, + { + "epoch": 0.65, + "learning_rate": 2.866842611106434e-06, + "loss": 0.4677, + "step": 5194 + }, + { + "epoch": 0.65, + "learning_rate": 2.865006586495057e-06, + "loss": 0.4808, + "step": 5195 + }, + { + "epoch": 0.65, + "learning_rate": 2.8631709138905063e-06, + "loss": 0.5354, + "step": 5196 + }, + { + "epoch": 0.65, + "learning_rate": 2.8613355935954373e-06, + "loss": 0.5122, + "step": 5197 + }, + { + "epoch": 0.65, + "learning_rate": 2.8595006259124492e-06, + "loss": 0.248, + "step": 5198 + }, + { + "epoch": 0.65, + "learning_rate": 2.857666011144084e-06, + "loss": 0.5052, + "step": 5199 + }, + { + "epoch": 0.65, + "learning_rate": 2.855831749592819e-06, + "loss": 0.5369, + "step": 5200 + }, + { + "epoch": 0.65, + "learning_rate": 2.8539978415610808e-06, + "loss": 0.5494, + "step": 5201 + }, + { + "epoch": 0.65, + "learning_rate": 2.8521642873512346e-06, + "loss": 0.4947, + "step": 5202 + }, + { + "epoch": 0.65, + "learning_rate": 2.8503310872655866e-06, + "loss": 0.5253, + "step": 5203 + }, + { + "epoch": 0.65, + "learning_rate": 2.8484982416063856e-06, + "loss": 0.5406, + "step": 5204 + }, + { + "epoch": 0.65, + "learning_rate": 2.8466657506758254e-06, + "loss": 0.4896, + "step": 5205 + }, + { + "epoch": 0.65, + "learning_rate": 2.844833614776032e-06, + "loss": 0.5017, + "step": 5206 + }, + { + "epoch": 0.65, + "learning_rate": 2.8430018342090814e-06, + "loss": 0.4323, + "step": 5207 + }, + { + "epoch": 0.65, + "learning_rate": 2.841170409276989e-06, + "loss": 0.4626, + "step": 5208 + }, + { + "epoch": 0.65, + "learning_rate": 2.8393393402817084e-06, + "loss": 0.5449, + "step": 5209 + }, + { + "epoch": 0.65, + "learning_rate": 2.837508627525142e-06, + "loss": 0.5333, + "step": 5210 + }, + { + "epoch": 0.65, + "learning_rate": 2.835678271309121e-06, + "loss": 0.5774, + "step": 5211 + }, + { + "epoch": 0.65, + "learning_rate": 2.833848271935431e-06, + "loss": 0.5244, + "step": 5212 + }, + { + "epoch": 0.65, + "learning_rate": 2.8320186297057896e-06, + "loss": 0.4483, + "step": 5213 + }, + { + "epoch": 0.65, + "learning_rate": 2.830189344921861e-06, + "loss": 0.5394, + "step": 5214 + }, + { + "epoch": 0.65, + "learning_rate": 2.8283604178852496e-06, + "loss": 0.5386, + "step": 5215 + }, + { + "epoch": 0.65, + "learning_rate": 2.8265318488974956e-06, + "loss": 0.5227, + "step": 5216 + }, + { + "epoch": 0.65, + "learning_rate": 2.824703638260086e-06, + "loss": 0.5409, + "step": 5217 + }, + { + "epoch": 0.65, + "learning_rate": 2.8228757862744473e-06, + "loss": 0.6062, + "step": 5218 + }, + { + "epoch": 0.65, + "learning_rate": 2.821048293241945e-06, + "loss": 0.5497, + "step": 5219 + }, + { + "epoch": 0.65, + "learning_rate": 2.8192211594638893e-06, + "loss": 0.5744, + "step": 5220 + }, + { + "epoch": 0.65, + "learning_rate": 2.8173943852415266e-06, + "loss": 0.5166, + "step": 5221 + }, + { + "epoch": 0.65, + "learning_rate": 2.8155679708760487e-06, + "loss": 0.474, + "step": 5222 + }, + { + "epoch": 0.65, + "learning_rate": 2.813741916668582e-06, + "loss": 0.4748, + "step": 5223 + }, + { + "epoch": 0.65, + "learning_rate": 2.8119162229201987e-06, + "loss": 0.5141, + "step": 5224 + }, + { + "epoch": 0.65, + "learning_rate": 2.8100908899319098e-06, + "loss": 0.5448, + "step": 5225 + }, + { + "epoch": 0.66, + "learning_rate": 2.808265918004667e-06, + "loss": 0.5496, + "step": 5226 + }, + { + "epoch": 0.66, + "learning_rate": 2.8064413074393614e-06, + "loss": 0.5571, + "step": 5227 + }, + { + "epoch": 0.66, + "learning_rate": 2.8046170585368288e-06, + "loss": 0.1839, + "step": 5228 + }, + { + "epoch": 0.66, + "learning_rate": 2.802793171597838e-06, + "loss": 0.4841, + "step": 5229 + }, + { + "epoch": 0.66, + "learning_rate": 2.800969646923103e-06, + "loss": 0.2159, + "step": 5230 + }, + { + "epoch": 0.66, + "learning_rate": 2.7991464848132788e-06, + "loss": 0.2162, + "step": 5231 + }, + { + "epoch": 0.66, + "learning_rate": 2.7973236855689577e-06, + "loss": 0.5658, + "step": 5232 + }, + { + "epoch": 0.66, + "learning_rate": 2.795501249490675e-06, + "loss": 0.5524, + "step": 5233 + }, + { + "epoch": 0.66, + "learning_rate": 2.793679176878903e-06, + "loss": 0.4909, + "step": 5234 + }, + { + "epoch": 0.66, + "learning_rate": 2.7918574680340594e-06, + "loss": 0.4567, + "step": 5235 + }, + { + "epoch": 0.66, + "learning_rate": 2.790036123256492e-06, + "loss": 0.4735, + "step": 5236 + }, + { + "epoch": 0.66, + "learning_rate": 2.7882151428464997e-06, + "loss": 0.5048, + "step": 5237 + }, + { + "epoch": 0.66, + "learning_rate": 2.786394527104314e-06, + "loss": 0.5089, + "step": 5238 + }, + { + "epoch": 0.66, + "learning_rate": 2.78457427633011e-06, + "loss": 0.5303, + "step": 5239 + }, + { + "epoch": 0.66, + "learning_rate": 2.782754390824002e-06, + "loss": 0.4669, + "step": 5240 + }, + { + "epoch": 0.66, + "learning_rate": 2.7809348708860418e-06, + "loss": 0.5581, + "step": 5241 + }, + { + "epoch": 0.66, + "learning_rate": 2.779115716816225e-06, + "loss": 0.5111, + "step": 5242 + }, + { + "epoch": 0.66, + "learning_rate": 2.7772969289144813e-06, + "loss": 0.5918, + "step": 5243 + }, + { + "epoch": 0.66, + "learning_rate": 2.7754785074806844e-06, + "loss": 0.5719, + "step": 5244 + }, + { + "epoch": 0.66, + "learning_rate": 2.7736604528146466e-06, + "loss": 0.502, + "step": 5245 + }, + { + "epoch": 0.66, + "learning_rate": 2.7718427652161195e-06, + "loss": 0.5468, + "step": 5246 + }, + { + "epoch": 0.66, + "learning_rate": 2.770025444984794e-06, + "loss": 0.57, + "step": 5247 + }, + { + "epoch": 0.66, + "learning_rate": 2.7682084924203036e-06, + "loss": 0.4965, + "step": 5248 + }, + { + "epoch": 0.66, + "learning_rate": 2.766391907822212e-06, + "loss": 0.5041, + "step": 5249 + }, + { + "epoch": 0.66, + "learning_rate": 2.7645756914900325e-06, + "loss": 0.4689, + "step": 5250 + }, + { + "epoch": 0.66, + "learning_rate": 2.762759843723213e-06, + "loss": 0.5855, + "step": 5251 + }, + { + "epoch": 0.66, + "learning_rate": 2.760944364821141e-06, + "loss": 0.5202, + "step": 5252 + }, + { + "epoch": 0.66, + "learning_rate": 2.759129255083145e-06, + "loss": 0.5287, + "step": 5253 + }, + { + "epoch": 0.66, + "learning_rate": 2.757314514808487e-06, + "loss": 0.5477, + "step": 5254 + }, + { + "epoch": 0.66, + "learning_rate": 2.7555001442963754e-06, + "loss": 0.5348, + "step": 5255 + }, + { + "epoch": 0.66, + "learning_rate": 2.7536861438459532e-06, + "loss": 0.4625, + "step": 5256 + }, + { + "epoch": 0.66, + "learning_rate": 2.751872513756304e-06, + "loss": 0.5667, + "step": 5257 + }, + { + "epoch": 0.66, + "learning_rate": 2.7500592543264515e-06, + "loss": 0.5554, + "step": 5258 + }, + { + "epoch": 0.66, + "learning_rate": 2.748246365855354e-06, + "loss": 0.2251, + "step": 5259 + }, + { + "epoch": 0.66, + "learning_rate": 2.746433848641911e-06, + "loss": 0.5162, + "step": 5260 + }, + { + "epoch": 0.66, + "learning_rate": 2.7446217029849626e-06, + "loss": 0.5878, + "step": 5261 + }, + { + "epoch": 0.66, + "learning_rate": 2.7428099291832854e-06, + "loss": 0.5615, + "step": 5262 + }, + { + "epoch": 0.66, + "learning_rate": 2.7409985275355967e-06, + "loss": 0.5692, + "step": 5263 + }, + { + "epoch": 0.66, + "learning_rate": 2.739187498340552e-06, + "loss": 0.542, + "step": 5264 + }, + { + "epoch": 0.66, + "learning_rate": 2.7373768418967415e-06, + "loss": 0.2172, + "step": 5265 + }, + { + "epoch": 0.66, + "learning_rate": 2.735566558502698e-06, + "loss": 0.5496, + "step": 5266 + }, + { + "epoch": 0.66, + "learning_rate": 2.7337566484568924e-06, + "loss": 0.5943, + "step": 5267 + }, + { + "epoch": 0.66, + "learning_rate": 2.7319471120577323e-06, + "loss": 0.561, + "step": 5268 + }, + { + "epoch": 0.66, + "learning_rate": 2.7301379496035684e-06, + "loss": 0.4704, + "step": 5269 + }, + { + "epoch": 0.66, + "learning_rate": 2.728329161392682e-06, + "loss": 0.5226, + "step": 5270 + }, + { + "epoch": 0.66, + "learning_rate": 2.726520747723298e-06, + "loss": 0.2387, + "step": 5271 + }, + { + "epoch": 0.66, + "learning_rate": 2.7247127088935785e-06, + "loss": 0.4507, + "step": 5272 + }, + { + "epoch": 0.66, + "learning_rate": 2.722905045201625e-06, + "loss": 0.2214, + "step": 5273 + }, + { + "epoch": 0.66, + "learning_rate": 2.721097756945476e-06, + "loss": 0.5182, + "step": 5274 + }, + { + "epoch": 0.66, + "learning_rate": 2.7192908444231054e-06, + "loss": 0.4597, + "step": 5275 + }, + { + "epoch": 0.66, + "learning_rate": 2.7174843079324293e-06, + "loss": 0.5181, + "step": 5276 + }, + { + "epoch": 0.66, + "learning_rate": 2.7156781477713003e-06, + "loss": 0.5185, + "step": 5277 + }, + { + "epoch": 0.66, + "learning_rate": 2.713872364237508e-06, + "loss": 0.5187, + "step": 5278 + }, + { + "epoch": 0.66, + "learning_rate": 2.7120669576287823e-06, + "loss": 0.5165, + "step": 5279 + }, + { + "epoch": 0.66, + "learning_rate": 2.71026192824279e-06, + "loss": 0.5005, + "step": 5280 + }, + { + "epoch": 0.66, + "learning_rate": 2.708457276377132e-06, + "loss": 0.5732, + "step": 5281 + }, + { + "epoch": 0.66, + "learning_rate": 2.7066530023293525e-06, + "loss": 0.495, + "step": 5282 + }, + { + "epoch": 0.66, + "learning_rate": 2.7048491063969293e-06, + "loss": 0.5049, + "step": 5283 + }, + { + "epoch": 0.66, + "learning_rate": 2.703045588877282e-06, + "loss": 0.4988, + "step": 5284 + }, + { + "epoch": 0.66, + "learning_rate": 2.701242450067765e-06, + "loss": 0.5209, + "step": 5285 + }, + { + "epoch": 0.66, + "learning_rate": 2.699439690265668e-06, + "loss": 0.5454, + "step": 5286 + }, + { + "epoch": 0.66, + "learning_rate": 2.697637309768223e-06, + "loss": 0.5691, + "step": 5287 + }, + { + "epoch": 0.66, + "learning_rate": 2.695835308872596e-06, + "loss": 0.5266, + "step": 5288 + }, + { + "epoch": 0.66, + "learning_rate": 2.6940336878758925e-06, + "loss": 0.5392, + "step": 5289 + }, + { + "epoch": 0.66, + "learning_rate": 2.692232447075157e-06, + "loss": 0.4798, + "step": 5290 + }, + { + "epoch": 0.66, + "learning_rate": 2.6904315867673645e-06, + "loss": 0.6092, + "step": 5291 + }, + { + "epoch": 0.66, + "learning_rate": 2.6886311072494333e-06, + "loss": 0.5659, + "step": 5292 + }, + { + "epoch": 0.66, + "learning_rate": 2.6868310088182182e-06, + "loss": 0.5894, + "step": 5293 + }, + { + "epoch": 0.66, + "learning_rate": 2.685031291770509e-06, + "loss": 0.5193, + "step": 5294 + }, + { + "epoch": 0.66, + "learning_rate": 2.683231956403035e-06, + "loss": 0.4865, + "step": 5295 + }, + { + "epoch": 0.66, + "learning_rate": 2.6814330030124635e-06, + "loss": 0.5515, + "step": 5296 + }, + { + "epoch": 0.66, + "learning_rate": 2.6796344318953915e-06, + "loss": 0.497, + "step": 5297 + }, + { + "epoch": 0.66, + "learning_rate": 2.6778362433483612e-06, + "loss": 0.4725, + "step": 5298 + }, + { + "epoch": 0.66, + "learning_rate": 2.6760384376678485e-06, + "loss": 0.5313, + "step": 5299 + }, + { + "epoch": 0.66, + "learning_rate": 2.674241015150267e-06, + "loss": 0.4929, + "step": 5300 + }, + { + "epoch": 0.66, + "learning_rate": 2.6724439760919673e-06, + "loss": 0.4923, + "step": 5301 + }, + { + "epoch": 0.66, + "learning_rate": 2.670647320789233e-06, + "loss": 0.5427, + "step": 5302 + }, + { + "epoch": 0.66, + "learning_rate": 2.6688510495382896e-06, + "loss": 0.5164, + "step": 5303 + }, + { + "epoch": 0.66, + "learning_rate": 2.6670551626352963e-06, + "loss": 0.5172, + "step": 5304 + }, + { + "epoch": 0.66, + "learning_rate": 2.665259660376351e-06, + "loss": 0.6237, + "step": 5305 + }, + { + "epoch": 0.67, + "learning_rate": 2.663464543057486e-06, + "loss": 0.523, + "step": 5306 + }, + { + "epoch": 0.67, + "learning_rate": 2.6616698109746726e-06, + "loss": 0.5067, + "step": 5307 + }, + { + "epoch": 0.67, + "learning_rate": 2.6598754644238147e-06, + "loss": 0.4591, + "step": 5308 + }, + { + "epoch": 0.67, + "learning_rate": 2.658081503700755e-06, + "loss": 0.5073, + "step": 5309 + }, + { + "epoch": 0.67, + "learning_rate": 2.6562879291012743e-06, + "loss": 0.5454, + "step": 5310 + }, + { + "epoch": 0.67, + "learning_rate": 2.6544947409210875e-06, + "loss": 0.4598, + "step": 5311 + }, + { + "epoch": 0.67, + "learning_rate": 2.6527019394558485e-06, + "loss": 0.5614, + "step": 5312 + }, + { + "epoch": 0.67, + "learning_rate": 2.65090952500114e-06, + "loss": 0.5217, + "step": 5313 + }, + { + "epoch": 0.67, + "learning_rate": 2.6491174978524904e-06, + "loss": 0.5312, + "step": 5314 + }, + { + "epoch": 0.67, + "learning_rate": 2.6473258583053575e-06, + "loss": 0.5335, + "step": 5315 + }, + { + "epoch": 0.67, + "learning_rate": 2.64553460665514e-06, + "loss": 0.4736, + "step": 5316 + }, + { + "epoch": 0.67, + "learning_rate": 2.643743743197171e-06, + "loss": 0.5734, + "step": 5317 + }, + { + "epoch": 0.67, + "learning_rate": 2.6419532682267145e-06, + "loss": 0.5266, + "step": 5318 + }, + { + "epoch": 0.67, + "learning_rate": 2.6401631820389786e-06, + "loss": 0.5075, + "step": 5319 + }, + { + "epoch": 0.67, + "learning_rate": 2.638373484929102e-06, + "loss": 0.5427, + "step": 5320 + }, + { + "epoch": 0.67, + "learning_rate": 2.6365841771921617e-06, + "loss": 0.5241, + "step": 5321 + }, + { + "epoch": 0.67, + "learning_rate": 2.6347952591231696e-06, + "loss": 0.4801, + "step": 5322 + }, + { + "epoch": 0.67, + "learning_rate": 2.633006731017074e-06, + "loss": 0.52, + "step": 5323 + }, + { + "epoch": 0.67, + "learning_rate": 2.631218593168756e-06, + "loss": 0.4976, + "step": 5324 + }, + { + "epoch": 0.67, + "learning_rate": 2.6294308458730366e-06, + "loss": 0.5065, + "step": 5325 + }, + { + "epoch": 0.67, + "learning_rate": 2.6276434894246694e-06, + "loss": 0.5328, + "step": 5326 + }, + { + "epoch": 0.67, + "learning_rate": 2.6258565241183453e-06, + "loss": 0.5893, + "step": 5327 + }, + { + "epoch": 0.67, + "learning_rate": 2.6240699502486923e-06, + "loss": 0.5373, + "step": 5328 + }, + { + "epoch": 0.67, + "learning_rate": 2.622283768110267e-06, + "loss": 0.566, + "step": 5329 + }, + { + "epoch": 0.67, + "learning_rate": 2.620497977997568e-06, + "loss": 0.5268, + "step": 5330 + }, + { + "epoch": 0.67, + "learning_rate": 2.618712580205027e-06, + "loss": 0.4824, + "step": 5331 + }, + { + "epoch": 0.67, + "learning_rate": 2.6169275750270124e-06, + "loss": 0.5593, + "step": 5332 + }, + { + "epoch": 0.67, + "learning_rate": 2.6151429627578255e-06, + "loss": 0.5687, + "step": 5333 + }, + { + "epoch": 0.67, + "learning_rate": 2.613358743691707e-06, + "loss": 0.5416, + "step": 5334 + }, + { + "epoch": 0.67, + "learning_rate": 2.6115749181228257e-06, + "loss": 0.5247, + "step": 5335 + }, + { + "epoch": 0.67, + "learning_rate": 2.609791486345291e-06, + "loss": 0.5227, + "step": 5336 + }, + { + "epoch": 0.67, + "learning_rate": 2.6080084486531467e-06, + "loss": 0.4848, + "step": 5337 + }, + { + "epoch": 0.67, + "learning_rate": 2.6062258053403705e-06, + "loss": 0.539, + "step": 5338 + }, + { + "epoch": 0.67, + "learning_rate": 2.6044435567008757e-06, + "loss": 0.4685, + "step": 5339 + }, + { + "epoch": 0.67, + "learning_rate": 2.6026617030285105e-06, + "loss": 0.4947, + "step": 5340 + }, + { + "epoch": 0.67, + "learning_rate": 2.60088024461706e-06, + "loss": 0.5075, + "step": 5341 + }, + { + "epoch": 0.67, + "learning_rate": 2.599099181760238e-06, + "loss": 0.5454, + "step": 5342 + }, + { + "epoch": 0.67, + "learning_rate": 2.5973185147516987e-06, + "loss": 0.4484, + "step": 5343 + }, + { + "epoch": 0.67, + "learning_rate": 2.5955382438850295e-06, + "loss": 0.5058, + "step": 5344 + }, + { + "epoch": 0.67, + "learning_rate": 2.5937583694537523e-06, + "loss": 0.5422, + "step": 5345 + }, + { + "epoch": 0.67, + "learning_rate": 2.5919788917513234e-06, + "loss": 0.4726, + "step": 5346 + }, + { + "epoch": 0.67, + "learning_rate": 2.5901998110711347e-06, + "loss": 0.5496, + "step": 5347 + }, + { + "epoch": 0.67, + "learning_rate": 2.588421127706514e-06, + "loss": 0.5105, + "step": 5348 + }, + { + "epoch": 0.67, + "learning_rate": 2.586642841950717e-06, + "loss": 0.5704, + "step": 5349 + }, + { + "epoch": 0.67, + "learning_rate": 2.5848649540969405e-06, + "loss": 0.4774, + "step": 5350 + }, + { + "epoch": 0.67, + "learning_rate": 2.5830874644383143e-06, + "loss": 0.5144, + "step": 5351 + }, + { + "epoch": 0.67, + "learning_rate": 2.581310373267901e-06, + "loss": 0.5027, + "step": 5352 + }, + { + "epoch": 0.67, + "learning_rate": 2.5795336808786974e-06, + "loss": 0.597, + "step": 5353 + }, + { + "epoch": 0.67, + "learning_rate": 2.577757387563638e-06, + "loss": 0.5294, + "step": 5354 + }, + { + "epoch": 0.67, + "learning_rate": 2.5759814936155893e-06, + "loss": 0.5795, + "step": 5355 + }, + { + "epoch": 0.67, + "learning_rate": 2.574205999327348e-06, + "loss": 0.2712, + "step": 5356 + }, + { + "epoch": 0.67, + "learning_rate": 2.5724309049916503e-06, + "loss": 0.4913, + "step": 5357 + }, + { + "epoch": 0.67, + "learning_rate": 2.5706562109011647e-06, + "loss": 0.5863, + "step": 5358 + }, + { + "epoch": 0.67, + "learning_rate": 2.568881917348494e-06, + "loss": 0.4889, + "step": 5359 + }, + { + "epoch": 0.67, + "learning_rate": 2.567108024626177e-06, + "loss": 0.5097, + "step": 5360 + }, + { + "epoch": 0.67, + "learning_rate": 2.5653345330266786e-06, + "loss": 0.4576, + "step": 5361 + }, + { + "epoch": 0.67, + "learning_rate": 2.5635614428424073e-06, + "loss": 0.5105, + "step": 5362 + }, + { + "epoch": 0.67, + "learning_rate": 2.5617887543656995e-06, + "loss": 0.2367, + "step": 5363 + }, + { + "epoch": 0.67, + "learning_rate": 2.560016467888828e-06, + "loss": 0.594, + "step": 5364 + }, + { + "epoch": 0.67, + "learning_rate": 2.558244583703999e-06, + "loss": 0.5767, + "step": 5365 + }, + { + "epoch": 0.67, + "learning_rate": 2.5564731021033495e-06, + "loss": 0.5452, + "step": 5366 + }, + { + "epoch": 0.67, + "learning_rate": 2.5547020233789533e-06, + "loss": 0.4888, + "step": 5367 + }, + { + "epoch": 0.67, + "learning_rate": 2.5529313478228163e-06, + "loss": 0.5386, + "step": 5368 + }, + { + "epoch": 0.67, + "learning_rate": 2.5511610757268794e-06, + "loss": 0.4981, + "step": 5369 + }, + { + "epoch": 0.67, + "learning_rate": 2.549391207383016e-06, + "loss": 0.4724, + "step": 5370 + }, + { + "epoch": 0.67, + "learning_rate": 2.547621743083035e-06, + "loss": 0.2414, + "step": 5371 + }, + { + "epoch": 0.67, + "learning_rate": 2.545852683118672e-06, + "loss": 0.5581, + "step": 5372 + }, + { + "epoch": 0.67, + "learning_rate": 2.5440840277816027e-06, + "loss": 0.5259, + "step": 5373 + }, + { + "epoch": 0.67, + "learning_rate": 2.542315777363435e-06, + "loss": 0.4718, + "step": 5374 + }, + { + "epoch": 0.67, + "learning_rate": 2.5405479321557083e-06, + "loss": 0.4982, + "step": 5375 + }, + { + "epoch": 0.67, + "learning_rate": 2.538780492449897e-06, + "loss": 0.4181, + "step": 5376 + }, + { + "epoch": 0.67, + "learning_rate": 2.537013458537405e-06, + "loss": 0.547, + "step": 5377 + }, + { + "epoch": 0.67, + "learning_rate": 2.5352468307095728e-06, + "loss": 0.5479, + "step": 5378 + }, + { + "epoch": 0.67, + "learning_rate": 2.5334806092576736e-06, + "loss": 0.5266, + "step": 5379 + }, + { + "epoch": 0.67, + "learning_rate": 2.5317147944729132e-06, + "loss": 0.4727, + "step": 5380 + }, + { + "epoch": 0.67, + "learning_rate": 2.5299493866464296e-06, + "loss": 0.4952, + "step": 5381 + }, + { + "epoch": 0.67, + "learning_rate": 2.5281843860692963e-06, + "loss": 0.5165, + "step": 5382 + }, + { + "epoch": 0.67, + "learning_rate": 2.5264197930325135e-06, + "loss": 0.5408, + "step": 5383 + }, + { + "epoch": 0.67, + "learning_rate": 2.5246556078270207e-06, + "loss": 0.6092, + "step": 5384 + }, + { + "epoch": 0.68, + "learning_rate": 2.522891830743688e-06, + "loss": 0.5252, + "step": 5385 + }, + { + "epoch": 0.68, + "learning_rate": 2.5211284620733166e-06, + "loss": 0.5175, + "step": 5386 + }, + { + "epoch": 0.68, + "learning_rate": 2.519365502106645e-06, + "loss": 0.5416, + "step": 5387 + }, + { + "epoch": 0.68, + "learning_rate": 2.517602951134337e-06, + "loss": 0.5402, + "step": 5388 + }, + { + "epoch": 0.68, + "learning_rate": 2.5158408094469944e-06, + "loss": 0.5374, + "step": 5389 + }, + { + "epoch": 0.68, + "learning_rate": 2.514079077335151e-06, + "loss": 0.4926, + "step": 5390 + }, + { + "epoch": 0.68, + "learning_rate": 2.5123177550892707e-06, + "loss": 0.5297, + "step": 5391 + }, + { + "epoch": 0.68, + "learning_rate": 2.510556842999755e-06, + "loss": 0.529, + "step": 5392 + }, + { + "epoch": 0.68, + "learning_rate": 2.5087963413569293e-06, + "loss": 0.5829, + "step": 5393 + }, + { + "epoch": 0.68, + "learning_rate": 2.507036250451058e-06, + "loss": 0.463, + "step": 5394 + }, + { + "epoch": 0.68, + "learning_rate": 2.5052765705723362e-06, + "loss": 0.4597, + "step": 5395 + }, + { + "epoch": 0.68, + "learning_rate": 2.503517302010891e-06, + "loss": 0.5405, + "step": 5396 + }, + { + "epoch": 0.68, + "learning_rate": 2.5017584450567815e-06, + "loss": 0.5425, + "step": 5397 + }, + { + "epoch": 0.68, + "learning_rate": 2.5000000000000015e-06, + "loss": 0.5182, + "step": 5398 + }, + { + "epoch": 0.68, + "learning_rate": 2.4982419671304693e-06, + "loss": 0.5374, + "step": 5399 + }, + { + "epoch": 0.68, + "learning_rate": 2.4964843467380434e-06, + "loss": 0.5478, + "step": 5400 + }, + { + "epoch": 0.68, + "learning_rate": 2.4947271391125113e-06, + "loss": 0.5435, + "step": 5401 + }, + { + "epoch": 0.68, + "learning_rate": 2.4929703445435915e-06, + "loss": 0.5453, + "step": 5402 + }, + { + "epoch": 0.68, + "learning_rate": 2.4912139633209386e-06, + "loss": 0.5295, + "step": 5403 + }, + { + "epoch": 0.68, + "learning_rate": 2.489457995734131e-06, + "loss": 0.5311, + "step": 5404 + }, + { + "epoch": 0.68, + "learning_rate": 2.4877024420726857e-06, + "loss": 0.53, + "step": 5405 + }, + { + "epoch": 0.68, + "learning_rate": 2.4859473026260493e-06, + "loss": 0.5035, + "step": 5406 + }, + { + "epoch": 0.68, + "learning_rate": 2.4841925776836013e-06, + "loss": 0.5422, + "step": 5407 + }, + { + "epoch": 0.68, + "learning_rate": 2.4824382675346524e-06, + "loss": 0.5678, + "step": 5408 + }, + { + "epoch": 0.68, + "learning_rate": 2.480684372468441e-06, + "loss": 0.5432, + "step": 5409 + }, + { + "epoch": 0.68, + "learning_rate": 2.4789308927741423e-06, + "loss": 0.4801, + "step": 5410 + }, + { + "epoch": 0.68, + "learning_rate": 2.477177828740861e-06, + "loss": 0.5067, + "step": 5411 + }, + { + "epoch": 0.68, + "learning_rate": 2.475425180657634e-06, + "loss": 0.4306, + "step": 5412 + }, + { + "epoch": 0.68, + "learning_rate": 2.4736729488134276e-06, + "loss": 0.5261, + "step": 5413 + }, + { + "epoch": 0.68, + "learning_rate": 2.4719211334971443e-06, + "loss": 0.5166, + "step": 5414 + }, + { + "epoch": 0.68, + "learning_rate": 2.4701697349976093e-06, + "loss": 0.5085, + "step": 5415 + }, + { + "epoch": 0.68, + "learning_rate": 2.4684187536035873e-06, + "loss": 0.5448, + "step": 5416 + }, + { + "epoch": 0.68, + "learning_rate": 2.4666681896037703e-06, + "loss": 0.475, + "step": 5417 + }, + { + "epoch": 0.68, + "learning_rate": 2.464918043286783e-06, + "loss": 0.5016, + "step": 5418 + }, + { + "epoch": 0.68, + "learning_rate": 2.4631683149411824e-06, + "loss": 0.5202, + "step": 5419 + }, + { + "epoch": 0.68, + "learning_rate": 2.4614190048554505e-06, + "loss": 0.5334, + "step": 5420 + }, + { + "epoch": 0.68, + "learning_rate": 2.4596701133180073e-06, + "loss": 0.5527, + "step": 5421 + }, + { + "epoch": 0.68, + "learning_rate": 2.4579216406172002e-06, + "loss": 0.5329, + "step": 5422 + }, + { + "epoch": 0.68, + "learning_rate": 2.456173587041309e-06, + "loss": 0.4809, + "step": 5423 + }, + { + "epoch": 0.68, + "learning_rate": 2.454425952878546e-06, + "loss": 0.5278, + "step": 5424 + }, + { + "epoch": 0.68, + "learning_rate": 2.4526787384170477e-06, + "loss": 0.562, + "step": 5425 + }, + { + "epoch": 0.68, + "learning_rate": 2.450931943944888e-06, + "loss": 0.4862, + "step": 5426 + }, + { + "epoch": 0.68, + "learning_rate": 2.4491855697500704e-06, + "loss": 0.5419, + "step": 5427 + }, + { + "epoch": 0.68, + "learning_rate": 2.447439616120527e-06, + "loss": 0.4755, + "step": 5428 + }, + { + "epoch": 0.68, + "learning_rate": 2.4456940833441223e-06, + "loss": 0.5323, + "step": 5429 + }, + { + "epoch": 0.68, + "learning_rate": 2.443948971708653e-06, + "loss": 0.1766, + "step": 5430 + }, + { + "epoch": 0.68, + "learning_rate": 2.4422042815018393e-06, + "loss": 0.4876, + "step": 5431 + }, + { + "epoch": 0.68, + "learning_rate": 2.44046001301134e-06, + "loss": 0.5669, + "step": 5432 + }, + { + "epoch": 0.68, + "learning_rate": 2.4387161665247403e-06, + "loss": 0.543, + "step": 5433 + }, + { + "epoch": 0.68, + "learning_rate": 2.4369727423295574e-06, + "loss": 0.5441, + "step": 5434 + }, + { + "epoch": 0.68, + "learning_rate": 2.4352297407132393e-06, + "loss": 0.5171, + "step": 5435 + }, + { + "epoch": 0.68, + "learning_rate": 2.43348716196316e-06, + "loss": 0.5514, + "step": 5436 + }, + { + "epoch": 0.68, + "learning_rate": 2.4317450063666293e-06, + "loss": 0.5165, + "step": 5437 + }, + { + "epoch": 0.68, + "learning_rate": 2.4300032742108835e-06, + "loss": 0.2048, + "step": 5438 + }, + { + "epoch": 0.68, + "learning_rate": 2.428261965783092e-06, + "loss": 0.5602, + "step": 5439 + }, + { + "epoch": 0.68, + "learning_rate": 2.4265210813703543e-06, + "loss": 0.5187, + "step": 5440 + }, + { + "epoch": 0.68, + "learning_rate": 2.4247806212596943e-06, + "loss": 0.5399, + "step": 5441 + }, + { + "epoch": 0.68, + "learning_rate": 2.4230405857380724e-06, + "loss": 0.5038, + "step": 5442 + }, + { + "epoch": 0.68, + "learning_rate": 2.4213009750923773e-06, + "loss": 0.4908, + "step": 5443 + }, + { + "epoch": 0.68, + "learning_rate": 2.419561789609426e-06, + "loss": 0.5344, + "step": 5444 + }, + { + "epoch": 0.68, + "learning_rate": 2.417823029575967e-06, + "loss": 0.4927, + "step": 5445 + }, + { + "epoch": 0.68, + "learning_rate": 2.4160846952786786e-06, + "loss": 0.5034, + "step": 5446 + }, + { + "epoch": 0.68, + "learning_rate": 2.4143467870041703e-06, + "loss": 0.5612, + "step": 5447 + }, + { + "epoch": 0.68, + "learning_rate": 2.4126093050389756e-06, + "loss": 0.4981, + "step": 5448 + }, + { + "epoch": 0.68, + "learning_rate": 2.410872249669563e-06, + "loss": 0.5752, + "step": 5449 + }, + { + "epoch": 0.68, + "learning_rate": 2.40913562118233e-06, + "loss": 0.5049, + "step": 5450 + }, + { + "epoch": 0.68, + "learning_rate": 2.4073994198636036e-06, + "loss": 0.478, + "step": 5451 + }, + { + "epoch": 0.68, + "learning_rate": 2.4056636459996385e-06, + "loss": 0.555, + "step": 5452 + }, + { + "epoch": 0.68, + "learning_rate": 2.4039282998766213e-06, + "loss": 0.5133, + "step": 5453 + }, + { + "epoch": 0.68, + "learning_rate": 2.402193381780668e-06, + "loss": 0.4802, + "step": 5454 + }, + { + "epoch": 0.68, + "learning_rate": 2.4004588919978207e-06, + "loss": 0.5512, + "step": 5455 + }, + { + "epoch": 0.68, + "learning_rate": 2.398724830814053e-06, + "loss": 0.4365, + "step": 5456 + }, + { + "epoch": 0.68, + "learning_rate": 2.3969911985152698e-06, + "loss": 0.518, + "step": 5457 + }, + { + "epoch": 0.68, + "learning_rate": 2.395257995387303e-06, + "loss": 0.5323, + "step": 5458 + }, + { + "epoch": 0.68, + "learning_rate": 2.3935252217159143e-06, + "loss": 0.5662, + "step": 5459 + }, + { + "epoch": 0.68, + "learning_rate": 2.3917928777867945e-06, + "loss": 0.542, + "step": 5460 + }, + { + "epoch": 0.68, + "learning_rate": 2.3900609638855655e-06, + "loss": 0.5001, + "step": 5461 + }, + { + "epoch": 0.68, + "learning_rate": 2.388329480297773e-06, + "loss": 0.5332, + "step": 5462 + }, + { + "epoch": 0.68, + "learning_rate": 2.3865984273088965e-06, + "loss": 0.5102, + "step": 5463 + }, + { + "epoch": 0.68, + "learning_rate": 2.384867805204344e-06, + "loss": 0.5963, + "step": 5464 + }, + { + "epoch": 0.69, + "learning_rate": 2.3831376142694505e-06, + "loss": 0.5282, + "step": 5465 + }, + { + "epoch": 0.69, + "learning_rate": 2.3814078547894827e-06, + "loss": 0.5479, + "step": 5466 + }, + { + "epoch": 0.69, + "learning_rate": 2.379678527049635e-06, + "loss": 0.4383, + "step": 5467 + }, + { + "epoch": 0.69, + "learning_rate": 2.3779496313350264e-06, + "loss": 0.5351, + "step": 5468 + }, + { + "epoch": 0.69, + "learning_rate": 2.3762211679307105e-06, + "loss": 0.5372, + "step": 5469 + }, + { + "epoch": 0.69, + "learning_rate": 2.3744931371216685e-06, + "loss": 0.5367, + "step": 5470 + }, + { + "epoch": 0.69, + "learning_rate": 2.372765539192808e-06, + "loss": 0.5094, + "step": 5471 + }, + { + "epoch": 0.69, + "learning_rate": 2.371038374428967e-06, + "loss": 0.5073, + "step": 5472 + }, + { + "epoch": 0.69, + "learning_rate": 2.369311643114914e-06, + "loss": 0.2197, + "step": 5473 + }, + { + "epoch": 0.69, + "learning_rate": 2.3675853455353393e-06, + "loss": 0.4921, + "step": 5474 + }, + { + "epoch": 0.69, + "learning_rate": 2.3658594819748675e-06, + "loss": 0.5101, + "step": 5475 + }, + { + "epoch": 0.69, + "learning_rate": 2.3641340527180517e-06, + "loss": 0.5497, + "step": 5476 + }, + { + "epoch": 0.69, + "learning_rate": 2.3624090580493702e-06, + "loss": 0.5161, + "step": 5477 + }, + { + "epoch": 0.69, + "learning_rate": 2.3606844982532345e-06, + "loss": 0.4541, + "step": 5478 + }, + { + "epoch": 0.69, + "learning_rate": 2.3589603736139764e-06, + "loss": 0.5595, + "step": 5479 + }, + { + "epoch": 0.69, + "learning_rate": 2.3572366844158633e-06, + "loss": 0.5392, + "step": 5480 + }, + { + "epoch": 0.69, + "learning_rate": 2.355513430943088e-06, + "loss": 0.5055, + "step": 5481 + }, + { + "epoch": 0.69, + "learning_rate": 2.353790613479772e-06, + "loss": 0.5281, + "step": 5482 + }, + { + "epoch": 0.69, + "learning_rate": 2.352068232309967e-06, + "loss": 0.5713, + "step": 5483 + }, + { + "epoch": 0.69, + "learning_rate": 2.3503462877176458e-06, + "loss": 0.5606, + "step": 5484 + }, + { + "epoch": 0.69, + "learning_rate": 2.3486247799867155e-06, + "loss": 0.4928, + "step": 5485 + }, + { + "epoch": 0.69, + "learning_rate": 2.3469037094010105e-06, + "loss": 0.4374, + "step": 5486 + }, + { + "epoch": 0.69, + "learning_rate": 2.345183076244292e-06, + "loss": 0.4981, + "step": 5487 + }, + { + "epoch": 0.69, + "learning_rate": 2.343462880800248e-06, + "loss": 0.5235, + "step": 5488 + }, + { + "epoch": 0.69, + "learning_rate": 2.3417431233524985e-06, + "loss": 0.4482, + "step": 5489 + }, + { + "epoch": 0.69, + "learning_rate": 2.3400238041845846e-06, + "loss": 0.5021, + "step": 5490 + }, + { + "epoch": 0.69, + "learning_rate": 2.338304923579981e-06, + "loss": 0.6176, + "step": 5491 + }, + { + "epoch": 0.69, + "learning_rate": 2.3365864818220867e-06, + "loss": 0.5034, + "step": 5492 + }, + { + "epoch": 0.69, + "learning_rate": 2.334868479194231e-06, + "loss": 0.4716, + "step": 5493 + }, + { + "epoch": 0.69, + "learning_rate": 2.3331509159796706e-06, + "loss": 0.4619, + "step": 5494 + }, + { + "epoch": 0.69, + "learning_rate": 2.3314337924615847e-06, + "loss": 0.4485, + "step": 5495 + }, + { + "epoch": 0.69, + "learning_rate": 2.3297171089230865e-06, + "loss": 0.1954, + "step": 5496 + }, + { + "epoch": 0.69, + "learning_rate": 2.328000865647213e-06, + "loss": 0.5198, + "step": 5497 + }, + { + "epoch": 0.69, + "learning_rate": 2.3262850629169304e-06, + "loss": 0.4924, + "step": 5498 + }, + { + "epoch": 0.69, + "learning_rate": 2.324569701015133e-06, + "loss": 0.5597, + "step": 5499 + }, + { + "epoch": 0.69, + "learning_rate": 2.322854780224637e-06, + "loss": 0.5174, + "step": 5500 + }, + { + "epoch": 0.69, + "learning_rate": 2.3211403008281926e-06, + "loss": 0.4778, + "step": 5501 + }, + { + "epoch": 0.69, + "learning_rate": 2.3194262631084734e-06, + "loss": 0.5787, + "step": 5502 + }, + { + "epoch": 0.69, + "learning_rate": 2.3177126673480816e-06, + "loss": 0.4806, + "step": 5503 + }, + { + "epoch": 0.69, + "learning_rate": 2.315999513829546e-06, + "loss": 0.5061, + "step": 5504 + }, + { + "epoch": 0.69, + "learning_rate": 2.314286802835325e-06, + "loss": 0.4426, + "step": 5505 + }, + { + "epoch": 0.69, + "learning_rate": 2.3125745346477975e-06, + "loss": 0.5597, + "step": 5506 + }, + { + "epoch": 0.69, + "learning_rate": 2.3108627095492745e-06, + "loss": 0.5857, + "step": 5507 + }, + { + "epoch": 0.69, + "learning_rate": 2.309151327821995e-06, + "loss": 0.4573, + "step": 5508 + }, + { + "epoch": 0.69, + "learning_rate": 2.307440389748122e-06, + "loss": 0.5865, + "step": 5509 + }, + { + "epoch": 0.69, + "learning_rate": 2.3057298956097468e-06, + "loss": 0.5137, + "step": 5510 + }, + { + "epoch": 0.69, + "learning_rate": 2.3040198456888853e-06, + "loss": 0.5378, + "step": 5511 + }, + { + "epoch": 0.69, + "learning_rate": 2.302310240267482e-06, + "loss": 0.488, + "step": 5512 + }, + { + "epoch": 0.69, + "learning_rate": 2.300601079627409e-06, + "loss": 0.5346, + "step": 5513 + }, + { + "epoch": 0.69, + "learning_rate": 2.2988923640504636e-06, + "loss": 0.5004, + "step": 5514 + }, + { + "epoch": 0.69, + "learning_rate": 2.297184093818372e-06, + "loss": 0.4389, + "step": 5515 + }, + { + "epoch": 0.69, + "learning_rate": 2.2954762692127815e-06, + "loss": 0.5558, + "step": 5516 + }, + { + "epoch": 0.69, + "learning_rate": 2.293768890515271e-06, + "loss": 0.5576, + "step": 5517 + }, + { + "epoch": 0.69, + "learning_rate": 2.292061958007345e-06, + "loss": 0.5358, + "step": 5518 + }, + { + "epoch": 0.69, + "learning_rate": 2.290355471970433e-06, + "loss": 0.5295, + "step": 5519 + }, + { + "epoch": 0.69, + "learning_rate": 2.2886494326858923e-06, + "loss": 0.452, + "step": 5520 + }, + { + "epoch": 0.69, + "learning_rate": 2.2869438404350076e-06, + "loss": 0.5309, + "step": 5521 + }, + { + "epoch": 0.69, + "learning_rate": 2.2852386954989846e-06, + "loss": 0.5535, + "step": 5522 + }, + { + "epoch": 0.69, + "learning_rate": 2.283533998158961e-06, + "loss": 0.5505, + "step": 5523 + }, + { + "epoch": 0.69, + "learning_rate": 2.2818297486959983e-06, + "loss": 0.4882, + "step": 5524 + }, + { + "epoch": 0.69, + "learning_rate": 2.2801259473910846e-06, + "loss": 0.509, + "step": 5525 + }, + { + "epoch": 0.69, + "learning_rate": 2.278422594525136e-06, + "loss": 0.4948, + "step": 5526 + }, + { + "epoch": 0.69, + "learning_rate": 2.2767196903789886e-06, + "loss": 0.5119, + "step": 5527 + }, + { + "epoch": 0.69, + "learning_rate": 2.2750172352334103e-06, + "loss": 0.5804, + "step": 5528 + }, + { + "epoch": 0.69, + "learning_rate": 2.2733152293690937e-06, + "loss": 0.5495, + "step": 5529 + }, + { + "epoch": 0.69, + "learning_rate": 2.271613673066656e-06, + "loss": 0.556, + "step": 5530 + }, + { + "epoch": 0.69, + "learning_rate": 2.2699125666066417e-06, + "loss": 0.5208, + "step": 5531 + }, + { + "epoch": 0.69, + "learning_rate": 2.2682119102695233e-06, + "loss": 0.5426, + "step": 5532 + }, + { + "epoch": 0.69, + "learning_rate": 2.266511704335691e-06, + "loss": 0.5602, + "step": 5533 + }, + { + "epoch": 0.69, + "learning_rate": 2.2648119490854685e-06, + "loss": 0.531, + "step": 5534 + }, + { + "epoch": 0.69, + "learning_rate": 2.2631126447991037e-06, + "loss": 0.5585, + "step": 5535 + }, + { + "epoch": 0.69, + "learning_rate": 2.261413791756768e-06, + "loss": 0.5127, + "step": 5536 + }, + { + "epoch": 0.69, + "learning_rate": 2.259715390238562e-06, + "loss": 0.4674, + "step": 5537 + }, + { + "epoch": 0.69, + "learning_rate": 2.258017440524506e-06, + "loss": 0.5499, + "step": 5538 + }, + { + "epoch": 0.69, + "learning_rate": 2.2563199428945503e-06, + "loss": 0.4712, + "step": 5539 + }, + { + "epoch": 0.69, + "learning_rate": 2.2546228976285704e-06, + "loss": 0.5536, + "step": 5540 + }, + { + "epoch": 0.69, + "learning_rate": 2.2529263050063672e-06, + "loss": 0.4729, + "step": 5541 + }, + { + "epoch": 0.69, + "learning_rate": 2.2512301653076663e-06, + "loss": 0.5052, + "step": 5542 + }, + { + "epoch": 0.69, + "learning_rate": 2.249534478812116e-06, + "loss": 0.5068, + "step": 5543 + }, + { + "epoch": 0.69, + "learning_rate": 2.2478392457992936e-06, + "loss": 0.4493, + "step": 5544 + }, + { + "epoch": 0.7, + "learning_rate": 2.2461444665487013e-06, + "loss": 0.5582, + "step": 5545 + }, + { + "epoch": 0.7, + "learning_rate": 2.244450141339766e-06, + "loss": 0.4976, + "step": 5546 + }, + { + "epoch": 0.7, + "learning_rate": 2.2427562704518374e-06, + "loss": 0.5253, + "step": 5547 + }, + { + "epoch": 0.7, + "learning_rate": 2.241062854164196e-06, + "loss": 0.4737, + "step": 5548 + }, + { + "epoch": 0.7, + "learning_rate": 2.2393698927560393e-06, + "loss": 0.4614, + "step": 5549 + }, + { + "epoch": 0.7, + "learning_rate": 2.237677386506496e-06, + "loss": 0.5544, + "step": 5550 + }, + { + "epoch": 0.7, + "learning_rate": 2.2359853356946184e-06, + "loss": 0.5053, + "step": 5551 + }, + { + "epoch": 0.7, + "learning_rate": 2.2342937405993818e-06, + "loss": 0.6245, + "step": 5552 + }, + { + "epoch": 0.7, + "learning_rate": 2.2326026014996914e-06, + "loss": 0.4527, + "step": 5553 + }, + { + "epoch": 0.7, + "learning_rate": 2.2309119186743694e-06, + "loss": 0.4791, + "step": 5554 + }, + { + "epoch": 0.7, + "learning_rate": 2.2292216924021675e-06, + "loss": 0.5381, + "step": 5555 + }, + { + "epoch": 0.7, + "learning_rate": 2.227531922961763e-06, + "loss": 0.5443, + "step": 5556 + }, + { + "epoch": 0.7, + "learning_rate": 2.2258426106317567e-06, + "loss": 0.5812, + "step": 5557 + }, + { + "epoch": 0.7, + "learning_rate": 2.2241537556906722e-06, + "loss": 0.5132, + "step": 5558 + }, + { + "epoch": 0.7, + "learning_rate": 2.2224653584169624e-06, + "loss": 0.5599, + "step": 5559 + }, + { + "epoch": 0.7, + "learning_rate": 2.2207774190889977e-06, + "loss": 0.5747, + "step": 5560 + }, + { + "epoch": 0.7, + "learning_rate": 2.2190899379850784e-06, + "loss": 0.5707, + "step": 5561 + }, + { + "epoch": 0.7, + "learning_rate": 2.2174029153834288e-06, + "loss": 0.5469, + "step": 5562 + }, + { + "epoch": 0.7, + "learning_rate": 2.2157163515621948e-06, + "loss": 0.5459, + "step": 5563 + }, + { + "epoch": 0.7, + "learning_rate": 2.21403024679945e-06, + "loss": 0.4763, + "step": 5564 + }, + { + "epoch": 0.7, + "learning_rate": 2.2123446013731907e-06, + "loss": 0.5333, + "step": 5565 + }, + { + "epoch": 0.7, + "learning_rate": 2.2106594155613386e-06, + "loss": 0.484, + "step": 5566 + }, + { + "epoch": 0.7, + "learning_rate": 2.2089746896417354e-06, + "loss": 0.549, + "step": 5567 + }, + { + "epoch": 0.7, + "learning_rate": 2.207290423892151e-06, + "loss": 0.5095, + "step": 5568 + }, + { + "epoch": 0.7, + "learning_rate": 2.2056066185902797e-06, + "loss": 0.5188, + "step": 5569 + }, + { + "epoch": 0.7, + "learning_rate": 2.2039232740137386e-06, + "loss": 0.4962, + "step": 5570 + }, + { + "epoch": 0.7, + "learning_rate": 2.202240390440068e-06, + "loss": 0.5272, + "step": 5571 + }, + { + "epoch": 0.7, + "learning_rate": 2.200557968146734e-06, + "loss": 0.4883, + "step": 5572 + }, + { + "epoch": 0.7, + "learning_rate": 2.1988760074111283e-06, + "loss": 0.575, + "step": 5573 + }, + { + "epoch": 0.7, + "learning_rate": 2.1971945085105587e-06, + "loss": 0.2042, + "step": 5574 + }, + { + "epoch": 0.7, + "learning_rate": 2.1955134717222652e-06, + "loss": 0.4811, + "step": 5575 + }, + { + "epoch": 0.7, + "learning_rate": 2.1938328973234085e-06, + "loss": 0.5447, + "step": 5576 + }, + { + "epoch": 0.7, + "learning_rate": 2.1921527855910724e-06, + "loss": 0.5355, + "step": 5577 + }, + { + "epoch": 0.7, + "learning_rate": 2.190473136802265e-06, + "loss": 0.2078, + "step": 5578 + }, + { + "epoch": 0.7, + "learning_rate": 2.1887939512339203e-06, + "loss": 0.502, + "step": 5579 + }, + { + "epoch": 0.7, + "learning_rate": 2.1871152291628928e-06, + "loss": 0.5124, + "step": 5580 + }, + { + "epoch": 0.7, + "learning_rate": 2.1854369708659602e-06, + "loss": 0.4727, + "step": 5581 + }, + { + "epoch": 0.7, + "learning_rate": 2.1837591766198256e-06, + "loss": 0.554, + "step": 5582 + }, + { + "epoch": 0.7, + "learning_rate": 2.182081846701116e-06, + "loss": 0.5235, + "step": 5583 + }, + { + "epoch": 0.7, + "learning_rate": 2.180404981386381e-06, + "loss": 0.4889, + "step": 5584 + }, + { + "epoch": 0.7, + "learning_rate": 2.1787285809520946e-06, + "loss": 0.4947, + "step": 5585 + }, + { + "epoch": 0.7, + "learning_rate": 2.1770526456746507e-06, + "loss": 0.5306, + "step": 5586 + }, + { + "epoch": 0.7, + "learning_rate": 2.17537717583037e-06, + "loss": 0.4812, + "step": 5587 + }, + { + "epoch": 0.7, + "learning_rate": 2.1737021716954953e-06, + "loss": 0.5433, + "step": 5588 + }, + { + "epoch": 0.7, + "learning_rate": 2.172027633546193e-06, + "loss": 0.5805, + "step": 5589 + }, + { + "epoch": 0.7, + "learning_rate": 2.170353561658554e-06, + "loss": 0.4954, + "step": 5590 + }, + { + "epoch": 0.7, + "learning_rate": 2.168679956308588e-06, + "loss": 0.5568, + "step": 5591 + }, + { + "epoch": 0.7, + "learning_rate": 2.1670068177722313e-06, + "loss": 0.5015, + "step": 5592 + }, + { + "epoch": 0.7, + "learning_rate": 2.165334146325343e-06, + "loss": 0.4573, + "step": 5593 + }, + { + "epoch": 0.7, + "learning_rate": 2.1636619422437044e-06, + "loss": 0.561, + "step": 5594 + }, + { + "epoch": 0.7, + "learning_rate": 2.16199020580302e-06, + "loss": 0.5748, + "step": 5595 + }, + { + "epoch": 0.7, + "learning_rate": 2.160318937278919e-06, + "loss": 0.5163, + "step": 5596 + }, + { + "epoch": 0.7, + "learning_rate": 2.158648136946948e-06, + "loss": 0.5645, + "step": 5597 + }, + { + "epoch": 0.7, + "learning_rate": 2.1569778050825824e-06, + "loss": 0.5366, + "step": 5598 + }, + { + "epoch": 0.7, + "learning_rate": 2.1553079419612166e-06, + "loss": 0.5155, + "step": 5599 + }, + { + "epoch": 0.7, + "learning_rate": 2.1536385478581708e-06, + "loss": 0.5062, + "step": 5600 + }, + { + "epoch": 0.7, + "learning_rate": 2.1519696230486865e-06, + "loss": 0.2355, + "step": 5601 + }, + { + "epoch": 0.7, + "learning_rate": 2.150301167807925e-06, + "loss": 0.4639, + "step": 5602 + }, + { + "epoch": 0.7, + "learning_rate": 2.148633182410974e-06, + "loss": 0.5067, + "step": 5603 + }, + { + "epoch": 0.7, + "learning_rate": 2.1469656671328427e-06, + "loss": 0.4911, + "step": 5604 + }, + { + "epoch": 0.7, + "learning_rate": 2.145298622248462e-06, + "loss": 0.5101, + "step": 5605 + }, + { + "epoch": 0.7, + "learning_rate": 2.1436320480326865e-06, + "loss": 0.2277, + "step": 5606 + }, + { + "epoch": 0.7, + "learning_rate": 2.1419659447602935e-06, + "loss": 0.442, + "step": 5607 + }, + { + "epoch": 0.7, + "learning_rate": 2.1403003127059786e-06, + "loss": 0.1782, + "step": 5608 + }, + { + "epoch": 0.7, + "learning_rate": 2.1386351521443643e-06, + "loss": 0.5349, + "step": 5609 + }, + { + "epoch": 0.7, + "learning_rate": 2.1369704633499937e-06, + "loss": 0.4789, + "step": 5610 + }, + { + "epoch": 0.7, + "learning_rate": 2.1353062465973324e-06, + "loss": 0.5731, + "step": 5611 + }, + { + "epoch": 0.7, + "learning_rate": 2.1336425021607694e-06, + "loss": 0.2399, + "step": 5612 + }, + { + "epoch": 0.7, + "learning_rate": 2.131979230314611e-06, + "loss": 0.5562, + "step": 5613 + }, + { + "epoch": 0.7, + "learning_rate": 2.1303164313330904e-06, + "loss": 0.5383, + "step": 5614 + }, + { + "epoch": 0.7, + "learning_rate": 2.1286541054903625e-06, + "loss": 0.4754, + "step": 5615 + }, + { + "epoch": 0.7, + "learning_rate": 2.1269922530605026e-06, + "loss": 0.5919, + "step": 5616 + }, + { + "epoch": 0.7, + "learning_rate": 2.1253308743175092e-06, + "loss": 0.528, + "step": 5617 + }, + { + "epoch": 0.7, + "learning_rate": 2.1236699695353e-06, + "loss": 0.4804, + "step": 5618 + }, + { + "epoch": 0.7, + "learning_rate": 2.122009538987717e-06, + "loss": 0.4879, + "step": 5619 + }, + { + "epoch": 0.7, + "learning_rate": 2.1203495829485248e-06, + "loss": 0.4772, + "step": 5620 + }, + { + "epoch": 0.7, + "learning_rate": 2.1186901016914073e-06, + "loss": 0.4977, + "step": 5621 + }, + { + "epoch": 0.7, + "learning_rate": 2.117031095489972e-06, + "loss": 0.4758, + "step": 5622 + }, + { + "epoch": 0.7, + "learning_rate": 2.1153725646177484e-06, + "loss": 0.4679, + "step": 5623 + }, + { + "epoch": 0.7, + "learning_rate": 2.113714509348184e-06, + "loss": 0.4642, + "step": 5624 + }, + { + "epoch": 0.71, + "learning_rate": 2.112056929954652e-06, + "loss": 0.5224, + "step": 5625 + }, + { + "epoch": 0.71, + "learning_rate": 2.1103998267104452e-06, + "loss": 0.4856, + "step": 5626 + }, + { + "epoch": 0.71, + "learning_rate": 2.1087431998887785e-06, + "loss": 0.5475, + "step": 5627 + }, + { + "epoch": 0.71, + "learning_rate": 2.1070870497627905e-06, + "loss": 0.5313, + "step": 5628 + }, + { + "epoch": 0.71, + "learning_rate": 2.1054313766055338e-06, + "loss": 0.4996, + "step": 5629 + }, + { + "epoch": 0.71, + "learning_rate": 2.103776180689991e-06, + "loss": 0.5354, + "step": 5630 + }, + { + "epoch": 0.71, + "learning_rate": 2.1021214622890613e-06, + "loss": 0.589, + "step": 5631 + }, + { + "epoch": 0.71, + "learning_rate": 2.1004672216755656e-06, + "loss": 0.2285, + "step": 5632 + }, + { + "epoch": 0.71, + "learning_rate": 2.0988134591222497e-06, + "loss": 0.471, + "step": 5633 + }, + { + "epoch": 0.71, + "learning_rate": 2.0971601749017735e-06, + "loss": 0.5516, + "step": 5634 + }, + { + "epoch": 0.71, + "learning_rate": 2.095507369286724e-06, + "loss": 0.5161, + "step": 5635 + }, + { + "epoch": 0.71, + "learning_rate": 2.093855042549607e-06, + "loss": 0.5227, + "step": 5636 + }, + { + "epoch": 0.71, + "learning_rate": 2.092203194962849e-06, + "loss": 0.5752, + "step": 5637 + }, + { + "epoch": 0.71, + "learning_rate": 2.0905518267988002e-06, + "loss": 0.477, + "step": 5638 + }, + { + "epoch": 0.71, + "learning_rate": 2.08890093832973e-06, + "loss": 0.5423, + "step": 5639 + }, + { + "epoch": 0.71, + "learning_rate": 2.087250529827825e-06, + "loss": 0.5175, + "step": 5640 + }, + { + "epoch": 0.71, + "learning_rate": 2.0856006015651988e-06, + "loss": 0.5386, + "step": 5641 + }, + { + "epoch": 0.71, + "learning_rate": 2.0839511538138822e-06, + "loss": 0.5047, + "step": 5642 + }, + { + "epoch": 0.71, + "learning_rate": 2.082302186845828e-06, + "loss": 0.4599, + "step": 5643 + }, + { + "epoch": 0.71, + "learning_rate": 2.080653700932912e-06, + "loss": 0.5346, + "step": 5644 + }, + { + "epoch": 0.71, + "learning_rate": 2.0790056963469233e-06, + "loss": 0.522, + "step": 5645 + }, + { + "epoch": 0.71, + "learning_rate": 2.0773581733595795e-06, + "loss": 0.5134, + "step": 5646 + }, + { + "epoch": 0.71, + "learning_rate": 2.075711132242515e-06, + "loss": 0.4644, + "step": 5647 + }, + { + "epoch": 0.71, + "learning_rate": 2.074064573267286e-06, + "loss": 0.5272, + "step": 5648 + }, + { + "epoch": 0.71, + "learning_rate": 2.0724184967053697e-06, + "loss": 0.5094, + "step": 5649 + }, + { + "epoch": 0.71, + "learning_rate": 2.0707729028281605e-06, + "loss": 0.5096, + "step": 5650 + }, + { + "epoch": 0.71, + "learning_rate": 2.0691277919069757e-06, + "loss": 0.5187, + "step": 5651 + }, + { + "epoch": 0.71, + "learning_rate": 2.067483164213055e-06, + "loss": 0.4929, + "step": 5652 + }, + { + "epoch": 0.71, + "learning_rate": 2.0658390200175538e-06, + "loss": 0.5017, + "step": 5653 + }, + { + "epoch": 0.71, + "learning_rate": 2.064195359591552e-06, + "loss": 0.5344, + "step": 5654 + }, + { + "epoch": 0.71, + "learning_rate": 2.062552183206049e-06, + "loss": 0.2188, + "step": 5655 + }, + { + "epoch": 0.71, + "learning_rate": 2.06090949113196e-06, + "loss": 0.4705, + "step": 5656 + }, + { + "epoch": 0.71, + "learning_rate": 2.0592672836401256e-06, + "loss": 0.6029, + "step": 5657 + }, + { + "epoch": 0.71, + "learning_rate": 2.057625561001304e-06, + "loss": 0.4452, + "step": 5658 + }, + { + "epoch": 0.71, + "learning_rate": 2.0559843234861743e-06, + "loss": 0.5233, + "step": 5659 + }, + { + "epoch": 0.71, + "learning_rate": 2.0543435713653377e-06, + "loss": 0.5576, + "step": 5660 + }, + { + "epoch": 0.71, + "learning_rate": 2.052703304909309e-06, + "loss": 0.5387, + "step": 5661 + }, + { + "epoch": 0.71, + "learning_rate": 2.051063524388528e-06, + "loss": 0.5503, + "step": 5662 + }, + { + "epoch": 0.71, + "learning_rate": 2.0494242300733546e-06, + "loss": 0.533, + "step": 5663 + }, + { + "epoch": 0.71, + "learning_rate": 2.047785422234067e-06, + "loss": 0.535, + "step": 5664 + }, + { + "epoch": 0.71, + "learning_rate": 2.0461471011408635e-06, + "loss": 0.5057, + "step": 5665 + }, + { + "epoch": 0.71, + "learning_rate": 2.0445092670638632e-06, + "loss": 0.5642, + "step": 5666 + }, + { + "epoch": 0.71, + "learning_rate": 2.0428719202731014e-06, + "loss": 0.5363, + "step": 5667 + }, + { + "epoch": 0.71, + "learning_rate": 2.0412350610385363e-06, + "loss": 0.458, + "step": 5668 + }, + { + "epoch": 0.71, + "learning_rate": 2.0395986896300453e-06, + "loss": 0.4738, + "step": 5669 + }, + { + "epoch": 0.71, + "learning_rate": 2.037962806317425e-06, + "loss": 0.5159, + "step": 5670 + }, + { + "epoch": 0.71, + "learning_rate": 2.0363274113703913e-06, + "loss": 0.5778, + "step": 5671 + }, + { + "epoch": 0.71, + "learning_rate": 2.034692505058582e-06, + "loss": 0.4931, + "step": 5672 + }, + { + "epoch": 0.71, + "learning_rate": 2.033058087651548e-06, + "loss": 0.4995, + "step": 5673 + }, + { + "epoch": 0.71, + "learning_rate": 2.0314241594187657e-06, + "loss": 0.5347, + "step": 5674 + }, + { + "epoch": 0.71, + "learning_rate": 2.029790720629629e-06, + "loss": 0.5506, + "step": 5675 + }, + { + "epoch": 0.71, + "learning_rate": 2.0281577715534506e-06, + "loss": 0.4767, + "step": 5676 + }, + { + "epoch": 0.71, + "learning_rate": 2.0265253124594636e-06, + "loss": 0.5249, + "step": 5677 + }, + { + "epoch": 0.71, + "learning_rate": 2.0248933436168183e-06, + "loss": 0.4854, + "step": 5678 + }, + { + "epoch": 0.71, + "learning_rate": 2.0232618652945885e-06, + "loss": 0.5237, + "step": 5679 + }, + { + "epoch": 0.71, + "learning_rate": 2.0216308777617595e-06, + "loss": 0.5667, + "step": 5680 + }, + { + "epoch": 0.71, + "learning_rate": 2.0200003812872426e-06, + "loss": 0.5854, + "step": 5681 + }, + { + "epoch": 0.71, + "learning_rate": 2.0183703761398656e-06, + "loss": 0.5754, + "step": 5682 + }, + { + "epoch": 0.71, + "learning_rate": 2.0167408625883744e-06, + "loss": 0.5053, + "step": 5683 + }, + { + "epoch": 0.71, + "learning_rate": 2.0151118409014363e-06, + "loss": 0.5099, + "step": 5684 + }, + { + "epoch": 0.71, + "learning_rate": 2.013483311347636e-06, + "loss": 0.5112, + "step": 5685 + }, + { + "epoch": 0.71, + "learning_rate": 2.0118552741954778e-06, + "loss": 0.4836, + "step": 5686 + }, + { + "epoch": 0.71, + "learning_rate": 2.010227729713382e-06, + "loss": 0.576, + "step": 5687 + }, + { + "epoch": 0.71, + "learning_rate": 2.0086006781696906e-06, + "loss": 0.5281, + "step": 5688 + }, + { + "epoch": 0.71, + "learning_rate": 2.0069741198326637e-06, + "loss": 0.506, + "step": 5689 + }, + { + "epoch": 0.71, + "learning_rate": 2.00534805497048e-06, + "loss": 0.5043, + "step": 5690 + }, + { + "epoch": 0.71, + "learning_rate": 2.0037224838512375e-06, + "loss": 0.5515, + "step": 5691 + }, + { + "epoch": 0.71, + "learning_rate": 2.002097406742953e-06, + "loss": 0.5902, + "step": 5692 + }, + { + "epoch": 0.71, + "learning_rate": 2.000472823913557e-06, + "loss": 0.4957, + "step": 5693 + }, + { + "epoch": 0.71, + "learning_rate": 1.9988487356309062e-06, + "loss": 0.5147, + "step": 5694 + }, + { + "epoch": 0.71, + "learning_rate": 1.9972251421627697e-06, + "loss": 0.5934, + "step": 5695 + }, + { + "epoch": 0.71, + "learning_rate": 1.995602043776838e-06, + "loss": 0.5286, + "step": 5696 + }, + { + "epoch": 0.71, + "learning_rate": 1.9939794407407204e-06, + "loss": 0.5108, + "step": 5697 + }, + { + "epoch": 0.71, + "learning_rate": 1.9923573333219436e-06, + "loss": 0.4895, + "step": 5698 + }, + { + "epoch": 0.71, + "learning_rate": 1.9907357217879496e-06, + "loss": 0.5354, + "step": 5699 + }, + { + "epoch": 0.71, + "learning_rate": 1.989114606406104e-06, + "loss": 0.5124, + "step": 5700 + }, + { + "epoch": 0.71, + "learning_rate": 1.987493987443687e-06, + "loss": 0.5081, + "step": 5701 + }, + { + "epoch": 0.71, + "learning_rate": 1.985873865167897e-06, + "loss": 0.5303, + "step": 5702 + }, + { + "epoch": 0.71, + "learning_rate": 1.984254239845856e-06, + "loss": 0.5007, + "step": 5703 + }, + { + "epoch": 0.71, + "learning_rate": 1.9826351117445935e-06, + "loss": 0.543, + "step": 5704 + }, + { + "epoch": 0.72, + "learning_rate": 1.981016481131066e-06, + "loss": 0.574, + "step": 5705 + }, + { + "epoch": 0.72, + "learning_rate": 1.9793983482721453e-06, + "loss": 0.4987, + "step": 5706 + }, + { + "epoch": 0.72, + "learning_rate": 1.97778071343462e-06, + "loss": 0.5066, + "step": 5707 + }, + { + "epoch": 0.72, + "learning_rate": 1.9761635768852005e-06, + "loss": 0.4995, + "step": 5708 + }, + { + "epoch": 0.72, + "learning_rate": 1.974546938890507e-06, + "loss": 0.5684, + "step": 5709 + }, + { + "epoch": 0.72, + "learning_rate": 1.9729307997170856e-06, + "loss": 0.5259, + "step": 5710 + }, + { + "epoch": 0.72, + "learning_rate": 1.9713151596313963e-06, + "loss": 0.5503, + "step": 5711 + }, + { + "epoch": 0.72, + "learning_rate": 1.9697000188998177e-06, + "loss": 0.2231, + "step": 5712 + }, + { + "epoch": 0.72, + "learning_rate": 1.968085377788646e-06, + "loss": 0.4976, + "step": 5713 + }, + { + "epoch": 0.72, + "learning_rate": 1.9664712365640977e-06, + "loss": 0.5353, + "step": 5714 + }, + { + "epoch": 0.72, + "learning_rate": 1.964857595492299e-06, + "loss": 0.5549, + "step": 5715 + }, + { + "epoch": 0.72, + "learning_rate": 1.9632444548393014e-06, + "loss": 0.5423, + "step": 5716 + }, + { + "epoch": 0.72, + "learning_rate": 1.9616318148710717e-06, + "loss": 0.453, + "step": 5717 + }, + { + "epoch": 0.72, + "learning_rate": 1.960019675853493e-06, + "loss": 0.4673, + "step": 5718 + }, + { + "epoch": 0.72, + "learning_rate": 1.9584080380523686e-06, + "loss": 0.2409, + "step": 5719 + }, + { + "epoch": 0.72, + "learning_rate": 1.9567969017334138e-06, + "loss": 0.4638, + "step": 5720 + }, + { + "epoch": 0.72, + "learning_rate": 1.9551862671622663e-06, + "loss": 0.5338, + "step": 5721 + }, + { + "epoch": 0.72, + "learning_rate": 1.953576134604479e-06, + "loss": 0.2425, + "step": 5722 + }, + { + "epoch": 0.72, + "learning_rate": 1.9519665043255225e-06, + "loss": 0.5238, + "step": 5723 + }, + { + "epoch": 0.72, + "learning_rate": 1.9503573765907864e-06, + "loss": 0.4905, + "step": 5724 + }, + { + "epoch": 0.72, + "learning_rate": 1.9487487516655714e-06, + "loss": 0.5496, + "step": 5725 + }, + { + "epoch": 0.72, + "learning_rate": 1.9471406298151013e-06, + "loss": 0.4846, + "step": 5726 + }, + { + "epoch": 0.72, + "learning_rate": 1.945533011304516e-06, + "loss": 0.4949, + "step": 5727 + }, + { + "epoch": 0.72, + "learning_rate": 1.94392589639887e-06, + "loss": 0.5453, + "step": 5728 + }, + { + "epoch": 0.72, + "learning_rate": 1.9423192853631374e-06, + "loss": 0.5632, + "step": 5729 + }, + { + "epoch": 0.72, + "learning_rate": 1.940713178462209e-06, + "loss": 0.5651, + "step": 5730 + }, + { + "epoch": 0.72, + "learning_rate": 1.939107575960888e-06, + "loss": 0.5357, + "step": 5731 + }, + { + "epoch": 0.72, + "learning_rate": 1.9375024781239006e-06, + "loss": 0.4874, + "step": 5732 + }, + { + "epoch": 0.72, + "learning_rate": 1.935897885215886e-06, + "loss": 0.5443, + "step": 5733 + }, + { + "epoch": 0.72, + "learning_rate": 1.934293797501402e-06, + "loss": 0.5554, + "step": 5734 + }, + { + "epoch": 0.72, + "learning_rate": 1.932690215244924e-06, + "loss": 0.498, + "step": 5735 + }, + { + "epoch": 0.72, + "learning_rate": 1.9310871387108387e-06, + "loss": 0.4742, + "step": 5736 + }, + { + "epoch": 0.72, + "learning_rate": 1.9294845681634556e-06, + "loss": 0.5251, + "step": 5737 + }, + { + "epoch": 0.72, + "learning_rate": 1.9278825038669975e-06, + "loss": 0.5063, + "step": 5738 + }, + { + "epoch": 0.72, + "learning_rate": 1.926280946085605e-06, + "loss": 0.5124, + "step": 5739 + }, + { + "epoch": 0.72, + "learning_rate": 1.924679895083335e-06, + "loss": 0.5232, + "step": 5740 + }, + { + "epoch": 0.72, + "learning_rate": 1.9230793511241623e-06, + "loss": 0.4604, + "step": 5741 + }, + { + "epoch": 0.72, + "learning_rate": 1.921479314471973e-06, + "loss": 0.547, + "step": 5742 + }, + { + "epoch": 0.72, + "learning_rate": 1.9198797853905735e-06, + "loss": 0.4816, + "step": 5743 + }, + { + "epoch": 0.72, + "learning_rate": 1.9182807641436877e-06, + "loss": 0.5239, + "step": 5744 + }, + { + "epoch": 0.72, + "learning_rate": 1.9166822509949533e-06, + "loss": 0.2018, + "step": 5745 + }, + { + "epoch": 0.72, + "learning_rate": 1.915084246207927e-06, + "loss": 0.4777, + "step": 5746 + }, + { + "epoch": 0.72, + "learning_rate": 1.9134867500460765e-06, + "loss": 0.5412, + "step": 5747 + }, + { + "epoch": 0.72, + "learning_rate": 1.9118897627727894e-06, + "loss": 0.5216, + "step": 5748 + }, + { + "epoch": 0.72, + "learning_rate": 1.9102932846513695e-06, + "loss": 0.61, + "step": 5749 + }, + { + "epoch": 0.72, + "learning_rate": 1.908697315945036e-06, + "loss": 0.5311, + "step": 5750 + }, + { + "epoch": 0.72, + "learning_rate": 1.9071018569169264e-06, + "loss": 0.1995, + "step": 5751 + }, + { + "epoch": 0.72, + "learning_rate": 1.905506907830087e-06, + "loss": 0.5106, + "step": 5752 + }, + { + "epoch": 0.72, + "learning_rate": 1.903912468947488e-06, + "loss": 0.5626, + "step": 5753 + }, + { + "epoch": 0.72, + "learning_rate": 1.9023185405320111e-06, + "loss": 0.5332, + "step": 5754 + }, + { + "epoch": 0.72, + "learning_rate": 1.9007251228464563e-06, + "loss": 0.4843, + "step": 5755 + }, + { + "epoch": 0.72, + "learning_rate": 1.899132216153537e-06, + "loss": 0.5478, + "step": 5756 + }, + { + "epoch": 0.72, + "learning_rate": 1.8975398207158863e-06, + "loss": 0.5103, + "step": 5757 + }, + { + "epoch": 0.72, + "learning_rate": 1.8959479367960464e-06, + "loss": 0.5442, + "step": 5758 + }, + { + "epoch": 0.72, + "learning_rate": 1.8943565646564804e-06, + "loss": 0.5323, + "step": 5759 + }, + { + "epoch": 0.72, + "learning_rate": 1.892765704559566e-06, + "loss": 0.4988, + "step": 5760 + }, + { + "epoch": 0.72, + "learning_rate": 1.891175356767596e-06, + "loss": 0.5084, + "step": 5761 + }, + { + "epoch": 0.72, + "learning_rate": 1.8895855215427806e-06, + "loss": 0.5199, + "step": 5762 + }, + { + "epoch": 0.72, + "learning_rate": 1.88799619914724e-06, + "loss": 0.5295, + "step": 5763 + }, + { + "epoch": 0.72, + "learning_rate": 1.8864073898430164e-06, + "loss": 0.578, + "step": 5764 + }, + { + "epoch": 0.72, + "learning_rate": 1.884819093892063e-06, + "loss": 0.4888, + "step": 5765 + }, + { + "epoch": 0.72, + "learning_rate": 1.8832313115562506e-06, + "loss": 0.5353, + "step": 5766 + }, + { + "epoch": 0.72, + "learning_rate": 1.8816440430973666e-06, + "loss": 0.4858, + "step": 5767 + }, + { + "epoch": 0.72, + "learning_rate": 1.880057288777108e-06, + "loss": 0.5085, + "step": 5768 + }, + { + "epoch": 0.72, + "learning_rate": 1.8784710488570922e-06, + "loss": 0.4842, + "step": 5769 + }, + { + "epoch": 0.72, + "learning_rate": 1.876885323598851e-06, + "loss": 0.5424, + "step": 5770 + }, + { + "epoch": 0.72, + "learning_rate": 1.875300113263831e-06, + "loss": 0.516, + "step": 5771 + }, + { + "epoch": 0.72, + "learning_rate": 1.873715418113392e-06, + "loss": 0.5183, + "step": 5772 + }, + { + "epoch": 0.72, + "learning_rate": 1.8721312384088142e-06, + "loss": 0.5087, + "step": 5773 + }, + { + "epoch": 0.72, + "learning_rate": 1.870547574411284e-06, + "loss": 0.5055, + "step": 5774 + }, + { + "epoch": 0.72, + "learning_rate": 1.8689644263819102e-06, + "loss": 0.5051, + "step": 5775 + }, + { + "epoch": 0.72, + "learning_rate": 1.8673817945817146e-06, + "loss": 0.2245, + "step": 5776 + }, + { + "epoch": 0.72, + "learning_rate": 1.8657996792716327e-06, + "loss": 0.5248, + "step": 5777 + }, + { + "epoch": 0.72, + "learning_rate": 1.8642180807125176e-06, + "loss": 0.5458, + "step": 5778 + }, + { + "epoch": 0.72, + "learning_rate": 1.8626369991651323e-06, + "loss": 0.523, + "step": 5779 + }, + { + "epoch": 0.72, + "learning_rate": 1.8610564348901577e-06, + "loss": 0.4963, + "step": 5780 + }, + { + "epoch": 0.72, + "learning_rate": 1.8594763881481908e-06, + "loss": 0.54, + "step": 5781 + }, + { + "epoch": 0.72, + "learning_rate": 1.857896859199741e-06, + "loss": 0.5156, + "step": 5782 + }, + { + "epoch": 0.72, + "learning_rate": 1.856317848305233e-06, + "loss": 0.5349, + "step": 5783 + }, + { + "epoch": 0.73, + "learning_rate": 1.854739355725006e-06, + "loss": 0.5483, + "step": 5784 + }, + { + "epoch": 0.73, + "learning_rate": 1.8531613817193155e-06, + "loss": 0.483, + "step": 5785 + }, + { + "epoch": 0.73, + "learning_rate": 1.8515839265483271e-06, + "loss": 0.4898, + "step": 5786 + }, + { + "epoch": 0.73, + "learning_rate": 1.8500069904721241e-06, + "loss": 0.5533, + "step": 5787 + }, + { + "epoch": 0.73, + "learning_rate": 1.8484305737507041e-06, + "loss": 0.5503, + "step": 5788 + }, + { + "epoch": 0.73, + "learning_rate": 1.8468546766439788e-06, + "loss": 0.5257, + "step": 5789 + }, + { + "epoch": 0.73, + "learning_rate": 1.8452792994117742e-06, + "loss": 0.5444, + "step": 5790 + }, + { + "epoch": 0.73, + "learning_rate": 1.84370444231383e-06, + "loss": 0.555, + "step": 5791 + }, + { + "epoch": 0.73, + "learning_rate": 1.8421301056098035e-06, + "loss": 0.522, + "step": 5792 + }, + { + "epoch": 0.73, + "learning_rate": 1.8405562895592583e-06, + "loss": 0.5027, + "step": 5793 + }, + { + "epoch": 0.73, + "learning_rate": 1.8389829944216797e-06, + "loss": 0.4994, + "step": 5794 + }, + { + "epoch": 0.73, + "learning_rate": 1.8374102204564641e-06, + "loss": 0.4636, + "step": 5795 + }, + { + "epoch": 0.73, + "learning_rate": 1.8358379679229233e-06, + "loss": 0.5028, + "step": 5796 + }, + { + "epoch": 0.73, + "learning_rate": 1.8342662370802816e-06, + "loss": 0.5555, + "step": 5797 + }, + { + "epoch": 0.73, + "learning_rate": 1.8326950281876799e-06, + "loss": 0.4876, + "step": 5798 + }, + { + "epoch": 0.73, + "learning_rate": 1.831124341504168e-06, + "loss": 0.5224, + "step": 5799 + }, + { + "epoch": 0.73, + "learning_rate": 1.8295541772887139e-06, + "loss": 0.5617, + "step": 5800 + }, + { + "epoch": 0.73, + "learning_rate": 1.827984535800198e-06, + "loss": 0.5986, + "step": 5801 + }, + { + "epoch": 0.73, + "learning_rate": 1.8264154172974158e-06, + "loss": 0.6019, + "step": 5802 + }, + { + "epoch": 0.73, + "learning_rate": 1.8248468220390748e-06, + "loss": 0.5029, + "step": 5803 + }, + { + "epoch": 0.73, + "learning_rate": 1.8232787502837967e-06, + "loss": 0.4338, + "step": 5804 + }, + { + "epoch": 0.73, + "learning_rate": 1.82171120229012e-06, + "loss": 0.5144, + "step": 5805 + }, + { + "epoch": 0.73, + "learning_rate": 1.8201441783164897e-06, + "loss": 0.5057, + "step": 5806 + }, + { + "epoch": 0.73, + "learning_rate": 1.8185776786212705e-06, + "loss": 0.4964, + "step": 5807 + }, + { + "epoch": 0.73, + "learning_rate": 1.8170117034627393e-06, + "loss": 0.4972, + "step": 5808 + }, + { + "epoch": 0.73, + "learning_rate": 1.8154462530990863e-06, + "loss": 0.5564, + "step": 5809 + }, + { + "epoch": 0.73, + "learning_rate": 1.8138813277884153e-06, + "loss": 0.5264, + "step": 5810 + }, + { + "epoch": 0.73, + "learning_rate": 1.8123169277887415e-06, + "loss": 0.5083, + "step": 5811 + }, + { + "epoch": 0.73, + "learning_rate": 1.8107530533579959e-06, + "loss": 0.2319, + "step": 5812 + }, + { + "epoch": 0.73, + "learning_rate": 1.8091897047540225e-06, + "loss": 0.5031, + "step": 5813 + }, + { + "epoch": 0.73, + "learning_rate": 1.8076268822345777e-06, + "loss": 0.5711, + "step": 5814 + }, + { + "epoch": 0.73, + "learning_rate": 1.8060645860573327e-06, + "loss": 0.5892, + "step": 5815 + }, + { + "epoch": 0.73, + "learning_rate": 1.8045028164798718e-06, + "loss": 0.5537, + "step": 5816 + }, + { + "epoch": 0.73, + "learning_rate": 1.8029415737596885e-06, + "loss": 0.4725, + "step": 5817 + }, + { + "epoch": 0.73, + "learning_rate": 1.8013808581541941e-06, + "loss": 0.5016, + "step": 5818 + }, + { + "epoch": 0.73, + "learning_rate": 1.7998206699207115e-06, + "loss": 0.5316, + "step": 5819 + }, + { + "epoch": 0.73, + "learning_rate": 1.798261009316477e-06, + "loss": 0.4809, + "step": 5820 + }, + { + "epoch": 0.73, + "learning_rate": 1.7967018765986404e-06, + "loss": 0.5124, + "step": 5821 + }, + { + "epoch": 0.73, + "learning_rate": 1.7951432720242605e-06, + "loss": 0.4876, + "step": 5822 + }, + { + "epoch": 0.73, + "learning_rate": 1.7935851958503142e-06, + "loss": 0.5264, + "step": 5823 + }, + { + "epoch": 0.73, + "learning_rate": 1.7920276483336885e-06, + "loss": 0.5172, + "step": 5824 + }, + { + "epoch": 0.73, + "learning_rate": 1.7904706297311842e-06, + "loss": 0.519, + "step": 5825 + }, + { + "epoch": 0.73, + "learning_rate": 1.7889141402995163e-06, + "loss": 0.5342, + "step": 5826 + }, + { + "epoch": 0.73, + "learning_rate": 1.7873581802953072e-06, + "loss": 0.5213, + "step": 5827 + }, + { + "epoch": 0.73, + "learning_rate": 1.785802749975098e-06, + "loss": 0.5087, + "step": 5828 + }, + { + "epoch": 0.73, + "learning_rate": 1.784247849595339e-06, + "loss": 0.5194, + "step": 5829 + }, + { + "epoch": 0.73, + "learning_rate": 1.7826934794123952e-06, + "loss": 0.5237, + "step": 5830 + }, + { + "epoch": 0.73, + "learning_rate": 1.781139639682543e-06, + "loss": 0.411, + "step": 5831 + }, + { + "epoch": 0.73, + "learning_rate": 1.7795863306619732e-06, + "loss": 0.5579, + "step": 5832 + }, + { + "epoch": 0.73, + "learning_rate": 1.7780335526067842e-06, + "loss": 0.5608, + "step": 5833 + }, + { + "epoch": 0.73, + "learning_rate": 1.7764813057729918e-06, + "loss": 0.4993, + "step": 5834 + }, + { + "epoch": 0.73, + "learning_rate": 1.774929590416522e-06, + "loss": 0.488, + "step": 5835 + }, + { + "epoch": 0.73, + "learning_rate": 1.7733784067932142e-06, + "loss": 0.5401, + "step": 5836 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718277551588213e-06, + "loss": 0.5702, + "step": 5837 + }, + { + "epoch": 0.73, + "learning_rate": 1.7702776357690037e-06, + "loss": 0.5281, + "step": 5838 + }, + { + "epoch": 0.73, + "learning_rate": 1.768728048879339e-06, + "loss": 0.5527, + "step": 5839 + }, + { + "epoch": 0.73, + "learning_rate": 1.7671789947453145e-06, + "loss": 0.5128, + "step": 5840 + }, + { + "epoch": 0.73, + "learning_rate": 1.7656304736223301e-06, + "loss": 0.5068, + "step": 5841 + }, + { + "epoch": 0.73, + "learning_rate": 1.7640824857657007e-06, + "loss": 0.5661, + "step": 5842 + }, + { + "epoch": 0.73, + "learning_rate": 1.7625350314306472e-06, + "loss": 0.5024, + "step": 5843 + }, + { + "epoch": 0.73, + "learning_rate": 1.7609881108723065e-06, + "loss": 0.6039, + "step": 5844 + }, + { + "epoch": 0.73, + "learning_rate": 1.7594417243457286e-06, + "loss": 0.5159, + "step": 5845 + }, + { + "epoch": 0.73, + "learning_rate": 1.7578958721058725e-06, + "loss": 0.4558, + "step": 5846 + }, + { + "epoch": 0.73, + "learning_rate": 1.7563505544076115e-06, + "loss": 0.5066, + "step": 5847 + }, + { + "epoch": 0.73, + "learning_rate": 1.7548057715057303e-06, + "loss": 0.6157, + "step": 5848 + }, + { + "epoch": 0.73, + "learning_rate": 1.7532615236549222e-06, + "loss": 0.4456, + "step": 5849 + }, + { + "epoch": 0.73, + "learning_rate": 1.7517178111097965e-06, + "loss": 0.5209, + "step": 5850 + }, + { + "epoch": 0.73, + "learning_rate": 1.7501746341248727e-06, + "loss": 0.5098, + "step": 5851 + }, + { + "epoch": 0.73, + "learning_rate": 1.7486319929545814e-06, + "loss": 0.5174, + "step": 5852 + }, + { + "epoch": 0.73, + "learning_rate": 1.7470898878532678e-06, + "loss": 0.5533, + "step": 5853 + }, + { + "epoch": 0.73, + "learning_rate": 1.7455483190751825e-06, + "loss": 0.5341, + "step": 5854 + }, + { + "epoch": 0.73, + "learning_rate": 1.7440072868744935e-06, + "loss": 0.4842, + "step": 5855 + }, + { + "epoch": 0.73, + "learning_rate": 1.7424667915052784e-06, + "loss": 0.543, + "step": 5856 + }, + { + "epoch": 0.73, + "learning_rate": 1.7409268332215258e-06, + "loss": 0.554, + "step": 5857 + }, + { + "epoch": 0.73, + "learning_rate": 1.7393874122771376e-06, + "loss": 0.6026, + "step": 5858 + }, + { + "epoch": 0.73, + "learning_rate": 1.7378485289259228e-06, + "loss": 0.4883, + "step": 5859 + }, + { + "epoch": 0.73, + "learning_rate": 1.7363101834216068e-06, + "loss": 0.5359, + "step": 5860 + }, + { + "epoch": 0.73, + "learning_rate": 1.7347723760178237e-06, + "loss": 0.2331, + "step": 5861 + }, + { + "epoch": 0.73, + "learning_rate": 1.7332351069681185e-06, + "loss": 0.5646, + "step": 5862 + }, + { + "epoch": 0.73, + "learning_rate": 1.7316983765259498e-06, + "loss": 0.5702, + "step": 5863 + }, + { + "epoch": 0.74, + "learning_rate": 1.7301621849446865e-06, + "loss": 0.5687, + "step": 5864 + }, + { + "epoch": 0.74, + "learning_rate": 1.7286265324776048e-06, + "loss": 0.5458, + "step": 5865 + }, + { + "epoch": 0.74, + "learning_rate": 1.7270914193778977e-06, + "loss": 0.5167, + "step": 5866 + }, + { + "epoch": 0.74, + "learning_rate": 1.7255568458986654e-06, + "loss": 0.5063, + "step": 5867 + }, + { + "epoch": 0.74, + "learning_rate": 1.724022812292922e-06, + "loss": 0.4922, + "step": 5868 + }, + { + "epoch": 0.74, + "learning_rate": 1.7224893188135921e-06, + "loss": 0.488, + "step": 5869 + }, + { + "epoch": 0.74, + "learning_rate": 1.720956365713507e-06, + "loss": 0.4936, + "step": 5870 + }, + { + "epoch": 0.74, + "learning_rate": 1.7194239532454138e-06, + "loss": 0.5242, + "step": 5871 + }, + { + "epoch": 0.74, + "learning_rate": 1.7178920816619692e-06, + "loss": 0.5436, + "step": 5872 + }, + { + "epoch": 0.74, + "learning_rate": 1.71636075121574e-06, + "loss": 0.5662, + "step": 5873 + }, + { + "epoch": 0.74, + "learning_rate": 1.7148299621592057e-06, + "loss": 0.5111, + "step": 5874 + }, + { + "epoch": 0.74, + "learning_rate": 1.7132997147447522e-06, + "loss": 0.4614, + "step": 5875 + }, + { + "epoch": 0.74, + "learning_rate": 1.7117700092246804e-06, + "loss": 0.528, + "step": 5876 + }, + { + "epoch": 0.74, + "learning_rate": 1.7102408458512e-06, + "loss": 0.4905, + "step": 5877 + }, + { + "epoch": 0.74, + "learning_rate": 1.7087122248764315e-06, + "loss": 0.4753, + "step": 5878 + }, + { + "epoch": 0.74, + "learning_rate": 1.707184146552407e-06, + "loss": 0.5157, + "step": 5879 + }, + { + "epoch": 0.74, + "learning_rate": 1.7056566111310691e-06, + "loss": 0.5084, + "step": 5880 + }, + { + "epoch": 0.74, + "learning_rate": 1.7041296188642669e-06, + "loss": 0.5233, + "step": 5881 + }, + { + "epoch": 0.74, + "learning_rate": 1.7026031700037653e-06, + "loss": 0.5815, + "step": 5882 + }, + { + "epoch": 0.74, + "learning_rate": 1.7010772648012369e-06, + "loss": 0.4932, + "step": 5883 + }, + { + "epoch": 0.74, + "learning_rate": 1.6995519035082652e-06, + "loss": 0.5119, + "step": 5884 + }, + { + "epoch": 0.74, + "learning_rate": 1.698027086376346e-06, + "loss": 0.4744, + "step": 5885 + }, + { + "epoch": 0.74, + "learning_rate": 1.6965028136568795e-06, + "loss": 0.4764, + "step": 5886 + }, + { + "epoch": 0.74, + "learning_rate": 1.6949790856011822e-06, + "loss": 0.5119, + "step": 5887 + }, + { + "epoch": 0.74, + "learning_rate": 1.693455902460478e-06, + "loss": 0.5076, + "step": 5888 + }, + { + "epoch": 0.74, + "learning_rate": 1.6919332644859022e-06, + "loss": 0.578, + "step": 5889 + }, + { + "epoch": 0.74, + "learning_rate": 1.6904111719284994e-06, + "loss": 0.4903, + "step": 5890 + }, + { + "epoch": 0.74, + "learning_rate": 1.6888896250392257e-06, + "loss": 0.5601, + "step": 5891 + }, + { + "epoch": 0.74, + "learning_rate": 1.687368624068943e-06, + "loss": 0.5147, + "step": 5892 + }, + { + "epoch": 0.74, + "learning_rate": 1.685848169268428e-06, + "loss": 0.4552, + "step": 5893 + }, + { + "epoch": 0.74, + "learning_rate": 1.6843282608883653e-06, + "loss": 0.5459, + "step": 5894 + }, + { + "epoch": 0.74, + "learning_rate": 1.6828088991793501e-06, + "loss": 0.5387, + "step": 5895 + }, + { + "epoch": 0.74, + "learning_rate": 1.681290084391886e-06, + "loss": 0.2177, + "step": 5896 + }, + { + "epoch": 0.74, + "learning_rate": 1.6797718167763904e-06, + "loss": 0.5163, + "step": 5897 + }, + { + "epoch": 0.74, + "learning_rate": 1.6782540965831835e-06, + "loss": 0.5151, + "step": 5898 + }, + { + "epoch": 0.74, + "learning_rate": 1.6767369240625003e-06, + "loss": 0.507, + "step": 5899 + }, + { + "epoch": 0.74, + "learning_rate": 1.6752202994644857e-06, + "loss": 0.4896, + "step": 5900 + }, + { + "epoch": 0.74, + "learning_rate": 1.673704223039192e-06, + "loss": 0.4999, + "step": 5901 + }, + { + "epoch": 0.74, + "learning_rate": 1.672188695036583e-06, + "loss": 0.4984, + "step": 5902 + }, + { + "epoch": 0.74, + "learning_rate": 1.6706737157065305e-06, + "loss": 0.5365, + "step": 5903 + }, + { + "epoch": 0.74, + "learning_rate": 1.6691592852988193e-06, + "loss": 0.4334, + "step": 5904 + }, + { + "epoch": 0.74, + "learning_rate": 1.6676454040631362e-06, + "loss": 0.5333, + "step": 5905 + }, + { + "epoch": 0.74, + "learning_rate": 1.6661320722490848e-06, + "loss": 0.5119, + "step": 5906 + }, + { + "epoch": 0.74, + "learning_rate": 1.6646192901061747e-06, + "loss": 0.5532, + "step": 5907 + }, + { + "epoch": 0.74, + "learning_rate": 1.663107057883827e-06, + "loss": 0.4838, + "step": 5908 + }, + { + "epoch": 0.74, + "learning_rate": 1.6615953758313702e-06, + "loss": 0.4851, + "step": 5909 + }, + { + "epoch": 0.74, + "learning_rate": 1.6600842441980425e-06, + "loss": 0.5482, + "step": 5910 + }, + { + "epoch": 0.74, + "learning_rate": 1.6585736632329935e-06, + "loss": 0.4826, + "step": 5911 + }, + { + "epoch": 0.74, + "learning_rate": 1.6570636331852763e-06, + "loss": 0.4679, + "step": 5912 + }, + { + "epoch": 0.74, + "learning_rate": 1.6555541543038595e-06, + "loss": 0.5305, + "step": 5913 + }, + { + "epoch": 0.74, + "learning_rate": 1.6540452268376172e-06, + "loss": 0.4855, + "step": 5914 + }, + { + "epoch": 0.74, + "learning_rate": 1.6525368510353346e-06, + "loss": 0.5393, + "step": 5915 + }, + { + "epoch": 0.74, + "learning_rate": 1.6510290271457046e-06, + "loss": 0.5586, + "step": 5916 + }, + { + "epoch": 0.74, + "learning_rate": 1.6495217554173315e-06, + "loss": 0.5183, + "step": 5917 + }, + { + "epoch": 0.74, + "learning_rate": 1.648015036098723e-06, + "loss": 0.4963, + "step": 5918 + }, + { + "epoch": 0.74, + "learning_rate": 1.6465088694383003e-06, + "loss": 0.5399, + "step": 5919 + }, + { + "epoch": 0.74, + "learning_rate": 1.6450032556843937e-06, + "loss": 0.4339, + "step": 5920 + }, + { + "epoch": 0.74, + "learning_rate": 1.6434981950852402e-06, + "loss": 0.4615, + "step": 5921 + }, + { + "epoch": 0.74, + "learning_rate": 1.641993687888987e-06, + "loss": 0.5248, + "step": 5922 + }, + { + "epoch": 0.74, + "learning_rate": 1.640489734343691e-06, + "loss": 0.555, + "step": 5923 + }, + { + "epoch": 0.74, + "learning_rate": 1.6389863346973134e-06, + "loss": 0.5513, + "step": 5924 + }, + { + "epoch": 0.74, + "learning_rate": 1.6374834891977281e-06, + "loss": 0.4475, + "step": 5925 + }, + { + "epoch": 0.74, + "learning_rate": 1.6359811980927176e-06, + "loss": 0.5948, + "step": 5926 + }, + { + "epoch": 0.74, + "learning_rate": 1.634479461629971e-06, + "loss": 0.4403, + "step": 5927 + }, + { + "epoch": 0.74, + "learning_rate": 1.6329782800570886e-06, + "loss": 0.4975, + "step": 5928 + }, + { + "epoch": 0.74, + "learning_rate": 1.631477653621575e-06, + "loss": 0.588, + "step": 5929 + }, + { + "epoch": 0.74, + "learning_rate": 1.629977582570847e-06, + "loss": 0.5471, + "step": 5930 + }, + { + "epoch": 0.74, + "learning_rate": 1.6284780671522289e-06, + "loss": 0.447, + "step": 5931 + }, + { + "epoch": 0.74, + "learning_rate": 1.6269791076129532e-06, + "loss": 0.5124, + "step": 5932 + }, + { + "epoch": 0.74, + "learning_rate": 1.625480704200162e-06, + "loss": 0.5147, + "step": 5933 + }, + { + "epoch": 0.74, + "learning_rate": 1.6239828571609011e-06, + "loss": 0.5739, + "step": 5934 + }, + { + "epoch": 0.74, + "learning_rate": 1.6224855667421301e-06, + "loss": 0.5188, + "step": 5935 + }, + { + "epoch": 0.74, + "learning_rate": 1.6209888331907142e-06, + "loss": 0.5804, + "step": 5936 + }, + { + "epoch": 0.74, + "learning_rate": 1.6194926567534275e-06, + "loss": 0.5265, + "step": 5937 + }, + { + "epoch": 0.74, + "learning_rate": 1.617997037676951e-06, + "loss": 0.5839, + "step": 5938 + }, + { + "epoch": 0.74, + "learning_rate": 1.6165019762078776e-06, + "loss": 0.5027, + "step": 5939 + }, + { + "epoch": 0.74, + "learning_rate": 1.6150074725927011e-06, + "loss": 0.4965, + "step": 5940 + }, + { + "epoch": 0.74, + "learning_rate": 1.6135135270778302e-06, + "loss": 0.5882, + "step": 5941 + }, + { + "epoch": 0.74, + "learning_rate": 1.6120201399095786e-06, + "loss": 0.4904, + "step": 5942 + }, + { + "epoch": 0.74, + "learning_rate": 1.6105273113341684e-06, + "loss": 0.5572, + "step": 5943 + }, + { + "epoch": 0.75, + "learning_rate": 1.6090350415977307e-06, + "loss": 0.5385, + "step": 5944 + }, + { + "epoch": 0.75, + "learning_rate": 1.6075433309463013e-06, + "loss": 0.4499, + "step": 5945 + }, + { + "epoch": 0.75, + "learning_rate": 1.606052179625826e-06, + "loss": 0.467, + "step": 5946 + }, + { + "epoch": 0.75, + "learning_rate": 1.6045615878821591e-06, + "loss": 0.5013, + "step": 5947 + }, + { + "epoch": 0.75, + "learning_rate": 1.6030715559610616e-06, + "loss": 0.5301, + "step": 5948 + }, + { + "epoch": 0.75, + "learning_rate": 1.601582084108204e-06, + "loss": 0.4823, + "step": 5949 + }, + { + "epoch": 0.75, + "learning_rate": 1.6000931725691593e-06, + "loss": 0.4891, + "step": 5950 + }, + { + "epoch": 0.75, + "learning_rate": 1.598604821589414e-06, + "loss": 0.4847, + "step": 5951 + }, + { + "epoch": 0.75, + "learning_rate": 1.5971170314143592e-06, + "loss": 0.5324, + "step": 5952 + }, + { + "epoch": 0.75, + "learning_rate": 1.595629802289294e-06, + "loss": 0.5682, + "step": 5953 + }, + { + "epoch": 0.75, + "learning_rate": 1.5941431344594255e-06, + "loss": 0.5156, + "step": 5954 + }, + { + "epoch": 0.75, + "learning_rate": 1.5926570281698695e-06, + "loss": 0.5086, + "step": 5955 + }, + { + "epoch": 0.75, + "learning_rate": 1.5911714836656443e-06, + "loss": 0.5658, + "step": 5956 + }, + { + "epoch": 0.75, + "learning_rate": 1.5896865011916806e-06, + "loss": 0.5804, + "step": 5957 + }, + { + "epoch": 0.75, + "learning_rate": 1.5882020809928145e-06, + "loss": 0.5463, + "step": 5958 + }, + { + "epoch": 0.75, + "learning_rate": 1.58671822331379e-06, + "loss": 0.5004, + "step": 5959 + }, + { + "epoch": 0.75, + "learning_rate": 1.5852349283992597e-06, + "loss": 0.4794, + "step": 5960 + }, + { + "epoch": 0.75, + "learning_rate": 1.583752196493778e-06, + "loss": 0.566, + "step": 5961 + }, + { + "epoch": 0.75, + "learning_rate": 1.5822700278418124e-06, + "loss": 0.4722, + "step": 5962 + }, + { + "epoch": 0.75, + "learning_rate": 1.5807884226877345e-06, + "loss": 0.5342, + "step": 5963 + }, + { + "epoch": 0.75, + "learning_rate": 1.5793073812758243e-06, + "loss": 0.5306, + "step": 5964 + }, + { + "epoch": 0.75, + "learning_rate": 1.577826903850268e-06, + "loss": 0.5342, + "step": 5965 + }, + { + "epoch": 0.75, + "learning_rate": 1.5763469906551616e-06, + "loss": 0.5321, + "step": 5966 + }, + { + "epoch": 0.75, + "learning_rate": 1.5748676419345016e-06, + "loss": 0.4657, + "step": 5967 + }, + { + "epoch": 0.75, + "learning_rate": 1.573388857932197e-06, + "loss": 0.4468, + "step": 5968 + }, + { + "epoch": 0.75, + "learning_rate": 1.571910638892063e-06, + "loss": 0.2239, + "step": 5969 + }, + { + "epoch": 0.75, + "learning_rate": 1.5704329850578203e-06, + "loss": 0.5269, + "step": 5970 + }, + { + "epoch": 0.75, + "learning_rate": 1.5689558966730984e-06, + "loss": 0.5088, + "step": 5971 + }, + { + "epoch": 0.75, + "learning_rate": 1.5674793739814288e-06, + "loss": 0.4902, + "step": 5972 + }, + { + "epoch": 0.75, + "learning_rate": 1.5660034172262551e-06, + "loss": 0.4782, + "step": 5973 + }, + { + "epoch": 0.75, + "learning_rate": 1.5645280266509256e-06, + "loss": 0.5313, + "step": 5974 + }, + { + "epoch": 0.75, + "learning_rate": 1.5630532024986944e-06, + "loss": 0.46, + "step": 5975 + }, + { + "epoch": 0.75, + "learning_rate": 1.5615789450127256e-06, + "loss": 0.5195, + "step": 5976 + }, + { + "epoch": 0.75, + "learning_rate": 1.5601052544360834e-06, + "loss": 0.5343, + "step": 5977 + }, + { + "epoch": 0.75, + "learning_rate": 1.558632131011744e-06, + "loss": 0.4604, + "step": 5978 + }, + { + "epoch": 0.75, + "learning_rate": 1.5571595749825885e-06, + "loss": 0.4764, + "step": 5979 + }, + { + "epoch": 0.75, + "learning_rate": 1.5556875865914046e-06, + "loss": 0.4761, + "step": 5980 + }, + { + "epoch": 0.75, + "learning_rate": 1.5542161660808864e-06, + "loss": 0.4605, + "step": 5981 + }, + { + "epoch": 0.75, + "learning_rate": 1.5527453136936354e-06, + "loss": 0.5412, + "step": 5982 + }, + { + "epoch": 0.75, + "learning_rate": 1.5512750296721552e-06, + "loss": 0.5202, + "step": 5983 + }, + { + "epoch": 0.75, + "learning_rate": 1.5498053142588598e-06, + "loss": 0.4761, + "step": 5984 + }, + { + "epoch": 0.75, + "learning_rate": 1.5483361676960695e-06, + "loss": 0.5781, + "step": 5985 + }, + { + "epoch": 0.75, + "learning_rate": 1.5468675902260095e-06, + "loss": 0.5218, + "step": 5986 + }, + { + "epoch": 0.75, + "learning_rate": 1.545399582090812e-06, + "loss": 0.5069, + "step": 5987 + }, + { + "epoch": 0.75, + "learning_rate": 1.5439321435325122e-06, + "loss": 0.5421, + "step": 5988 + }, + { + "epoch": 0.75, + "learning_rate": 1.5424652747930552e-06, + "loss": 0.5596, + "step": 5989 + }, + { + "epoch": 0.75, + "learning_rate": 1.5409989761142913e-06, + "loss": 0.5611, + "step": 5990 + }, + { + "epoch": 0.75, + "learning_rate": 1.539533247737976e-06, + "loss": 0.5554, + "step": 5991 + }, + { + "epoch": 0.75, + "learning_rate": 1.5380680899057727e-06, + "loss": 0.5164, + "step": 5992 + }, + { + "epoch": 0.75, + "learning_rate": 1.5366035028592457e-06, + "loss": 0.5993, + "step": 5993 + }, + { + "epoch": 0.75, + "learning_rate": 1.535139486839871e-06, + "loss": 0.5475, + "step": 5994 + }, + { + "epoch": 0.75, + "learning_rate": 1.533676042089028e-06, + "loss": 0.5177, + "step": 5995 + }, + { + "epoch": 0.75, + "learning_rate": 1.5322131688480007e-06, + "loss": 0.2009, + "step": 5996 + }, + { + "epoch": 0.75, + "learning_rate": 1.530750867357982e-06, + "loss": 0.5582, + "step": 5997 + }, + { + "epoch": 0.75, + "learning_rate": 1.5292891378600688e-06, + "loss": 0.4641, + "step": 5998 + }, + { + "epoch": 0.75, + "learning_rate": 1.5278279805952618e-06, + "loss": 0.5123, + "step": 5999 + }, + { + "epoch": 0.75, + "learning_rate": 1.5263673958044694e-06, + "loss": 0.2241, + "step": 6000 + }, + { + "epoch": 0.75, + "learning_rate": 1.5249073837285061e-06, + "loss": 0.5297, + "step": 6001 + }, + { + "epoch": 0.75, + "learning_rate": 1.5234479446080913e-06, + "loss": 0.4612, + "step": 6002 + }, + { + "epoch": 0.75, + "learning_rate": 1.521989078683851e-06, + "loss": 0.4771, + "step": 6003 + }, + { + "epoch": 0.75, + "learning_rate": 1.5205307861963132e-06, + "loss": 0.5495, + "step": 6004 + }, + { + "epoch": 0.75, + "learning_rate": 1.5190730673859144e-06, + "loss": 0.4681, + "step": 6005 + }, + { + "epoch": 0.75, + "learning_rate": 1.5176159224929965e-06, + "loss": 0.5171, + "step": 6006 + }, + { + "epoch": 0.75, + "learning_rate": 1.5161593517578055e-06, + "loss": 0.572, + "step": 6007 + }, + { + "epoch": 0.75, + "learning_rate": 1.5147033554204943e-06, + "loss": 0.4878, + "step": 6008 + }, + { + "epoch": 0.75, + "learning_rate": 1.5132479337211193e-06, + "loss": 0.5249, + "step": 6009 + }, + { + "epoch": 0.75, + "learning_rate": 1.511793086899645e-06, + "loss": 0.5456, + "step": 6010 + }, + { + "epoch": 0.75, + "learning_rate": 1.5103388151959353e-06, + "loss": 0.5432, + "step": 6011 + }, + { + "epoch": 0.75, + "learning_rate": 1.508885118849766e-06, + "loss": 0.4474, + "step": 6012 + }, + { + "epoch": 0.75, + "learning_rate": 1.507431998100814e-06, + "loss": 0.2386, + "step": 6013 + }, + { + "epoch": 0.75, + "learning_rate": 1.5059794531886624e-06, + "loss": 0.4787, + "step": 6014 + }, + { + "epoch": 0.75, + "learning_rate": 1.5045274843528001e-06, + "loss": 0.4651, + "step": 6015 + }, + { + "epoch": 0.75, + "learning_rate": 1.5030760918326203e-06, + "loss": 0.4783, + "step": 6016 + }, + { + "epoch": 0.75, + "learning_rate": 1.5016252758674228e-06, + "loss": 0.5249, + "step": 6017 + }, + { + "epoch": 0.75, + "learning_rate": 1.5001750366964063e-06, + "loss": 0.4882, + "step": 6018 + }, + { + "epoch": 0.75, + "learning_rate": 1.4987253745586823e-06, + "loss": 0.5426, + "step": 6019 + }, + { + "epoch": 0.75, + "learning_rate": 1.4972762896932619e-06, + "loss": 0.5071, + "step": 6020 + }, + { + "epoch": 0.75, + "learning_rate": 1.4958277823390642e-06, + "loss": 0.5287, + "step": 6021 + }, + { + "epoch": 0.75, + "learning_rate": 1.4943798527349112e-06, + "loss": 0.5795, + "step": 6022 + }, + { + "epoch": 0.75, + "learning_rate": 1.49293250111953e-06, + "loss": 0.5, + "step": 6023 + }, + { + "epoch": 0.76, + "learning_rate": 1.4914857277315547e-06, + "loss": 0.4765, + "step": 6024 + }, + { + "epoch": 0.76, + "learning_rate": 1.490039532809518e-06, + "loss": 0.5546, + "step": 6025 + }, + { + "epoch": 0.76, + "learning_rate": 1.4885939165918634e-06, + "loss": 0.5141, + "step": 6026 + }, + { + "epoch": 0.76, + "learning_rate": 1.4871488793169359e-06, + "loss": 0.5869, + "step": 6027 + }, + { + "epoch": 0.76, + "learning_rate": 1.4857044212229866e-06, + "loss": 0.5332, + "step": 6028 + }, + { + "epoch": 0.76, + "learning_rate": 1.4842605425481704e-06, + "loss": 0.5136, + "step": 6029 + }, + { + "epoch": 0.76, + "learning_rate": 1.4828172435305478e-06, + "loss": 0.4566, + "step": 6030 + }, + { + "epoch": 0.76, + "learning_rate": 1.4813745244080795e-06, + "loss": 0.4966, + "step": 6031 + }, + { + "epoch": 0.76, + "learning_rate": 1.4799323854186353e-06, + "loss": 0.5563, + "step": 6032 + }, + { + "epoch": 0.76, + "learning_rate": 1.4784908267999882e-06, + "loss": 0.5745, + "step": 6033 + }, + { + "epoch": 0.76, + "learning_rate": 1.4770498487898144e-06, + "loss": 0.5003, + "step": 6034 + }, + { + "epoch": 0.76, + "learning_rate": 1.4756094516256969e-06, + "loss": 0.5064, + "step": 6035 + }, + { + "epoch": 0.76, + "learning_rate": 1.4741696355451179e-06, + "loss": 0.5061, + "step": 6036 + }, + { + "epoch": 0.76, + "learning_rate": 1.4727304007854681e-06, + "loss": 0.5277, + "step": 6037 + }, + { + "epoch": 0.76, + "learning_rate": 1.4712917475840415e-06, + "loss": 0.5176, + "step": 6038 + }, + { + "epoch": 0.76, + "learning_rate": 1.4698536761780364e-06, + "loss": 0.5314, + "step": 6039 + }, + { + "epoch": 0.76, + "learning_rate": 1.4684161868045539e-06, + "loss": 0.5173, + "step": 6040 + }, + { + "epoch": 0.76, + "learning_rate": 1.4669792797006017e-06, + "loss": 0.4427, + "step": 6041 + }, + { + "epoch": 0.76, + "learning_rate": 1.4655429551030865e-06, + "loss": 0.5369, + "step": 6042 + }, + { + "epoch": 0.76, + "learning_rate": 1.4641072132488239e-06, + "loss": 0.5494, + "step": 6043 + }, + { + "epoch": 0.76, + "learning_rate": 1.4626720543745315e-06, + "loss": 0.4466, + "step": 6044 + }, + { + "epoch": 0.76, + "learning_rate": 1.4612374787168316e-06, + "loss": 0.4928, + "step": 6045 + }, + { + "epoch": 0.76, + "learning_rate": 1.4598034865122507e-06, + "loss": 0.526, + "step": 6046 + }, + { + "epoch": 0.76, + "learning_rate": 1.4583700779972153e-06, + "loss": 0.553, + "step": 6047 + }, + { + "epoch": 0.76, + "learning_rate": 1.4569372534080595e-06, + "loss": 0.5115, + "step": 6048 + }, + { + "epoch": 0.76, + "learning_rate": 1.4555050129810205e-06, + "loss": 0.4592, + "step": 6049 + }, + { + "epoch": 0.76, + "learning_rate": 1.4540733569522392e-06, + "loss": 0.5337, + "step": 6050 + }, + { + "epoch": 0.76, + "learning_rate": 1.4526422855577604e-06, + "loss": 0.5468, + "step": 6051 + }, + { + "epoch": 0.76, + "learning_rate": 1.4512117990335301e-06, + "loss": 0.5717, + "step": 6052 + }, + { + "epoch": 0.76, + "learning_rate": 1.4497818976153992e-06, + "loss": 0.4767, + "step": 6053 + }, + { + "epoch": 0.76, + "learning_rate": 1.4483525815391242e-06, + "loss": 0.5949, + "step": 6054 + }, + { + "epoch": 0.76, + "learning_rate": 1.4469238510403633e-06, + "loss": 0.5393, + "step": 6055 + }, + { + "epoch": 0.76, + "learning_rate": 1.4454957063546771e-06, + "loss": 0.5097, + "step": 6056 + }, + { + "epoch": 0.76, + "learning_rate": 1.444068147717534e-06, + "loss": 0.4994, + "step": 6057 + }, + { + "epoch": 0.76, + "learning_rate": 1.4426411753642978e-06, + "loss": 0.4917, + "step": 6058 + }, + { + "epoch": 0.76, + "learning_rate": 1.441214789530243e-06, + "loss": 0.4738, + "step": 6059 + }, + { + "epoch": 0.76, + "learning_rate": 1.4397889904505451e-06, + "loss": 0.5379, + "step": 6060 + }, + { + "epoch": 0.76, + "learning_rate": 1.4383637783602823e-06, + "loss": 0.5714, + "step": 6061 + }, + { + "epoch": 0.76, + "learning_rate": 1.436939153494437e-06, + "loss": 0.1939, + "step": 6062 + }, + { + "epoch": 0.76, + "learning_rate": 1.4355151160878922e-06, + "loss": 0.5207, + "step": 6063 + }, + { + "epoch": 0.76, + "learning_rate": 1.4340916663754368e-06, + "loss": 0.4863, + "step": 6064 + }, + { + "epoch": 0.76, + "learning_rate": 1.432668804591762e-06, + "loss": 0.5388, + "step": 6065 + }, + { + "epoch": 0.76, + "learning_rate": 1.4312465309714618e-06, + "loss": 0.5747, + "step": 6066 + }, + { + "epoch": 0.76, + "learning_rate": 1.4298248457490356e-06, + "loss": 0.5404, + "step": 6067 + }, + { + "epoch": 0.76, + "learning_rate": 1.4284037491588798e-06, + "loss": 0.5091, + "step": 6068 + }, + { + "epoch": 0.76, + "learning_rate": 1.4269832414353001e-06, + "loss": 0.522, + "step": 6069 + }, + { + "epoch": 0.76, + "learning_rate": 1.4255633228125016e-06, + "loss": 0.4846, + "step": 6070 + }, + { + "epoch": 0.76, + "learning_rate": 1.4241439935245932e-06, + "loss": 0.5065, + "step": 6071 + }, + { + "epoch": 0.76, + "learning_rate": 1.4227252538055874e-06, + "loss": 0.4946, + "step": 6072 + }, + { + "epoch": 0.76, + "learning_rate": 1.4213071038894e-06, + "loss": 0.4826, + "step": 6073 + }, + { + "epoch": 0.76, + "learning_rate": 1.4198895440098454e-06, + "loss": 0.4636, + "step": 6074 + }, + { + "epoch": 0.76, + "learning_rate": 1.4184725744006443e-06, + "loss": 0.567, + "step": 6075 + }, + { + "epoch": 0.76, + "learning_rate": 1.4170561952954203e-06, + "loss": 0.5448, + "step": 6076 + }, + { + "epoch": 0.76, + "learning_rate": 1.4156404069276992e-06, + "loss": 0.452, + "step": 6077 + }, + { + "epoch": 0.76, + "learning_rate": 1.4142252095309095e-06, + "loss": 0.4943, + "step": 6078 + }, + { + "epoch": 0.76, + "learning_rate": 1.4128106033383787e-06, + "loss": 0.4565, + "step": 6079 + }, + { + "epoch": 0.76, + "learning_rate": 1.411396588583342e-06, + "loss": 0.5348, + "step": 6080 + }, + { + "epoch": 0.76, + "learning_rate": 1.4099831654989349e-06, + "loss": 0.5178, + "step": 6081 + }, + { + "epoch": 0.76, + "learning_rate": 1.4085703343181944e-06, + "loss": 0.5587, + "step": 6082 + }, + { + "epoch": 0.76, + "learning_rate": 1.4071580952740638e-06, + "loss": 0.5383, + "step": 6083 + }, + { + "epoch": 0.76, + "learning_rate": 1.4057464485993822e-06, + "loss": 0.5442, + "step": 6084 + }, + { + "epoch": 0.76, + "learning_rate": 1.4043353945268961e-06, + "loss": 0.5309, + "step": 6085 + }, + { + "epoch": 0.76, + "learning_rate": 1.4029249332892531e-06, + "loss": 0.5496, + "step": 6086 + }, + { + "epoch": 0.76, + "learning_rate": 1.4015150651190024e-06, + "loss": 0.4871, + "step": 6087 + }, + { + "epoch": 0.76, + "learning_rate": 1.4001057902485965e-06, + "loss": 0.4941, + "step": 6088 + }, + { + "epoch": 0.76, + "learning_rate": 1.3986971089103912e-06, + "loss": 0.4815, + "step": 6089 + }, + { + "epoch": 0.76, + "learning_rate": 1.3972890213366387e-06, + "loss": 0.5527, + "step": 6090 + }, + { + "epoch": 0.76, + "learning_rate": 1.3958815277594996e-06, + "loss": 0.5079, + "step": 6091 + }, + { + "epoch": 0.76, + "learning_rate": 1.3944746284110334e-06, + "loss": 0.4479, + "step": 6092 + }, + { + "epoch": 0.76, + "learning_rate": 1.3930683235232034e-06, + "loss": 0.2297, + "step": 6093 + }, + { + "epoch": 0.76, + "learning_rate": 1.391662613327875e-06, + "loss": 0.5179, + "step": 6094 + }, + { + "epoch": 0.76, + "learning_rate": 1.3902574980568118e-06, + "loss": 0.4428, + "step": 6095 + }, + { + "epoch": 0.76, + "learning_rate": 1.3888529779416837e-06, + "loss": 0.5893, + "step": 6096 + }, + { + "epoch": 0.76, + "learning_rate": 1.38744905321406e-06, + "loss": 0.4305, + "step": 6097 + }, + { + "epoch": 0.76, + "learning_rate": 1.3860457241054133e-06, + "loss": 0.4959, + "step": 6098 + }, + { + "epoch": 0.76, + "learning_rate": 1.3846429908471172e-06, + "loss": 0.4926, + "step": 6099 + }, + { + "epoch": 0.76, + "learning_rate": 1.3832408536704488e-06, + "loss": 0.5654, + "step": 6100 + }, + { + "epoch": 0.76, + "learning_rate": 1.3818393128065822e-06, + "loss": 0.5212, + "step": 6101 + }, + { + "epoch": 0.76, + "learning_rate": 1.380438368486598e-06, + "loss": 0.5094, + "step": 6102 + }, + { + "epoch": 0.77, + "learning_rate": 1.3790380209414766e-06, + "loss": 0.4915, + "step": 6103 + }, + { + "epoch": 0.77, + "learning_rate": 1.3776382704021002e-06, + "loss": 0.5304, + "step": 6104 + }, + { + "epoch": 0.77, + "learning_rate": 1.3762391170992534e-06, + "loss": 0.2187, + "step": 6105 + }, + { + "epoch": 0.77, + "learning_rate": 1.3748405612636195e-06, + "loss": 0.5475, + "step": 6106 + }, + { + "epoch": 0.77, + "learning_rate": 1.3734426031257864e-06, + "loss": 0.4908, + "step": 6107 + }, + { + "epoch": 0.77, + "learning_rate": 1.3720452429162423e-06, + "loss": 0.5585, + "step": 6108 + }, + { + "epoch": 0.77, + "learning_rate": 1.3706484808653769e-06, + "loss": 0.5393, + "step": 6109 + }, + { + "epoch": 0.77, + "learning_rate": 1.3692523172034822e-06, + "loss": 0.5718, + "step": 6110 + }, + { + "epoch": 0.77, + "learning_rate": 1.3678567521607477e-06, + "loss": 0.5483, + "step": 6111 + }, + { + "epoch": 0.77, + "learning_rate": 1.366461785967269e-06, + "loss": 0.5314, + "step": 6112 + }, + { + "epoch": 0.77, + "learning_rate": 1.365067418853041e-06, + "loss": 0.5571, + "step": 6113 + }, + { + "epoch": 0.77, + "learning_rate": 1.3636736510479592e-06, + "loss": 0.536, + "step": 6114 + }, + { + "epoch": 0.77, + "learning_rate": 1.3622804827818214e-06, + "loss": 0.5166, + "step": 6115 + }, + { + "epoch": 0.77, + "learning_rate": 1.360887914284328e-06, + "loss": 0.4426, + "step": 6116 + }, + { + "epoch": 0.77, + "learning_rate": 1.359495945785075e-06, + "loss": 0.4986, + "step": 6117 + }, + { + "epoch": 0.77, + "learning_rate": 1.3581045775135636e-06, + "loss": 0.5066, + "step": 6118 + }, + { + "epoch": 0.77, + "learning_rate": 1.3567138096991978e-06, + "loss": 0.5143, + "step": 6119 + }, + { + "epoch": 0.77, + "learning_rate": 1.3553236425712784e-06, + "loss": 0.5295, + "step": 6120 + }, + { + "epoch": 0.77, + "learning_rate": 1.3539340763590098e-06, + "loss": 0.5263, + "step": 6121 + }, + { + "epoch": 0.77, + "learning_rate": 1.3525451112914967e-06, + "loss": 0.4552, + "step": 6122 + }, + { + "epoch": 0.77, + "learning_rate": 1.351156747597746e-06, + "loss": 0.5227, + "step": 6123 + }, + { + "epoch": 0.77, + "learning_rate": 1.3497689855066608e-06, + "loss": 0.4846, + "step": 6124 + }, + { + "epoch": 0.77, + "learning_rate": 1.3483818252470498e-06, + "loss": 0.5346, + "step": 6125 + }, + { + "epoch": 0.77, + "learning_rate": 1.3469952670476211e-06, + "loss": 0.4504, + "step": 6126 + }, + { + "epoch": 0.77, + "learning_rate": 1.3456093111369833e-06, + "loss": 0.5001, + "step": 6127 + }, + { + "epoch": 0.77, + "learning_rate": 1.3442239577436455e-06, + "loss": 0.4899, + "step": 6128 + }, + { + "epoch": 0.77, + "learning_rate": 1.3428392070960193e-06, + "loss": 0.5048, + "step": 6129 + }, + { + "epoch": 0.77, + "learning_rate": 1.3414550594224125e-06, + "loss": 0.527, + "step": 6130 + }, + { + "epoch": 0.77, + "learning_rate": 1.3400715149510379e-06, + "loss": 0.5369, + "step": 6131 + }, + { + "epoch": 0.77, + "learning_rate": 1.3386885739100064e-06, + "loss": 0.4609, + "step": 6132 + }, + { + "epoch": 0.77, + "learning_rate": 1.3373062365273314e-06, + "loss": 0.4829, + "step": 6133 + }, + { + "epoch": 0.77, + "learning_rate": 1.3359245030309254e-06, + "loss": 0.5047, + "step": 6134 + }, + { + "epoch": 0.77, + "learning_rate": 1.3345433736486009e-06, + "loss": 0.5698, + "step": 6135 + }, + { + "epoch": 0.77, + "learning_rate": 1.3331628486080739e-06, + "loss": 0.5172, + "step": 6136 + }, + { + "epoch": 0.77, + "learning_rate": 1.3317829281369542e-06, + "loss": 0.4725, + "step": 6137 + }, + { + "epoch": 0.77, + "learning_rate": 1.3304036124627584e-06, + "loss": 0.4609, + "step": 6138 + }, + { + "epoch": 0.77, + "learning_rate": 1.3290249018129008e-06, + "loss": 0.4699, + "step": 6139 + }, + { + "epoch": 0.77, + "learning_rate": 1.3276467964146967e-06, + "loss": 0.5637, + "step": 6140 + }, + { + "epoch": 0.77, + "learning_rate": 1.32626929649536e-06, + "loss": 0.5205, + "step": 6141 + }, + { + "epoch": 0.77, + "learning_rate": 1.3248924022820082e-06, + "loss": 0.5301, + "step": 6142 + }, + { + "epoch": 0.77, + "learning_rate": 1.3235161140016533e-06, + "loss": 0.5392, + "step": 6143 + }, + { + "epoch": 0.77, + "learning_rate": 1.322140431881212e-06, + "loss": 0.5009, + "step": 6144 + }, + { + "epoch": 0.77, + "learning_rate": 1.3207653561475004e-06, + "loss": 0.5245, + "step": 6145 + }, + { + "epoch": 0.77, + "learning_rate": 1.3193908870272333e-06, + "loss": 0.5369, + "step": 6146 + }, + { + "epoch": 0.77, + "learning_rate": 1.3180170247470264e-06, + "loss": 0.5059, + "step": 6147 + }, + { + "epoch": 0.77, + "learning_rate": 1.3166437695333966e-06, + "loss": 0.5125, + "step": 6148 + }, + { + "epoch": 0.77, + "learning_rate": 1.3152711216127566e-06, + "loss": 0.475, + "step": 6149 + }, + { + "epoch": 0.77, + "learning_rate": 1.3138990812114228e-06, + "loss": 0.5193, + "step": 6150 + }, + { + "epoch": 0.77, + "learning_rate": 1.3125276485556098e-06, + "loss": 0.5067, + "step": 6151 + }, + { + "epoch": 0.77, + "learning_rate": 1.3111568238714328e-06, + "loss": 0.4414, + "step": 6152 + }, + { + "epoch": 0.77, + "learning_rate": 1.3097866073849074e-06, + "loss": 0.2342, + "step": 6153 + }, + { + "epoch": 0.77, + "learning_rate": 1.3084169993219464e-06, + "loss": 0.47, + "step": 6154 + }, + { + "epoch": 0.77, + "learning_rate": 1.3070479999083635e-06, + "loss": 0.4663, + "step": 6155 + }, + { + "epoch": 0.77, + "learning_rate": 1.3056796093698731e-06, + "loss": 0.4866, + "step": 6156 + }, + { + "epoch": 0.77, + "learning_rate": 1.3043118279320888e-06, + "loss": 0.2235, + "step": 6157 + }, + { + "epoch": 0.77, + "learning_rate": 1.302944655820525e-06, + "loss": 0.5052, + "step": 6158 + }, + { + "epoch": 0.77, + "learning_rate": 1.30157809326059e-06, + "loss": 0.5029, + "step": 6159 + }, + { + "epoch": 0.77, + "learning_rate": 1.300212140477598e-06, + "loss": 0.577, + "step": 6160 + }, + { + "epoch": 0.77, + "learning_rate": 1.29884679769676e-06, + "loss": 0.556, + "step": 6161 + }, + { + "epoch": 0.77, + "learning_rate": 1.2974820651431868e-06, + "loss": 0.5325, + "step": 6162 + }, + { + "epoch": 0.77, + "learning_rate": 1.296117943041889e-06, + "loss": 0.5164, + "step": 6163 + }, + { + "epoch": 0.77, + "learning_rate": 1.2947544316177768e-06, + "loss": 0.2059, + "step": 6164 + }, + { + "epoch": 0.77, + "learning_rate": 1.2933915310956568e-06, + "loss": 0.5124, + "step": 6165 + }, + { + "epoch": 0.77, + "learning_rate": 1.2920292417002383e-06, + "loss": 0.502, + "step": 6166 + }, + { + "epoch": 0.77, + "learning_rate": 1.2906675636561284e-06, + "loss": 0.4841, + "step": 6167 + }, + { + "epoch": 0.77, + "learning_rate": 1.2893064971878338e-06, + "loss": 0.5212, + "step": 6168 + }, + { + "epoch": 0.77, + "learning_rate": 1.287946042519762e-06, + "loss": 0.5186, + "step": 6169 + }, + { + "epoch": 0.77, + "learning_rate": 1.2865861998762142e-06, + "loss": 0.5272, + "step": 6170 + }, + { + "epoch": 0.77, + "learning_rate": 1.2852269694813962e-06, + "loss": 0.5266, + "step": 6171 + }, + { + "epoch": 0.77, + "learning_rate": 1.2838683515594108e-06, + "loss": 0.5527, + "step": 6172 + }, + { + "epoch": 0.77, + "learning_rate": 1.2825103463342602e-06, + "loss": 0.5189, + "step": 6173 + }, + { + "epoch": 0.77, + "learning_rate": 1.2811529540298456e-06, + "loss": 0.4629, + "step": 6174 + }, + { + "epoch": 0.77, + "learning_rate": 1.2797961748699677e-06, + "loss": 0.5305, + "step": 6175 + }, + { + "epoch": 0.77, + "learning_rate": 1.2784400090783227e-06, + "loss": 0.4844, + "step": 6176 + }, + { + "epoch": 0.77, + "learning_rate": 1.2770844568785101e-06, + "loss": 0.5964, + "step": 6177 + }, + { + "epoch": 0.77, + "learning_rate": 1.275729518494026e-06, + "loss": 0.4989, + "step": 6178 + }, + { + "epoch": 0.77, + "learning_rate": 1.2743751941482662e-06, + "loss": 0.5606, + "step": 6179 + }, + { + "epoch": 0.77, + "learning_rate": 1.2730214840645256e-06, + "loss": 0.5513, + "step": 6180 + }, + { + "epoch": 0.77, + "learning_rate": 1.2716683884659947e-06, + "loss": 0.5773, + "step": 6181 + }, + { + "epoch": 0.77, + "learning_rate": 1.270315907575766e-06, + "loss": 0.5917, + "step": 6182 + }, + { + "epoch": 0.78, + "learning_rate": 1.2689640416168303e-06, + "loss": 0.5305, + "step": 6183 + }, + { + "epoch": 0.78, + "learning_rate": 1.2676127908120756e-06, + "loss": 0.5077, + "step": 6184 + }, + { + "epoch": 0.78, + "learning_rate": 1.2662621553842907e-06, + "loss": 0.4729, + "step": 6185 + }, + { + "epoch": 0.78, + "learning_rate": 1.2649121355561595e-06, + "loss": 0.53, + "step": 6186 + }, + { + "epoch": 0.78, + "learning_rate": 1.2635627315502668e-06, + "loss": 0.5109, + "step": 6187 + }, + { + "epoch": 0.78, + "learning_rate": 1.2622139435890962e-06, + "loss": 0.4994, + "step": 6188 + }, + { + "epoch": 0.78, + "learning_rate": 1.2608657718950285e-06, + "loss": 0.5625, + "step": 6189 + }, + { + "epoch": 0.78, + "learning_rate": 1.2595182166903435e-06, + "loss": 0.4972, + "step": 6190 + }, + { + "epoch": 0.78, + "learning_rate": 1.2581712781972211e-06, + "loss": 0.5573, + "step": 6191 + }, + { + "epoch": 0.78, + "learning_rate": 1.256824956637734e-06, + "loss": 0.4629, + "step": 6192 + }, + { + "epoch": 0.78, + "learning_rate": 1.2554792522338582e-06, + "loss": 0.4882, + "step": 6193 + }, + { + "epoch": 0.78, + "learning_rate": 1.254134165207468e-06, + "loss": 0.4809, + "step": 6194 + }, + { + "epoch": 0.78, + "learning_rate": 1.2527896957803326e-06, + "loss": 0.4963, + "step": 6195 + }, + { + "epoch": 0.78, + "learning_rate": 1.2514458441741244e-06, + "loss": 0.4739, + "step": 6196 + }, + { + "epoch": 0.78, + "learning_rate": 1.250102610610407e-06, + "loss": 0.5456, + "step": 6197 + }, + { + "epoch": 0.78, + "learning_rate": 1.2487599953106473e-06, + "loss": 0.544, + "step": 6198 + }, + { + "epoch": 0.78, + "learning_rate": 1.2474179984962086e-06, + "loss": 0.4833, + "step": 6199 + }, + { + "epoch": 0.78, + "learning_rate": 1.2460766203883535e-06, + "loss": 0.4564, + "step": 6200 + }, + { + "epoch": 0.78, + "learning_rate": 1.2447358612082423e-06, + "loss": 0.4775, + "step": 6201 + }, + { + "epoch": 0.78, + "learning_rate": 1.2433957211769298e-06, + "loss": 0.1931, + "step": 6202 + }, + { + "epoch": 0.78, + "learning_rate": 1.242056200515373e-06, + "loss": 0.5533, + "step": 6203 + }, + { + "epoch": 0.78, + "learning_rate": 1.2407172994444255e-06, + "loss": 0.4484, + "step": 6204 + }, + { + "epoch": 0.78, + "learning_rate": 1.2393790181848375e-06, + "loss": 0.6197, + "step": 6205 + }, + { + "epoch": 0.78, + "learning_rate": 1.2380413569572592e-06, + "loss": 0.5739, + "step": 6206 + }, + { + "epoch": 0.78, + "learning_rate": 1.2367043159822379e-06, + "loss": 0.4617, + "step": 6207 + }, + { + "epoch": 0.78, + "learning_rate": 1.2353678954802151e-06, + "loss": 0.5968, + "step": 6208 + }, + { + "epoch": 0.78, + "learning_rate": 1.2340320956715352e-06, + "loss": 0.5938, + "step": 6209 + }, + { + "epoch": 0.78, + "learning_rate": 1.232696916776438e-06, + "loss": 0.5242, + "step": 6210 + }, + { + "epoch": 0.78, + "learning_rate": 1.2313623590150604e-06, + "loss": 0.5033, + "step": 6211 + }, + { + "epoch": 0.78, + "learning_rate": 1.2300284226074394e-06, + "loss": 0.5232, + "step": 6212 + }, + { + "epoch": 0.78, + "learning_rate": 1.2286951077735043e-06, + "loss": 0.5348, + "step": 6213 + }, + { + "epoch": 0.78, + "learning_rate": 1.2273624147330877e-06, + "loss": 0.5533, + "step": 6214 + }, + { + "epoch": 0.78, + "learning_rate": 1.2260303437059156e-06, + "loss": 0.4996, + "step": 6215 + }, + { + "epoch": 0.78, + "learning_rate": 1.2246988949116145e-06, + "loss": 0.5119, + "step": 6216 + }, + { + "epoch": 0.78, + "learning_rate": 1.223368068569708e-06, + "loss": 0.5631, + "step": 6217 + }, + { + "epoch": 0.78, + "learning_rate": 1.2220378648996123e-06, + "loss": 0.5038, + "step": 6218 + }, + { + "epoch": 0.78, + "learning_rate": 1.220708284120647e-06, + "loss": 0.4394, + "step": 6219 + }, + { + "epoch": 0.78, + "learning_rate": 1.2193793264520264e-06, + "loss": 0.4975, + "step": 6220 + }, + { + "epoch": 0.78, + "learning_rate": 1.218050992112862e-06, + "loss": 0.5062, + "step": 6221 + }, + { + "epoch": 0.78, + "learning_rate": 1.2167232813221624e-06, + "loss": 0.5833, + "step": 6222 + }, + { + "epoch": 0.78, + "learning_rate": 1.2153961942988362e-06, + "loss": 0.4994, + "step": 6223 + }, + { + "epoch": 0.78, + "learning_rate": 1.2140697312616834e-06, + "loss": 0.5623, + "step": 6224 + }, + { + "epoch": 0.78, + "learning_rate": 1.2127438924294055e-06, + "loss": 0.5039, + "step": 6225 + }, + { + "epoch": 0.78, + "learning_rate": 1.2114186780206006e-06, + "loss": 0.5052, + "step": 6226 + }, + { + "epoch": 0.78, + "learning_rate": 1.2100940882537632e-06, + "loss": 0.2134, + "step": 6227 + }, + { + "epoch": 0.78, + "learning_rate": 1.2087701233472866e-06, + "loss": 0.5294, + "step": 6228 + }, + { + "epoch": 0.78, + "learning_rate": 1.2074467835194553e-06, + "loss": 0.5691, + "step": 6229 + }, + { + "epoch": 0.78, + "learning_rate": 1.2061240689884578e-06, + "loss": 0.4986, + "step": 6230 + }, + { + "epoch": 0.78, + "learning_rate": 1.2048019799723754e-06, + "loss": 0.4817, + "step": 6231 + }, + { + "epoch": 0.78, + "learning_rate": 1.2034805166891884e-06, + "loss": 0.5034, + "step": 6232 + }, + { + "epoch": 0.78, + "learning_rate": 1.2021596793567718e-06, + "loss": 0.5077, + "step": 6233 + }, + { + "epoch": 0.78, + "learning_rate": 1.2008394681928993e-06, + "loss": 0.4843, + "step": 6234 + }, + { + "epoch": 0.78, + "learning_rate": 1.1995198834152415e-06, + "loss": 0.5766, + "step": 6235 + }, + { + "epoch": 0.78, + "learning_rate": 1.198200925241363e-06, + "loss": 0.5201, + "step": 6236 + }, + { + "epoch": 0.78, + "learning_rate": 1.1968825938887269e-06, + "loss": 0.4608, + "step": 6237 + }, + { + "epoch": 0.78, + "learning_rate": 1.1955648895746941e-06, + "loss": 0.5479, + "step": 6238 + }, + { + "epoch": 0.78, + "learning_rate": 1.1942478125165202e-06, + "loss": 0.4404, + "step": 6239 + }, + { + "epoch": 0.78, + "learning_rate": 1.1929313629313584e-06, + "loss": 0.5763, + "step": 6240 + }, + { + "epoch": 0.78, + "learning_rate": 1.1916155410362584e-06, + "loss": 0.5204, + "step": 6241 + }, + { + "epoch": 0.78, + "learning_rate": 1.1903003470481671e-06, + "loss": 0.5399, + "step": 6242 + }, + { + "epoch": 0.78, + "learning_rate": 1.1889857811839251e-06, + "loss": 0.4955, + "step": 6243 + }, + { + "epoch": 0.78, + "learning_rate": 1.1876718436602718e-06, + "loss": 0.4737, + "step": 6244 + }, + { + "epoch": 0.78, + "learning_rate": 1.1863585346938427e-06, + "loss": 0.4741, + "step": 6245 + }, + { + "epoch": 0.78, + "learning_rate": 1.1850458545011701e-06, + "loss": 0.5119, + "step": 6246 + }, + { + "epoch": 0.78, + "learning_rate": 1.183733803298681e-06, + "loss": 0.4591, + "step": 6247 + }, + { + "epoch": 0.78, + "learning_rate": 1.1824223813027009e-06, + "loss": 0.5136, + "step": 6248 + }, + { + "epoch": 0.78, + "learning_rate": 1.1811115887294511e-06, + "loss": 0.5516, + "step": 6249 + }, + { + "epoch": 0.78, + "learning_rate": 1.1798014257950451e-06, + "loss": 0.5118, + "step": 6250 + }, + { + "epoch": 0.78, + "learning_rate": 1.178491892715498e-06, + "loss": 0.4692, + "step": 6251 + }, + { + "epoch": 0.78, + "learning_rate": 1.177182989706719e-06, + "loss": 0.5258, + "step": 6252 + }, + { + "epoch": 0.78, + "learning_rate": 1.1758747169845126e-06, + "loss": 0.4722, + "step": 6253 + }, + { + "epoch": 0.78, + "learning_rate": 1.174567074764581e-06, + "loss": 0.5082, + "step": 6254 + }, + { + "epoch": 0.78, + "learning_rate": 1.1732600632625223e-06, + "loss": 0.5174, + "step": 6255 + }, + { + "epoch": 0.78, + "learning_rate": 1.1719536826938283e-06, + "loss": 0.4875, + "step": 6256 + }, + { + "epoch": 0.78, + "learning_rate": 1.1706479332738879e-06, + "loss": 0.5355, + "step": 6257 + }, + { + "epoch": 0.78, + "learning_rate": 1.1693428152179875e-06, + "loss": 0.5583, + "step": 6258 + }, + { + "epoch": 0.78, + "learning_rate": 1.1680383287413088e-06, + "loss": 0.4609, + "step": 6259 + }, + { + "epoch": 0.78, + "learning_rate": 1.1667344740589292e-06, + "loss": 0.4589, + "step": 6260 + }, + { + "epoch": 0.78, + "learning_rate": 1.1654312513858195e-06, + "loss": 0.5586, + "step": 6261 + }, + { + "epoch": 0.78, + "learning_rate": 1.1641286609368491e-06, + "loss": 0.51, + "step": 6262 + }, + { + "epoch": 0.79, + "learning_rate": 1.1628267029267836e-06, + "loss": 0.4972, + "step": 6263 + }, + { + "epoch": 0.79, + "learning_rate": 1.1615253775702824e-06, + "loss": 0.4633, + "step": 6264 + }, + { + "epoch": 0.79, + "learning_rate": 1.160224685081901e-06, + "loss": 0.5763, + "step": 6265 + }, + { + "epoch": 0.79, + "learning_rate": 1.1589246256760933e-06, + "loss": 0.4842, + "step": 6266 + }, + { + "epoch": 0.79, + "learning_rate": 1.1576251995672033e-06, + "loss": 0.5142, + "step": 6267 + }, + { + "epoch": 0.79, + "learning_rate": 1.1563264069694753e-06, + "loss": 0.5062, + "step": 6268 + }, + { + "epoch": 0.79, + "learning_rate": 1.155028248097047e-06, + "loss": 0.542, + "step": 6269 + }, + { + "epoch": 0.79, + "learning_rate": 1.1537307231639528e-06, + "loss": 0.5474, + "step": 6270 + }, + { + "epoch": 0.79, + "learning_rate": 1.1524338323841234e-06, + "loss": 0.5645, + "step": 6271 + }, + { + "epoch": 0.79, + "learning_rate": 1.15113757597138e-06, + "loss": 0.5595, + "step": 6272 + }, + { + "epoch": 0.79, + "learning_rate": 1.149841954139445e-06, + "loss": 0.5081, + "step": 6273 + }, + { + "epoch": 0.79, + "learning_rate": 1.1485469671019333e-06, + "loss": 0.5478, + "step": 6274 + }, + { + "epoch": 0.79, + "learning_rate": 1.1472526150723567e-06, + "loss": 0.5228, + "step": 6275 + }, + { + "epoch": 0.79, + "learning_rate": 1.1459588982641217e-06, + "loss": 0.4705, + "step": 6276 + }, + { + "epoch": 0.79, + "learning_rate": 1.1446658168905278e-06, + "loss": 0.5945, + "step": 6277 + }, + { + "epoch": 0.79, + "learning_rate": 1.143373371164772e-06, + "loss": 0.5117, + "step": 6278 + }, + { + "epoch": 0.79, + "learning_rate": 1.1420815612999475e-06, + "loss": 0.5252, + "step": 6279 + }, + { + "epoch": 0.79, + "learning_rate": 1.1407903875090398e-06, + "loss": 0.5258, + "step": 6280 + }, + { + "epoch": 0.79, + "learning_rate": 1.1394998500049326e-06, + "loss": 0.5868, + "step": 6281 + }, + { + "epoch": 0.79, + "learning_rate": 1.1382099490004033e-06, + "loss": 0.2114, + "step": 6282 + }, + { + "epoch": 0.79, + "learning_rate": 1.136920684708122e-06, + "loss": 0.2375, + "step": 6283 + }, + { + "epoch": 0.79, + "learning_rate": 1.1356320573406577e-06, + "loss": 0.5481, + "step": 6284 + }, + { + "epoch": 0.79, + "learning_rate": 1.1343440671104717e-06, + "loss": 0.5125, + "step": 6285 + }, + { + "epoch": 0.79, + "learning_rate": 1.1330567142299221e-06, + "loss": 0.4905, + "step": 6286 + }, + { + "epoch": 0.79, + "learning_rate": 1.1317699989112623e-06, + "loss": 0.5492, + "step": 6287 + }, + { + "epoch": 0.79, + "learning_rate": 1.130483921366637e-06, + "loss": 0.5303, + "step": 6288 + }, + { + "epoch": 0.79, + "learning_rate": 1.1291984818080887e-06, + "loss": 0.4976, + "step": 6289 + }, + { + "epoch": 0.79, + "learning_rate": 1.1279136804475543e-06, + "loss": 0.5181, + "step": 6290 + }, + { + "epoch": 0.79, + "learning_rate": 1.1266295174968655e-06, + "loss": 0.4871, + "step": 6291 + }, + { + "epoch": 0.79, + "learning_rate": 1.1253459931677501e-06, + "loss": 0.4796, + "step": 6292 + }, + { + "epoch": 0.79, + "learning_rate": 1.1240631076718255e-06, + "loss": 0.5473, + "step": 6293 + }, + { + "epoch": 0.79, + "learning_rate": 1.1227808612206097e-06, + "loss": 0.5197, + "step": 6294 + }, + { + "epoch": 0.79, + "learning_rate": 1.1214992540255127e-06, + "loss": 0.5666, + "step": 6295 + }, + { + "epoch": 0.79, + "learning_rate": 1.120218286297839e-06, + "loss": 0.4967, + "step": 6296 + }, + { + "epoch": 0.79, + "learning_rate": 1.118937958248788e-06, + "loss": 0.5596, + "step": 6297 + }, + { + "epoch": 0.79, + "learning_rate": 1.117658270089455e-06, + "loss": 0.5569, + "step": 6298 + }, + { + "epoch": 0.79, + "learning_rate": 1.1163792220308267e-06, + "loss": 0.5499, + "step": 6299 + }, + { + "epoch": 0.79, + "learning_rate": 1.115100814283786e-06, + "loss": 0.5559, + "step": 6300 + }, + { + "epoch": 0.79, + "learning_rate": 1.1138230470591111e-06, + "loss": 0.5276, + "step": 6301 + }, + { + "epoch": 0.79, + "learning_rate": 1.1125459205674738e-06, + "loss": 0.5651, + "step": 6302 + }, + { + "epoch": 0.79, + "learning_rate": 1.111269435019441e-06, + "loss": 0.4893, + "step": 6303 + }, + { + "epoch": 0.79, + "learning_rate": 1.1099935906254705e-06, + "loss": 0.548, + "step": 6304 + }, + { + "epoch": 0.79, + "learning_rate": 1.1087183875959183e-06, + "loss": 0.4567, + "step": 6305 + }, + { + "epoch": 0.79, + "learning_rate": 1.1074438261410347e-06, + "loss": 0.5172, + "step": 6306 + }, + { + "epoch": 0.79, + "learning_rate": 1.1061699064709607e-06, + "loss": 0.5296, + "step": 6307 + }, + { + "epoch": 0.79, + "learning_rate": 1.104896628795737e-06, + "loss": 0.5712, + "step": 6308 + }, + { + "epoch": 0.79, + "learning_rate": 1.1036239933252912e-06, + "loss": 0.5085, + "step": 6309 + }, + { + "epoch": 0.79, + "learning_rate": 1.1023520002694509e-06, + "loss": 0.2056, + "step": 6310 + }, + { + "epoch": 0.79, + "learning_rate": 1.1010806498379355e-06, + "loss": 0.4845, + "step": 6311 + }, + { + "epoch": 0.79, + "learning_rate": 1.099809942240359e-06, + "loss": 0.5077, + "step": 6312 + }, + { + "epoch": 0.79, + "learning_rate": 1.098539877686229e-06, + "loss": 0.4326, + "step": 6313 + }, + { + "epoch": 0.79, + "learning_rate": 1.0972704563849484e-06, + "loss": 0.4732, + "step": 6314 + }, + { + "epoch": 0.79, + "learning_rate": 1.096001678545811e-06, + "loss": 0.5669, + "step": 6315 + }, + { + "epoch": 0.79, + "learning_rate": 1.094733544378007e-06, + "loss": 0.4923, + "step": 6316 + }, + { + "epoch": 0.79, + "learning_rate": 1.0934660540906206e-06, + "loss": 0.5286, + "step": 6317 + }, + { + "epoch": 0.79, + "learning_rate": 1.092199207892628e-06, + "loss": 0.5327, + "step": 6318 + }, + { + "epoch": 0.79, + "learning_rate": 1.090933005992903e-06, + "loss": 0.5264, + "step": 6319 + }, + { + "epoch": 0.79, + "learning_rate": 1.0896674486002067e-06, + "loss": 0.4772, + "step": 6320 + }, + { + "epoch": 0.79, + "learning_rate": 1.0884025359232003e-06, + "loss": 0.4778, + "step": 6321 + }, + { + "epoch": 0.79, + "learning_rate": 1.0871382681704351e-06, + "loss": 0.5155, + "step": 6322 + }, + { + "epoch": 0.79, + "learning_rate": 1.0858746455503572e-06, + "loss": 0.5448, + "step": 6323 + }, + { + "epoch": 0.79, + "learning_rate": 1.0846116682713071e-06, + "loss": 0.5332, + "step": 6324 + }, + { + "epoch": 0.79, + "learning_rate": 1.0833493365415182e-06, + "loss": 0.2078, + "step": 6325 + }, + { + "epoch": 0.79, + "learning_rate": 1.0820876505691158e-06, + "loss": 0.5305, + "step": 6326 + }, + { + "epoch": 0.79, + "learning_rate": 1.0808266105621208e-06, + "loss": 0.5115, + "step": 6327 + }, + { + "epoch": 0.79, + "learning_rate": 1.0795662167284471e-06, + "loss": 0.4634, + "step": 6328 + }, + { + "epoch": 0.79, + "learning_rate": 1.0783064692759027e-06, + "loss": 0.4932, + "step": 6329 + }, + { + "epoch": 0.79, + "learning_rate": 1.0770473684121886e-06, + "loss": 0.5218, + "step": 6330 + }, + { + "epoch": 0.79, + "learning_rate": 1.0757889143448974e-06, + "loss": 0.4268, + "step": 6331 + }, + { + "epoch": 0.79, + "learning_rate": 1.0745311072815168e-06, + "loss": 0.5578, + "step": 6332 + }, + { + "epoch": 0.79, + "learning_rate": 1.0732739474294286e-06, + "loss": 0.5393, + "step": 6333 + }, + { + "epoch": 0.79, + "learning_rate": 1.0720174349959062e-06, + "loss": 0.4945, + "step": 6334 + }, + { + "epoch": 0.79, + "learning_rate": 1.0707615701881192e-06, + "loss": 0.5174, + "step": 6335 + }, + { + "epoch": 0.79, + "learning_rate": 1.0695063532131238e-06, + "loss": 0.5061, + "step": 6336 + }, + { + "epoch": 0.79, + "learning_rate": 1.0682517842778772e-06, + "loss": 0.5744, + "step": 6337 + }, + { + "epoch": 0.79, + "learning_rate": 1.0669978635892248e-06, + "loss": 0.5275, + "step": 6338 + }, + { + "epoch": 0.79, + "learning_rate": 1.0657445913539072e-06, + "loss": 0.5587, + "step": 6339 + }, + { + "epoch": 0.79, + "learning_rate": 1.0644919677785577e-06, + "loss": 0.561, + "step": 6340 + }, + { + "epoch": 0.79, + "learning_rate": 1.0632399930697036e-06, + "loss": 0.5433, + "step": 6341 + }, + { + "epoch": 0.79, + "learning_rate": 1.0619886674337616e-06, + "loss": 0.4619, + "step": 6342 + }, + { + "epoch": 0.8, + "learning_rate": 1.060737991077045e-06, + "loss": 0.533, + "step": 6343 + }, + { + "epoch": 0.8, + "learning_rate": 1.0594879642057586e-06, + "loss": 0.4581, + "step": 6344 + }, + { + "epoch": 0.8, + "learning_rate": 1.0582385870260014e-06, + "loss": 0.5467, + "step": 6345 + }, + { + "epoch": 0.8, + "learning_rate": 1.056989859743764e-06, + "loss": 0.4793, + "step": 6346 + }, + { + "epoch": 0.8, + "learning_rate": 1.0557417825649297e-06, + "loss": 0.5187, + "step": 6347 + }, + { + "epoch": 0.8, + "learning_rate": 1.0544943556952764e-06, + "loss": 0.5561, + "step": 6348 + }, + { + "epoch": 0.8, + "learning_rate": 1.0532475793404717e-06, + "loss": 0.5848, + "step": 6349 + }, + { + "epoch": 0.8, + "learning_rate": 1.052001453706078e-06, + "loss": 0.5649, + "step": 6350 + }, + { + "epoch": 0.8, + "learning_rate": 1.0507559789975513e-06, + "loss": 0.5212, + "step": 6351 + }, + { + "epoch": 0.8, + "learning_rate": 1.049511155420238e-06, + "loss": 0.5369, + "step": 6352 + }, + { + "epoch": 0.8, + "learning_rate": 1.048266983179379e-06, + "loss": 0.5283, + "step": 6353 + }, + { + "epoch": 0.8, + "learning_rate": 1.0470234624801068e-06, + "loss": 0.5433, + "step": 6354 + }, + { + "epoch": 0.8, + "learning_rate": 1.0457805935274479e-06, + "loss": 0.553, + "step": 6355 + }, + { + "epoch": 0.8, + "learning_rate": 1.0445383765263179e-06, + "loss": 0.4882, + "step": 6356 + }, + { + "epoch": 0.8, + "learning_rate": 1.0432968116815284e-06, + "loss": 0.4633, + "step": 6357 + }, + { + "epoch": 0.8, + "learning_rate": 1.042055899197782e-06, + "loss": 0.5256, + "step": 6358 + }, + { + "epoch": 0.8, + "learning_rate": 1.0408156392796741e-06, + "loss": 0.5537, + "step": 6359 + }, + { + "epoch": 0.8, + "learning_rate": 1.0395760321316933e-06, + "loss": 0.5333, + "step": 6360 + }, + { + "epoch": 0.8, + "learning_rate": 1.0383370779582185e-06, + "loss": 0.5053, + "step": 6361 + }, + { + "epoch": 0.8, + "learning_rate": 1.0370987769635237e-06, + "loss": 0.4933, + "step": 6362 + }, + { + "epoch": 0.8, + "learning_rate": 1.0358611293517718e-06, + "loss": 0.5364, + "step": 6363 + }, + { + "epoch": 0.8, + "learning_rate": 1.0346241353270208e-06, + "loss": 0.5185, + "step": 6364 + }, + { + "epoch": 0.8, + "learning_rate": 1.0333877950932192e-06, + "loss": 0.5562, + "step": 6365 + }, + { + "epoch": 0.8, + "learning_rate": 1.0321521088542096e-06, + "loss": 0.4565, + "step": 6366 + }, + { + "epoch": 0.8, + "learning_rate": 1.0309170768137262e-06, + "loss": 0.2303, + "step": 6367 + }, + { + "epoch": 0.8, + "learning_rate": 1.0296826991753922e-06, + "loss": 0.5189, + "step": 6368 + }, + { + "epoch": 0.8, + "learning_rate": 1.0284489761427273e-06, + "loss": 0.4984, + "step": 6369 + }, + { + "epoch": 0.8, + "learning_rate": 1.0272159079191407e-06, + "loss": 0.4976, + "step": 6370 + }, + { + "epoch": 0.8, + "learning_rate": 1.0259834947079356e-06, + "loss": 0.4749, + "step": 6371 + }, + { + "epoch": 0.8, + "learning_rate": 1.0247517367123045e-06, + "loss": 0.5767, + "step": 6372 + }, + { + "epoch": 0.8, + "learning_rate": 1.0235206341353359e-06, + "loss": 0.4959, + "step": 6373 + }, + { + "epoch": 0.8, + "learning_rate": 1.0222901871800045e-06, + "loss": 0.5408, + "step": 6374 + }, + { + "epoch": 0.8, + "learning_rate": 1.0210603960491817e-06, + "loss": 0.571, + "step": 6375 + }, + { + "epoch": 0.8, + "learning_rate": 1.0198312609456285e-06, + "loss": 0.5517, + "step": 6376 + }, + { + "epoch": 0.8, + "learning_rate": 1.0186027820719995e-06, + "loss": 0.5881, + "step": 6377 + }, + { + "epoch": 0.8, + "learning_rate": 1.0173749596308408e-06, + "loss": 0.2288, + "step": 6378 + }, + { + "epoch": 0.8, + "learning_rate": 1.0161477938245867e-06, + "loss": 0.4813, + "step": 6379 + }, + { + "epoch": 0.8, + "learning_rate": 1.0149212848555678e-06, + "loss": 0.5103, + "step": 6380 + }, + { + "epoch": 0.8, + "learning_rate": 1.013695432926004e-06, + "loss": 0.5175, + "step": 6381 + }, + { + "epoch": 0.8, + "learning_rate": 1.012470238238008e-06, + "loss": 0.5231, + "step": 6382 + }, + { + "epoch": 0.8, + "learning_rate": 1.0112457009935854e-06, + "loss": 0.4806, + "step": 6383 + }, + { + "epoch": 0.8, + "learning_rate": 1.0100218213946277e-06, + "loss": 0.5236, + "step": 6384 + }, + { + "epoch": 0.8, + "learning_rate": 1.008798599642925e-06, + "loss": 0.5316, + "step": 6385 + }, + { + "epoch": 0.8, + "learning_rate": 1.0075760359401544e-06, + "loss": 0.5053, + "step": 6386 + }, + { + "epoch": 0.8, + "learning_rate": 1.0063541304878866e-06, + "loss": 0.5251, + "step": 6387 + }, + { + "epoch": 0.8, + "learning_rate": 1.005132883487583e-06, + "loss": 0.5114, + "step": 6388 + }, + { + "epoch": 0.8, + "learning_rate": 1.0039122951405978e-06, + "loss": 0.5147, + "step": 6389 + }, + { + "epoch": 0.8, + "learning_rate": 1.0026923656481724e-06, + "loss": 0.2083, + "step": 6390 + }, + { + "epoch": 0.8, + "learning_rate": 1.0014730952114448e-06, + "loss": 0.5209, + "step": 6391 + }, + { + "epoch": 0.8, + "learning_rate": 1.0002544840314415e-06, + "loss": 0.551, + "step": 6392 + }, + { + "epoch": 0.8, + "learning_rate": 9.990365323090817e-07, + "loss": 0.5516, + "step": 6393 + }, + { + "epoch": 0.8, + "learning_rate": 9.978192402451747e-07, + "loss": 0.5355, + "step": 6394 + }, + { + "epoch": 0.8, + "learning_rate": 9.966026080404207e-07, + "loss": 0.4903, + "step": 6395 + }, + { + "epoch": 0.8, + "learning_rate": 9.953866358954118e-07, + "loss": 0.4905, + "step": 6396 + }, + { + "epoch": 0.8, + "learning_rate": 9.941713240106316e-07, + "loss": 0.4387, + "step": 6397 + }, + { + "epoch": 0.8, + "learning_rate": 9.929566725864547e-07, + "loss": 0.5289, + "step": 6398 + }, + { + "epoch": 0.8, + "learning_rate": 9.917426818231463e-07, + "loss": 0.5452, + "step": 6399 + }, + { + "epoch": 0.8, + "learning_rate": 9.905293519208647e-07, + "loss": 0.5008, + "step": 6400 + }, + { + "epoch": 0.8, + "learning_rate": 9.893166830796552e-07, + "loss": 0.4898, + "step": 6401 + }, + { + "epoch": 0.8, + "learning_rate": 9.881046754994572e-07, + "loss": 0.5087, + "step": 6402 + }, + { + "epoch": 0.8, + "learning_rate": 9.868933293801004e-07, + "loss": 0.5025, + "step": 6403 + }, + { + "epoch": 0.8, + "learning_rate": 9.856826449213053e-07, + "loss": 0.579, + "step": 6404 + }, + { + "epoch": 0.8, + "learning_rate": 9.844726223226847e-07, + "loss": 0.4696, + "step": 6405 + }, + { + "epoch": 0.8, + "learning_rate": 9.83263261783739e-07, + "loss": 0.5213, + "step": 6406 + }, + { + "epoch": 0.8, + "learning_rate": 9.82054563503862e-07, + "loss": 0.4868, + "step": 6407 + }, + { + "epoch": 0.8, + "learning_rate": 9.808465276823377e-07, + "loss": 0.5487, + "step": 6408 + }, + { + "epoch": 0.8, + "learning_rate": 9.796391545183414e-07, + "loss": 0.3981, + "step": 6409 + }, + { + "epoch": 0.8, + "learning_rate": 9.784324442109393e-07, + "loss": 0.4981, + "step": 6410 + }, + { + "epoch": 0.8, + "learning_rate": 9.772263969590856e-07, + "loss": 0.5434, + "step": 6411 + }, + { + "epoch": 0.8, + "learning_rate": 9.760210129616283e-07, + "loss": 0.5256, + "step": 6412 + }, + { + "epoch": 0.8, + "learning_rate": 9.748162924173043e-07, + "loss": 0.5399, + "step": 6413 + }, + { + "epoch": 0.8, + "learning_rate": 9.736122355247436e-07, + "loss": 0.4967, + "step": 6414 + }, + { + "epoch": 0.8, + "learning_rate": 9.724088424824629e-07, + "loss": 0.5767, + "step": 6415 + }, + { + "epoch": 0.8, + "learning_rate": 9.71206113488874e-07, + "loss": 0.5066, + "step": 6416 + }, + { + "epoch": 0.8, + "learning_rate": 9.700040487422735e-07, + "loss": 0.5052, + "step": 6417 + }, + { + "epoch": 0.8, + "learning_rate": 9.688026484408536e-07, + "loss": 0.5419, + "step": 6418 + }, + { + "epoch": 0.8, + "learning_rate": 9.676019127826947e-07, + "loss": 0.5358, + "step": 6419 + }, + { + "epoch": 0.8, + "learning_rate": 9.664018419657683e-07, + "loss": 0.4783, + "step": 6420 + }, + { + "epoch": 0.8, + "learning_rate": 9.65202436187937e-07, + "loss": 0.519, + "step": 6421 + }, + { + "epoch": 0.8, + "learning_rate": 9.640036956469495e-07, + "loss": 0.5271, + "step": 6422 + }, + { + "epoch": 0.81, + "learning_rate": 9.628056205404506e-07, + "loss": 0.5365, + "step": 6423 + }, + { + "epoch": 0.81, + "learning_rate": 9.616082110659719e-07, + "loss": 0.5664, + "step": 6424 + }, + { + "epoch": 0.81, + "learning_rate": 9.604114674209363e-07, + "loss": 0.5808, + "step": 6425 + }, + { + "epoch": 0.81, + "learning_rate": 9.592153898026585e-07, + "loss": 0.4944, + "step": 6426 + }, + { + "epoch": 0.81, + "learning_rate": 9.580199784083387e-07, + "loss": 0.4656, + "step": 6427 + }, + { + "epoch": 0.81, + "learning_rate": 9.568252334350713e-07, + "loss": 0.4782, + "step": 6428 + }, + { + "epoch": 0.81, + "learning_rate": 9.556311550798403e-07, + "loss": 0.4982, + "step": 6429 + }, + { + "epoch": 0.81, + "learning_rate": 9.54437743539519e-07, + "loss": 0.5038, + "step": 6430 + }, + { + "epoch": 0.81, + "learning_rate": 9.53244999010871e-07, + "loss": 0.5391, + "step": 6431 + }, + { + "epoch": 0.81, + "learning_rate": 9.520529216905516e-07, + "loss": 0.4866, + "step": 6432 + }, + { + "epoch": 0.81, + "learning_rate": 9.508615117751013e-07, + "loss": 0.4412, + "step": 6433 + }, + { + "epoch": 0.81, + "learning_rate": 9.496707694609553e-07, + "loss": 0.5431, + "step": 6434 + }, + { + "epoch": 0.81, + "learning_rate": 9.484806949444369e-07, + "loss": 0.5396, + "step": 6435 + }, + { + "epoch": 0.81, + "learning_rate": 9.472912884217595e-07, + "loss": 0.2128, + "step": 6436 + }, + { + "epoch": 0.81, + "learning_rate": 9.461025500890281e-07, + "loss": 0.5592, + "step": 6437 + }, + { + "epoch": 0.81, + "learning_rate": 9.449144801422328e-07, + "loss": 0.5123, + "step": 6438 + }, + { + "epoch": 0.81, + "learning_rate": 9.437270787772579e-07, + "loss": 0.5509, + "step": 6439 + }, + { + "epoch": 0.81, + "learning_rate": 9.425403461898758e-07, + "loss": 0.4835, + "step": 6440 + }, + { + "epoch": 0.81, + "learning_rate": 9.413542825757499e-07, + "loss": 0.4869, + "step": 6441 + }, + { + "epoch": 0.81, + "learning_rate": 9.401688881304322e-07, + "loss": 0.5379, + "step": 6442 + }, + { + "epoch": 0.81, + "learning_rate": 9.389841630493635e-07, + "loss": 0.5646, + "step": 6443 + }, + { + "epoch": 0.81, + "learning_rate": 9.378001075278748e-07, + "loss": 0.5581, + "step": 6444 + }, + { + "epoch": 0.81, + "learning_rate": 9.366167217611887e-07, + "loss": 0.5367, + "step": 6445 + }, + { + "epoch": 0.81, + "learning_rate": 9.35434005944415e-07, + "loss": 0.552, + "step": 6446 + }, + { + "epoch": 0.81, + "learning_rate": 9.342519602725536e-07, + "loss": 0.5252, + "step": 6447 + }, + { + "epoch": 0.81, + "learning_rate": 9.330705849404959e-07, + "loss": 0.5964, + "step": 6448 + }, + { + "epoch": 0.81, + "learning_rate": 9.318898801430187e-07, + "loss": 0.5248, + "step": 6449 + }, + { + "epoch": 0.81, + "learning_rate": 9.307098460747915e-07, + "loss": 0.4776, + "step": 6450 + }, + { + "epoch": 0.81, + "learning_rate": 9.295304829303725e-07, + "loss": 0.5358, + "step": 6451 + }, + { + "epoch": 0.81, + "learning_rate": 9.283517909042083e-07, + "loss": 0.4804, + "step": 6452 + }, + { + "epoch": 0.81, + "learning_rate": 9.27173770190637e-07, + "loss": 0.5001, + "step": 6453 + }, + { + "epoch": 0.81, + "learning_rate": 9.259964209838851e-07, + "loss": 0.5324, + "step": 6454 + }, + { + "epoch": 0.81, + "learning_rate": 9.248197434780654e-07, + "loss": 0.4706, + "step": 6455 + }, + { + "epoch": 0.81, + "learning_rate": 9.236437378671836e-07, + "loss": 0.4707, + "step": 6456 + }, + { + "epoch": 0.81, + "learning_rate": 9.224684043451337e-07, + "loss": 0.543, + "step": 6457 + }, + { + "epoch": 0.81, + "learning_rate": 9.21293743105699e-07, + "loss": 0.5314, + "step": 6458 + }, + { + "epoch": 0.81, + "learning_rate": 9.201197543425516e-07, + "loss": 0.4794, + "step": 6459 + }, + { + "epoch": 0.81, + "learning_rate": 9.189464382492535e-07, + "loss": 0.5535, + "step": 6460 + }, + { + "epoch": 0.81, + "learning_rate": 9.177737950192528e-07, + "loss": 0.5792, + "step": 6461 + }, + { + "epoch": 0.81, + "learning_rate": 9.166018248458908e-07, + "loss": 0.5096, + "step": 6462 + }, + { + "epoch": 0.81, + "learning_rate": 9.154305279223946e-07, + "loss": 0.5687, + "step": 6463 + }, + { + "epoch": 0.81, + "learning_rate": 9.142599044418832e-07, + "loss": 0.5312, + "step": 6464 + }, + { + "epoch": 0.81, + "learning_rate": 9.130899545973627e-07, + "loss": 0.4375, + "step": 6465 + }, + { + "epoch": 0.81, + "learning_rate": 9.119206785817275e-07, + "loss": 0.4324, + "step": 6466 + }, + { + "epoch": 0.81, + "learning_rate": 9.107520765877636e-07, + "loss": 0.5087, + "step": 6467 + }, + { + "epoch": 0.81, + "learning_rate": 9.09584148808142e-07, + "loss": 0.2146, + "step": 6468 + }, + { + "epoch": 0.81, + "learning_rate": 9.084168954354256e-07, + "loss": 0.5368, + "step": 6469 + }, + { + "epoch": 0.81, + "learning_rate": 9.072503166620656e-07, + "loss": 0.5445, + "step": 6470 + }, + { + "epoch": 0.81, + "learning_rate": 9.060844126804008e-07, + "loss": 0.2032, + "step": 6471 + }, + { + "epoch": 0.81, + "learning_rate": 9.049191836826604e-07, + "loss": 0.4795, + "step": 6472 + }, + { + "epoch": 0.81, + "learning_rate": 9.037546298609606e-07, + "loss": 0.5335, + "step": 6473 + }, + { + "epoch": 0.81, + "learning_rate": 9.02590751407309e-07, + "loss": 0.5285, + "step": 6474 + }, + { + "epoch": 0.81, + "learning_rate": 9.01427548513597e-07, + "loss": 0.5, + "step": 6475 + }, + { + "epoch": 0.81, + "learning_rate": 9.002650213716085e-07, + "loss": 0.4542, + "step": 6476 + }, + { + "epoch": 0.81, + "learning_rate": 8.991031701730163e-07, + "loss": 0.4832, + "step": 6477 + }, + { + "epoch": 0.81, + "learning_rate": 8.979419951093793e-07, + "loss": 0.4556, + "step": 6478 + }, + { + "epoch": 0.81, + "learning_rate": 8.967814963721466e-07, + "loss": 0.4203, + "step": 6479 + }, + { + "epoch": 0.81, + "learning_rate": 8.956216741526563e-07, + "loss": 0.5258, + "step": 6480 + }, + { + "epoch": 0.81, + "learning_rate": 8.944625286421315e-07, + "loss": 0.5034, + "step": 6481 + }, + { + "epoch": 0.81, + "learning_rate": 8.933040600316878e-07, + "loss": 0.5092, + "step": 6482 + }, + { + "epoch": 0.81, + "learning_rate": 8.921462685123278e-07, + "loss": 0.5325, + "step": 6483 + }, + { + "epoch": 0.81, + "learning_rate": 8.909891542749416e-07, + "loss": 0.5383, + "step": 6484 + }, + { + "epoch": 0.81, + "learning_rate": 8.898327175103094e-07, + "loss": 0.511, + "step": 6485 + }, + { + "epoch": 0.81, + "learning_rate": 8.88676958409097e-07, + "loss": 0.5692, + "step": 6486 + }, + { + "epoch": 0.81, + "learning_rate": 8.87521877161861e-07, + "loss": 0.5297, + "step": 6487 + }, + { + "epoch": 0.81, + "learning_rate": 8.863674739590444e-07, + "loss": 0.5319, + "step": 6488 + }, + { + "epoch": 0.81, + "learning_rate": 8.852137489909807e-07, + "loss": 0.6125, + "step": 6489 + }, + { + "epoch": 0.81, + "learning_rate": 8.840607024478892e-07, + "loss": 0.5634, + "step": 6490 + }, + { + "epoch": 0.81, + "learning_rate": 8.829083345198802e-07, + "loss": 0.5329, + "step": 6491 + }, + { + "epoch": 0.81, + "learning_rate": 8.817566453969473e-07, + "loss": 0.525, + "step": 6492 + }, + { + "epoch": 0.81, + "learning_rate": 8.806056352689763e-07, + "loss": 0.2188, + "step": 6493 + }, + { + "epoch": 0.81, + "learning_rate": 8.794553043257404e-07, + "loss": 0.5545, + "step": 6494 + }, + { + "epoch": 0.81, + "learning_rate": 8.783056527569e-07, + "loss": 0.4871, + "step": 6495 + }, + { + "epoch": 0.81, + "learning_rate": 8.771566807520049e-07, + "loss": 0.4834, + "step": 6496 + }, + { + "epoch": 0.81, + "learning_rate": 8.7600838850049e-07, + "loss": 0.4673, + "step": 6497 + }, + { + "epoch": 0.81, + "learning_rate": 8.7486077619168e-07, + "loss": 0.5373, + "step": 6498 + }, + { + "epoch": 0.81, + "learning_rate": 8.73713844014788e-07, + "loss": 0.5035, + "step": 6499 + }, + { + "epoch": 0.81, + "learning_rate": 8.725675921589138e-07, + "loss": 0.4678, + "step": 6500 + }, + { + "epoch": 0.81, + "learning_rate": 8.714220208130475e-07, + "loss": 0.4625, + "step": 6501 + }, + { + "epoch": 0.82, + "learning_rate": 8.702771301660623e-07, + "loss": 0.4635, + "step": 6502 + }, + { + "epoch": 0.82, + "learning_rate": 8.691329204067234e-07, + "loss": 0.5197, + "step": 6503 + }, + { + "epoch": 0.82, + "learning_rate": 8.67989391723682e-07, + "loss": 0.4857, + "step": 6504 + }, + { + "epoch": 0.82, + "learning_rate": 8.668465443054774e-07, + "loss": 0.4672, + "step": 6505 + }, + { + "epoch": 0.82, + "learning_rate": 8.657043783405361e-07, + "loss": 0.4538, + "step": 6506 + }, + { + "epoch": 0.82, + "learning_rate": 8.64562894017174e-07, + "loss": 0.4849, + "step": 6507 + }, + { + "epoch": 0.82, + "learning_rate": 8.634220915235914e-07, + "loss": 0.5261, + "step": 6508 + }, + { + "epoch": 0.82, + "learning_rate": 8.62281971047878e-07, + "loss": 0.5542, + "step": 6509 + }, + { + "epoch": 0.82, + "learning_rate": 8.611425327780121e-07, + "loss": 0.5415, + "step": 6510 + }, + { + "epoch": 0.82, + "learning_rate": 8.600037769018582e-07, + "loss": 0.4212, + "step": 6511 + }, + { + "epoch": 0.82, + "learning_rate": 8.588657036071696e-07, + "loss": 0.4672, + "step": 6512 + }, + { + "epoch": 0.82, + "learning_rate": 8.577283130815833e-07, + "loss": 0.5447, + "step": 6513 + }, + { + "epoch": 0.82, + "learning_rate": 8.565916055126283e-07, + "loss": 0.5634, + "step": 6514 + }, + { + "epoch": 0.82, + "learning_rate": 8.554555810877191e-07, + "loss": 0.5629, + "step": 6515 + }, + { + "epoch": 0.82, + "learning_rate": 8.543202399941569e-07, + "loss": 0.5031, + "step": 6516 + }, + { + "epoch": 0.82, + "learning_rate": 8.531855824191332e-07, + "loss": 0.5608, + "step": 6517 + }, + { + "epoch": 0.82, + "learning_rate": 8.520516085497211e-07, + "loss": 0.562, + "step": 6518 + }, + { + "epoch": 0.82, + "learning_rate": 8.509183185728864e-07, + "loss": 0.4967, + "step": 6519 + }, + { + "epoch": 0.82, + "learning_rate": 8.497857126754799e-07, + "loss": 0.5681, + "step": 6520 + }, + { + "epoch": 0.82, + "learning_rate": 8.486537910442399e-07, + "loss": 0.5167, + "step": 6521 + }, + { + "epoch": 0.82, + "learning_rate": 8.475225538657917e-07, + "loss": 0.4584, + "step": 6522 + }, + { + "epoch": 0.82, + "learning_rate": 8.463920013266496e-07, + "loss": 0.5683, + "step": 6523 + }, + { + "epoch": 0.82, + "learning_rate": 8.452621336132105e-07, + "loss": 0.4947, + "step": 6524 + }, + { + "epoch": 0.82, + "learning_rate": 8.441329509117624e-07, + "loss": 0.5483, + "step": 6525 + }, + { + "epoch": 0.82, + "learning_rate": 8.430044534084792e-07, + "loss": 0.5458, + "step": 6526 + }, + { + "epoch": 0.82, + "learning_rate": 8.41876641289422e-07, + "loss": 0.4587, + "step": 6527 + }, + { + "epoch": 0.82, + "learning_rate": 8.407495147405398e-07, + "loss": 0.5201, + "step": 6528 + }, + { + "epoch": 0.82, + "learning_rate": 8.396230739476651e-07, + "loss": 0.4924, + "step": 6529 + }, + { + "epoch": 0.82, + "learning_rate": 8.384973190965212e-07, + "loss": 0.5539, + "step": 6530 + }, + { + "epoch": 0.82, + "learning_rate": 8.373722503727167e-07, + "loss": 0.4817, + "step": 6531 + }, + { + "epoch": 0.82, + "learning_rate": 8.362478679617464e-07, + "loss": 0.537, + "step": 6532 + }, + { + "epoch": 0.82, + "learning_rate": 8.351241720489938e-07, + "loss": 0.5676, + "step": 6533 + }, + { + "epoch": 0.82, + "learning_rate": 8.340011628197287e-07, + "loss": 0.4807, + "step": 6534 + }, + { + "epoch": 0.82, + "learning_rate": 8.328788404591054e-07, + "loss": 0.5738, + "step": 6535 + }, + { + "epoch": 0.82, + "learning_rate": 8.317572051521671e-07, + "loss": 0.509, + "step": 6536 + }, + { + "epoch": 0.82, + "learning_rate": 8.306362570838438e-07, + "loss": 0.5772, + "step": 6537 + }, + { + "epoch": 0.82, + "learning_rate": 8.295159964389515e-07, + "loss": 0.5407, + "step": 6538 + }, + { + "epoch": 0.82, + "learning_rate": 8.28396423402194e-07, + "loss": 0.5472, + "step": 6539 + }, + { + "epoch": 0.82, + "learning_rate": 8.272775381581594e-07, + "loss": 0.5019, + "step": 6540 + }, + { + "epoch": 0.82, + "learning_rate": 8.261593408913238e-07, + "loss": 0.4967, + "step": 6541 + }, + { + "epoch": 0.82, + "learning_rate": 8.250418317860503e-07, + "loss": 0.4756, + "step": 6542 + }, + { + "epoch": 0.82, + "learning_rate": 8.239250110265884e-07, + "loss": 0.45, + "step": 6543 + }, + { + "epoch": 0.82, + "learning_rate": 8.22808878797075e-07, + "loss": 0.5695, + "step": 6544 + }, + { + "epoch": 0.82, + "learning_rate": 8.216934352815292e-07, + "loss": 0.4525, + "step": 6545 + }, + { + "epoch": 0.82, + "learning_rate": 8.205786806638616e-07, + "loss": 0.4956, + "step": 6546 + }, + { + "epoch": 0.82, + "learning_rate": 8.194646151278674e-07, + "loss": 0.528, + "step": 6547 + }, + { + "epoch": 0.82, + "learning_rate": 8.183512388572268e-07, + "loss": 0.5702, + "step": 6548 + }, + { + "epoch": 0.82, + "learning_rate": 8.172385520355086e-07, + "loss": 0.5016, + "step": 6549 + }, + { + "epoch": 0.82, + "learning_rate": 8.161265548461683e-07, + "loss": 0.4812, + "step": 6550 + }, + { + "epoch": 0.82, + "learning_rate": 8.150152474725437e-07, + "loss": 0.5341, + "step": 6551 + }, + { + "epoch": 0.82, + "learning_rate": 8.139046300978621e-07, + "loss": 0.52, + "step": 6552 + }, + { + "epoch": 0.82, + "learning_rate": 8.127947029052368e-07, + "loss": 0.5434, + "step": 6553 + }, + { + "epoch": 0.82, + "learning_rate": 8.116854660776674e-07, + "loss": 0.5687, + "step": 6554 + }, + { + "epoch": 0.82, + "learning_rate": 8.105769197980401e-07, + "loss": 0.4848, + "step": 6555 + }, + { + "epoch": 0.82, + "learning_rate": 8.094690642491238e-07, + "loss": 0.5972, + "step": 6556 + }, + { + "epoch": 0.82, + "learning_rate": 8.083618996135772e-07, + "loss": 0.4995, + "step": 6557 + }, + { + "epoch": 0.82, + "learning_rate": 8.07255426073944e-07, + "loss": 0.5214, + "step": 6558 + }, + { + "epoch": 0.82, + "learning_rate": 8.061496438126543e-07, + "loss": 0.558, + "step": 6559 + }, + { + "epoch": 0.82, + "learning_rate": 8.050445530120249e-07, + "loss": 0.5051, + "step": 6560 + }, + { + "epoch": 0.82, + "learning_rate": 8.039401538542547e-07, + "loss": 0.4945, + "step": 6561 + }, + { + "epoch": 0.82, + "learning_rate": 8.028364465214333e-07, + "loss": 0.4579, + "step": 6562 + }, + { + "epoch": 0.82, + "learning_rate": 8.017334311955338e-07, + "loss": 0.4991, + "step": 6563 + }, + { + "epoch": 0.82, + "learning_rate": 8.006311080584156e-07, + "loss": 0.185, + "step": 6564 + }, + { + "epoch": 0.82, + "learning_rate": 7.995294772918244e-07, + "loss": 0.495, + "step": 6565 + }, + { + "epoch": 0.82, + "learning_rate": 7.984285390773932e-07, + "loss": 0.537, + "step": 6566 + }, + { + "epoch": 0.82, + "learning_rate": 7.973282935966359e-07, + "loss": 0.511, + "step": 6567 + }, + { + "epoch": 0.82, + "learning_rate": 7.962287410309566e-07, + "loss": 0.5062, + "step": 6568 + }, + { + "epoch": 0.82, + "learning_rate": 7.951298815616442e-07, + "loss": 0.5458, + "step": 6569 + }, + { + "epoch": 0.82, + "learning_rate": 7.940317153698735e-07, + "loss": 0.5731, + "step": 6570 + }, + { + "epoch": 0.82, + "learning_rate": 7.929342426367037e-07, + "loss": 0.5168, + "step": 6571 + }, + { + "epoch": 0.82, + "learning_rate": 7.918374635430809e-07, + "loss": 0.548, + "step": 6572 + }, + { + "epoch": 0.82, + "learning_rate": 7.90741378269837e-07, + "loss": 0.486, + "step": 6573 + }, + { + "epoch": 0.82, + "learning_rate": 7.89645986997688e-07, + "loss": 0.5262, + "step": 6574 + }, + { + "epoch": 0.82, + "learning_rate": 7.885512899072362e-07, + "loss": 0.4816, + "step": 6575 + }, + { + "epoch": 0.82, + "learning_rate": 7.874572871789704e-07, + "loss": 0.5158, + "step": 6576 + }, + { + "epoch": 0.82, + "learning_rate": 7.863639789932636e-07, + "loss": 0.4879, + "step": 6577 + }, + { + "epoch": 0.82, + "learning_rate": 7.852713655303762e-07, + "loss": 0.5181, + "step": 6578 + }, + { + "epoch": 0.82, + "learning_rate": 7.841794469704511e-07, + "loss": 0.5211, + "step": 6579 + }, + { + "epoch": 0.82, + "learning_rate": 7.830882234935206e-07, + "loss": 0.5132, + "step": 6580 + }, + { + "epoch": 0.82, + "learning_rate": 7.819976952794966e-07, + "loss": 0.4974, + "step": 6581 + }, + { + "epoch": 0.83, + "learning_rate": 7.809078625081823e-07, + "loss": 0.5085, + "step": 6582 + }, + { + "epoch": 0.83, + "learning_rate": 7.798187253592632e-07, + "loss": 0.4731, + "step": 6583 + }, + { + "epoch": 0.83, + "learning_rate": 7.787302840123102e-07, + "loss": 0.4731, + "step": 6584 + }, + { + "epoch": 0.83, + "learning_rate": 7.776425386467806e-07, + "loss": 0.5017, + "step": 6585 + }, + { + "epoch": 0.83, + "learning_rate": 7.765554894420163e-07, + "loss": 0.4995, + "step": 6586 + }, + { + "epoch": 0.83, + "learning_rate": 7.754691365772454e-07, + "loss": 0.4839, + "step": 6587 + }, + { + "epoch": 0.83, + "learning_rate": 7.743834802315775e-07, + "loss": 0.5079, + "step": 6588 + }, + { + "epoch": 0.83, + "learning_rate": 7.732985205840121e-07, + "loss": 0.5623, + "step": 6589 + }, + { + "epoch": 0.83, + "learning_rate": 7.722142578134312e-07, + "loss": 0.4933, + "step": 6590 + }, + { + "epoch": 0.83, + "learning_rate": 7.711306920986022e-07, + "loss": 0.5331, + "step": 6591 + }, + { + "epoch": 0.83, + "learning_rate": 7.700478236181802e-07, + "loss": 0.5389, + "step": 6592 + }, + { + "epoch": 0.83, + "learning_rate": 7.689656525506994e-07, + "loss": 0.4947, + "step": 6593 + }, + { + "epoch": 0.83, + "learning_rate": 7.67884179074585e-07, + "loss": 0.2077, + "step": 6594 + }, + { + "epoch": 0.83, + "learning_rate": 7.668034033681438e-07, + "loss": 0.5149, + "step": 6595 + }, + { + "epoch": 0.83, + "learning_rate": 7.657233256095692e-07, + "loss": 0.4969, + "step": 6596 + }, + { + "epoch": 0.83, + "learning_rate": 7.646439459769389e-07, + "loss": 0.5063, + "step": 6597 + }, + { + "epoch": 0.83, + "learning_rate": 7.635652646482167e-07, + "loss": 0.4456, + "step": 6598 + }, + { + "epoch": 0.83, + "learning_rate": 7.624872818012475e-07, + "loss": 0.5101, + "step": 6599 + }, + { + "epoch": 0.83, + "learning_rate": 7.614099976137646e-07, + "loss": 0.4731, + "step": 6600 + }, + { + "epoch": 0.83, + "learning_rate": 7.603334122633854e-07, + "loss": 0.5105, + "step": 6601 + }, + { + "epoch": 0.83, + "learning_rate": 7.592575259276119e-07, + "loss": 0.5294, + "step": 6602 + }, + { + "epoch": 0.83, + "learning_rate": 7.581823387838317e-07, + "loss": 0.2055, + "step": 6603 + }, + { + "epoch": 0.83, + "learning_rate": 7.571078510093138e-07, + "loss": 0.4869, + "step": 6604 + }, + { + "epoch": 0.83, + "learning_rate": 7.560340627812163e-07, + "loss": 0.4721, + "step": 6605 + }, + { + "epoch": 0.83, + "learning_rate": 7.549609742765785e-07, + "loss": 0.5276, + "step": 6606 + }, + { + "epoch": 0.83, + "learning_rate": 7.538885856723266e-07, + "loss": 0.5123, + "step": 6607 + }, + { + "epoch": 0.83, + "learning_rate": 7.528168971452704e-07, + "loss": 0.4822, + "step": 6608 + }, + { + "epoch": 0.83, + "learning_rate": 7.517459088721058e-07, + "loss": 0.4987, + "step": 6609 + }, + { + "epoch": 0.83, + "learning_rate": 7.506756210294091e-07, + "loss": 0.5173, + "step": 6610 + }, + { + "epoch": 0.83, + "learning_rate": 7.496060337936456e-07, + "loss": 0.4699, + "step": 6611 + }, + { + "epoch": 0.83, + "learning_rate": 7.485371473411635e-07, + "loss": 0.496, + "step": 6612 + }, + { + "epoch": 0.83, + "learning_rate": 7.474689618481945e-07, + "loss": 0.566, + "step": 6613 + }, + { + "epoch": 0.83, + "learning_rate": 7.464014774908573e-07, + "loss": 0.2191, + "step": 6614 + }, + { + "epoch": 0.83, + "learning_rate": 7.453346944451511e-07, + "loss": 0.4909, + "step": 6615 + }, + { + "epoch": 0.83, + "learning_rate": 7.442686128869625e-07, + "loss": 0.5124, + "step": 6616 + }, + { + "epoch": 0.83, + "learning_rate": 7.432032329920613e-07, + "loss": 0.4792, + "step": 6617 + }, + { + "epoch": 0.83, + "learning_rate": 7.421385549361026e-07, + "loss": 0.5443, + "step": 6618 + }, + { + "epoch": 0.83, + "learning_rate": 7.410745788946261e-07, + "loss": 0.5438, + "step": 6619 + }, + { + "epoch": 0.83, + "learning_rate": 7.400113050430519e-07, + "loss": 0.5562, + "step": 6620 + }, + { + "epoch": 0.83, + "learning_rate": 7.389487335566892e-07, + "loss": 0.5434, + "step": 6621 + }, + { + "epoch": 0.83, + "learning_rate": 7.378868646107285e-07, + "loss": 0.5609, + "step": 6622 + }, + { + "epoch": 0.83, + "learning_rate": 7.368256983802452e-07, + "loss": 0.543, + "step": 6623 + }, + { + "epoch": 0.83, + "learning_rate": 7.357652350402001e-07, + "loss": 0.5008, + "step": 6624 + }, + { + "epoch": 0.83, + "learning_rate": 7.347054747654375e-07, + "loss": 0.493, + "step": 6625 + }, + { + "epoch": 0.83, + "learning_rate": 7.336464177306829e-07, + "loss": 0.468, + "step": 6626 + }, + { + "epoch": 0.83, + "learning_rate": 7.325880641105493e-07, + "loss": 0.4834, + "step": 6627 + }, + { + "epoch": 0.83, + "learning_rate": 7.315304140795332e-07, + "loss": 0.5063, + "step": 6628 + }, + { + "epoch": 0.83, + "learning_rate": 7.304734678120135e-07, + "loss": 0.4794, + "step": 6629 + }, + { + "epoch": 0.83, + "learning_rate": 7.294172254822563e-07, + "loss": 0.4654, + "step": 6630 + }, + { + "epoch": 0.83, + "learning_rate": 7.283616872644067e-07, + "loss": 0.5319, + "step": 6631 + }, + { + "epoch": 0.83, + "learning_rate": 7.273068533324978e-07, + "loss": 0.4872, + "step": 6632 + }, + { + "epoch": 0.83, + "learning_rate": 7.26252723860445e-07, + "loss": 0.512, + "step": 6633 + }, + { + "epoch": 0.83, + "learning_rate": 7.251992990220475e-07, + "loss": 0.4644, + "step": 6634 + }, + { + "epoch": 0.83, + "learning_rate": 7.241465789909907e-07, + "loss": 0.2352, + "step": 6635 + }, + { + "epoch": 0.83, + "learning_rate": 7.230945639408388e-07, + "loss": 0.5672, + "step": 6636 + }, + { + "epoch": 0.83, + "learning_rate": 7.220432540450434e-07, + "loss": 0.4878, + "step": 6637 + }, + { + "epoch": 0.83, + "learning_rate": 7.209926494769398e-07, + "loss": 0.5445, + "step": 6638 + }, + { + "epoch": 0.83, + "learning_rate": 7.199427504097467e-07, + "loss": 0.5203, + "step": 6639 + }, + { + "epoch": 0.83, + "learning_rate": 7.18893557016565e-07, + "loss": 0.5384, + "step": 6640 + }, + { + "epoch": 0.83, + "learning_rate": 7.17845069470382e-07, + "loss": 0.4517, + "step": 6641 + }, + { + "epoch": 0.83, + "learning_rate": 7.167972879440649e-07, + "loss": 0.5219, + "step": 6642 + }, + { + "epoch": 0.83, + "learning_rate": 7.157502126103677e-07, + "loss": 0.5097, + "step": 6643 + }, + { + "epoch": 0.83, + "learning_rate": 7.147038436419262e-07, + "loss": 0.5373, + "step": 6644 + }, + { + "epoch": 0.83, + "learning_rate": 7.136581812112614e-07, + "loss": 0.562, + "step": 6645 + }, + { + "epoch": 0.83, + "learning_rate": 7.126132254907769e-07, + "loss": 0.5369, + "step": 6646 + }, + { + "epoch": 0.83, + "learning_rate": 7.115689766527583e-07, + "loss": 0.4882, + "step": 6647 + }, + { + "epoch": 0.83, + "learning_rate": 7.105254348693764e-07, + "loss": 0.5926, + "step": 6648 + }, + { + "epoch": 0.83, + "learning_rate": 7.094826003126853e-07, + "loss": 0.4778, + "step": 6649 + }, + { + "epoch": 0.83, + "learning_rate": 7.084404731546229e-07, + "loss": 0.5058, + "step": 6650 + }, + { + "epoch": 0.83, + "learning_rate": 7.0739905356701e-07, + "loss": 0.4897, + "step": 6651 + }, + { + "epoch": 0.83, + "learning_rate": 7.063583417215486e-07, + "loss": 0.4948, + "step": 6652 + }, + { + "epoch": 0.83, + "learning_rate": 7.053183377898276e-07, + "loss": 0.5149, + "step": 6653 + }, + { + "epoch": 0.83, + "learning_rate": 7.042790419433165e-07, + "loss": 0.5244, + "step": 6654 + }, + { + "epoch": 0.83, + "learning_rate": 7.0324045435337e-07, + "loss": 0.5072, + "step": 6655 + }, + { + "epoch": 0.83, + "learning_rate": 7.022025751912248e-07, + "loss": 0.4955, + "step": 6656 + }, + { + "epoch": 0.83, + "learning_rate": 7.011654046280025e-07, + "loss": 0.5393, + "step": 6657 + }, + { + "epoch": 0.83, + "learning_rate": 7.001289428347036e-07, + "loss": 0.5584, + "step": 6658 + }, + { + "epoch": 0.83, + "learning_rate": 6.990931899822162e-07, + "loss": 0.5335, + "step": 6659 + }, + { + "epoch": 0.83, + "learning_rate": 6.9805814624131e-07, + "loss": 0.4824, + "step": 6660 + }, + { + "epoch": 0.83, + "learning_rate": 6.970238117826378e-07, + "loss": 0.4616, + "step": 6661 + }, + { + "epoch": 0.84, + "learning_rate": 6.959901867767366e-07, + "loss": 0.5662, + "step": 6662 + }, + { + "epoch": 0.84, + "learning_rate": 6.949572713940223e-07, + "loss": 0.5037, + "step": 6663 + }, + { + "epoch": 0.84, + "learning_rate": 6.939250658047985e-07, + "loss": 0.5129, + "step": 6664 + }, + { + "epoch": 0.84, + "learning_rate": 6.9289357017925e-07, + "loss": 0.5049, + "step": 6665 + }, + { + "epoch": 0.84, + "learning_rate": 6.918627846874437e-07, + "loss": 0.1928, + "step": 6666 + }, + { + "epoch": 0.84, + "learning_rate": 6.908327094993328e-07, + "loss": 0.4867, + "step": 6667 + }, + { + "epoch": 0.84, + "learning_rate": 6.898033447847469e-07, + "loss": 0.5766, + "step": 6668 + }, + { + "epoch": 0.84, + "learning_rate": 6.887746907134052e-07, + "loss": 0.5681, + "step": 6669 + }, + { + "epoch": 0.84, + "learning_rate": 6.87746747454906e-07, + "loss": 0.5088, + "step": 6670 + }, + { + "epoch": 0.84, + "learning_rate": 6.867195151787309e-07, + "loss": 0.4817, + "step": 6671 + }, + { + "epoch": 0.84, + "learning_rate": 6.856929940542462e-07, + "loss": 0.49, + "step": 6672 + }, + { + "epoch": 0.84, + "learning_rate": 6.846671842506992e-07, + "loss": 0.5613, + "step": 6673 + }, + { + "epoch": 0.84, + "learning_rate": 6.836420859372183e-07, + "loss": 0.2057, + "step": 6674 + }, + { + "epoch": 0.84, + "learning_rate": 6.826176992828181e-07, + "loss": 0.5233, + "step": 6675 + }, + { + "epoch": 0.84, + "learning_rate": 6.81594024456394e-07, + "loss": 0.5017, + "step": 6676 + }, + { + "epoch": 0.84, + "learning_rate": 6.805710616267242e-07, + "loss": 0.5054, + "step": 6677 + }, + { + "epoch": 0.84, + "learning_rate": 6.795488109624693e-07, + "loss": 0.4379, + "step": 6678 + }, + { + "epoch": 0.84, + "learning_rate": 6.78527272632174e-07, + "loss": 0.5714, + "step": 6679 + }, + { + "epoch": 0.84, + "learning_rate": 6.775064468042625e-07, + "loss": 0.5498, + "step": 6680 + }, + { + "epoch": 0.84, + "learning_rate": 6.764863336470445e-07, + "loss": 0.4911, + "step": 6681 + }, + { + "epoch": 0.84, + "learning_rate": 6.754669333287106e-07, + "loss": 0.5261, + "step": 6682 + }, + { + "epoch": 0.84, + "learning_rate": 6.744482460173346e-07, + "loss": 0.4293, + "step": 6683 + }, + { + "epoch": 0.84, + "learning_rate": 6.734302718808722e-07, + "loss": 0.5034, + "step": 6684 + }, + { + "epoch": 0.84, + "learning_rate": 6.724130110871624e-07, + "loss": 0.4832, + "step": 6685 + }, + { + "epoch": 0.84, + "learning_rate": 6.713964638039266e-07, + "loss": 0.508, + "step": 6686 + }, + { + "epoch": 0.84, + "learning_rate": 6.703806301987653e-07, + "loss": 0.4851, + "step": 6687 + }, + { + "epoch": 0.84, + "learning_rate": 6.693655104391661e-07, + "loss": 0.52, + "step": 6688 + }, + { + "epoch": 0.84, + "learning_rate": 6.683511046924956e-07, + "loss": 0.5748, + "step": 6689 + }, + { + "epoch": 0.84, + "learning_rate": 6.673374131260052e-07, + "loss": 0.5435, + "step": 6690 + }, + { + "epoch": 0.84, + "learning_rate": 6.663244359068261e-07, + "loss": 0.4781, + "step": 6691 + }, + { + "epoch": 0.84, + "learning_rate": 6.653121732019729e-07, + "loss": 0.5003, + "step": 6692 + }, + { + "epoch": 0.84, + "learning_rate": 6.64300625178344e-07, + "loss": 0.5646, + "step": 6693 + }, + { + "epoch": 0.84, + "learning_rate": 6.632897920027159e-07, + "loss": 0.5475, + "step": 6694 + }, + { + "epoch": 0.84, + "learning_rate": 6.622796738417508e-07, + "loss": 0.5028, + "step": 6695 + }, + { + "epoch": 0.84, + "learning_rate": 6.612702708619911e-07, + "loss": 0.4967, + "step": 6696 + }, + { + "epoch": 0.84, + "learning_rate": 6.602615832298625e-07, + "loss": 0.524, + "step": 6697 + }, + { + "epoch": 0.84, + "learning_rate": 6.592536111116727e-07, + "loss": 0.5912, + "step": 6698 + }, + { + "epoch": 0.84, + "learning_rate": 6.582463546736118e-07, + "loss": 0.4431, + "step": 6699 + }, + { + "epoch": 0.84, + "learning_rate": 6.572398140817488e-07, + "loss": 0.5708, + "step": 6700 + }, + { + "epoch": 0.84, + "learning_rate": 6.562339895020375e-07, + "loss": 0.4695, + "step": 6701 + }, + { + "epoch": 0.84, + "learning_rate": 6.552288811003144e-07, + "loss": 0.5514, + "step": 6702 + }, + { + "epoch": 0.84, + "learning_rate": 6.542244890422956e-07, + "loss": 0.4538, + "step": 6703 + }, + { + "epoch": 0.84, + "learning_rate": 6.532208134935808e-07, + "loss": 0.515, + "step": 6704 + }, + { + "epoch": 0.84, + "learning_rate": 6.522178546196511e-07, + "loss": 0.4872, + "step": 6705 + }, + { + "epoch": 0.84, + "learning_rate": 6.512156125858682e-07, + "loss": 0.5074, + "step": 6706 + }, + { + "epoch": 0.84, + "learning_rate": 6.50214087557477e-07, + "loss": 0.497, + "step": 6707 + }, + { + "epoch": 0.84, + "learning_rate": 6.492132796996043e-07, + "loss": 0.5082, + "step": 6708 + }, + { + "epoch": 0.84, + "learning_rate": 6.48213189177258e-07, + "loss": 0.4996, + "step": 6709 + }, + { + "epoch": 0.84, + "learning_rate": 6.472138161553288e-07, + "loss": 0.2272, + "step": 6710 + }, + { + "epoch": 0.84, + "learning_rate": 6.462151607985861e-07, + "loss": 0.516, + "step": 6711 + }, + { + "epoch": 0.84, + "learning_rate": 6.452172232716846e-07, + "loss": 0.4947, + "step": 6712 + }, + { + "epoch": 0.84, + "learning_rate": 6.442200037391588e-07, + "loss": 0.4711, + "step": 6713 + }, + { + "epoch": 0.84, + "learning_rate": 6.432235023654249e-07, + "loss": 0.5665, + "step": 6714 + }, + { + "epoch": 0.84, + "learning_rate": 6.422277193147813e-07, + "loss": 0.4386, + "step": 6715 + }, + { + "epoch": 0.84, + "learning_rate": 6.412326547514091e-07, + "loss": 0.522, + "step": 6716 + }, + { + "epoch": 0.84, + "learning_rate": 6.402383088393671e-07, + "loss": 0.5441, + "step": 6717 + }, + { + "epoch": 0.84, + "learning_rate": 6.392446817425985e-07, + "loss": 0.5701, + "step": 6718 + }, + { + "epoch": 0.84, + "learning_rate": 6.382517736249277e-07, + "loss": 0.5791, + "step": 6719 + }, + { + "epoch": 0.84, + "learning_rate": 6.372595846500607e-07, + "loss": 0.5565, + "step": 6720 + }, + { + "epoch": 0.84, + "learning_rate": 6.362681149815853e-07, + "loss": 0.5424, + "step": 6721 + }, + { + "epoch": 0.84, + "learning_rate": 6.35277364782968e-07, + "loss": 0.5214, + "step": 6722 + }, + { + "epoch": 0.84, + "learning_rate": 6.342873342175598e-07, + "loss": 0.4804, + "step": 6723 + }, + { + "epoch": 0.84, + "learning_rate": 6.332980234485914e-07, + "loss": 0.4852, + "step": 6724 + }, + { + "epoch": 0.84, + "learning_rate": 6.32309432639176e-07, + "loss": 0.5457, + "step": 6725 + }, + { + "epoch": 0.84, + "learning_rate": 6.313215619523078e-07, + "loss": 0.582, + "step": 6726 + }, + { + "epoch": 0.84, + "learning_rate": 6.303344115508603e-07, + "loss": 0.5027, + "step": 6727 + }, + { + "epoch": 0.84, + "learning_rate": 6.293479815975906e-07, + "loss": 0.5439, + "step": 6728 + }, + { + "epoch": 0.84, + "learning_rate": 6.283622722551358e-07, + "loss": 0.5302, + "step": 6729 + }, + { + "epoch": 0.84, + "learning_rate": 6.273772836860159e-07, + "loss": 0.509, + "step": 6730 + }, + { + "epoch": 0.84, + "learning_rate": 6.263930160526294e-07, + "loss": 0.5266, + "step": 6731 + }, + { + "epoch": 0.84, + "learning_rate": 6.254094695172596e-07, + "loss": 0.4525, + "step": 6732 + }, + { + "epoch": 0.84, + "learning_rate": 6.244266442420655e-07, + "loss": 0.5563, + "step": 6733 + }, + { + "epoch": 0.84, + "learning_rate": 6.23444540389092e-07, + "loss": 0.4612, + "step": 6734 + }, + { + "epoch": 0.84, + "learning_rate": 6.224631581202628e-07, + "loss": 0.4854, + "step": 6735 + }, + { + "epoch": 0.84, + "learning_rate": 6.214824975973843e-07, + "loss": 0.4682, + "step": 6736 + }, + { + "epoch": 0.84, + "learning_rate": 6.205025589821423e-07, + "loss": 0.523, + "step": 6737 + }, + { + "epoch": 0.84, + "learning_rate": 6.195233424361035e-07, + "loss": 0.5199, + "step": 6738 + }, + { + "epoch": 0.84, + "learning_rate": 6.185448481207162e-07, + "loss": 0.5757, + "step": 6739 + }, + { + "epoch": 0.84, + "learning_rate": 6.175670761973102e-07, + "loss": 0.5025, + "step": 6740 + }, + { + "epoch": 0.84, + "learning_rate": 6.165900268270947e-07, + "loss": 0.4938, + "step": 6741 + }, + { + "epoch": 0.85, + "learning_rate": 6.156137001711621e-07, + "loss": 0.5646, + "step": 6742 + }, + { + "epoch": 0.85, + "learning_rate": 6.146380963904824e-07, + "loss": 0.4642, + "step": 6743 + }, + { + "epoch": 0.85, + "learning_rate": 6.136632156459093e-07, + "loss": 0.4973, + "step": 6744 + }, + { + "epoch": 0.85, + "learning_rate": 6.126890580981753e-07, + "loss": 0.538, + "step": 6745 + }, + { + "epoch": 0.85, + "learning_rate": 6.117156239078953e-07, + "loss": 0.455, + "step": 6746 + }, + { + "epoch": 0.85, + "learning_rate": 6.107429132355635e-07, + "loss": 0.5497, + "step": 6747 + }, + { + "epoch": 0.85, + "learning_rate": 6.097709262415574e-07, + "loss": 0.5578, + "step": 6748 + }, + { + "epoch": 0.85, + "learning_rate": 6.087996630861309e-07, + "loss": 0.5157, + "step": 6749 + }, + { + "epoch": 0.85, + "learning_rate": 6.07829123929421e-07, + "loss": 0.4977, + "step": 6750 + }, + { + "epoch": 0.85, + "learning_rate": 6.068593089314467e-07, + "loss": 0.4717, + "step": 6751 + }, + { + "epoch": 0.85, + "learning_rate": 6.058902182521054e-07, + "loss": 0.457, + "step": 6752 + }, + { + "epoch": 0.85, + "learning_rate": 6.049218520511762e-07, + "loss": 0.5381, + "step": 6753 + }, + { + "epoch": 0.85, + "learning_rate": 6.039542104883179e-07, + "loss": 0.5217, + "step": 6754 + }, + { + "epoch": 0.85, + "learning_rate": 6.029872937230702e-07, + "loss": 0.5203, + "step": 6755 + }, + { + "epoch": 0.85, + "learning_rate": 6.020211019148536e-07, + "loss": 0.5147, + "step": 6756 + }, + { + "epoch": 0.85, + "learning_rate": 6.010556352229691e-07, + "loss": 0.5826, + "step": 6757 + }, + { + "epoch": 0.85, + "learning_rate": 6.000908938065975e-07, + "loss": 0.563, + "step": 6758 + }, + { + "epoch": 0.85, + "learning_rate": 5.991268778248016e-07, + "loss": 0.4965, + "step": 6759 + }, + { + "epoch": 0.85, + "learning_rate": 5.981635874365221e-07, + "loss": 0.534, + "step": 6760 + }, + { + "epoch": 0.85, + "learning_rate": 5.972010228005814e-07, + "loss": 0.603, + "step": 6761 + }, + { + "epoch": 0.85, + "learning_rate": 5.962391840756832e-07, + "loss": 0.555, + "step": 6762 + }, + { + "epoch": 0.85, + "learning_rate": 5.952780714204098e-07, + "loss": 0.5689, + "step": 6763 + }, + { + "epoch": 0.85, + "learning_rate": 5.943176849932258e-07, + "loss": 0.5255, + "step": 6764 + }, + { + "epoch": 0.85, + "learning_rate": 5.933580249524729e-07, + "loss": 0.4364, + "step": 6765 + }, + { + "epoch": 0.85, + "learning_rate": 5.92399091456376e-07, + "loss": 0.4932, + "step": 6766 + }, + { + "epoch": 0.85, + "learning_rate": 5.914408846630393e-07, + "loss": 0.469, + "step": 6767 + }, + { + "epoch": 0.85, + "learning_rate": 5.904834047304464e-07, + "loss": 0.4952, + "step": 6768 + }, + { + "epoch": 0.85, + "learning_rate": 5.895266518164633e-07, + "loss": 0.4981, + "step": 6769 + }, + { + "epoch": 0.85, + "learning_rate": 5.885706260788321e-07, + "loss": 0.4615, + "step": 6770 + }, + { + "epoch": 0.85, + "learning_rate": 5.87615327675179e-07, + "loss": 0.5761, + "step": 6771 + }, + { + "epoch": 0.85, + "learning_rate": 5.866607567630084e-07, + "loss": 0.5325, + "step": 6772 + }, + { + "epoch": 0.85, + "learning_rate": 5.857069134997051e-07, + "loss": 0.4823, + "step": 6773 + }, + { + "epoch": 0.85, + "learning_rate": 5.847537980425339e-07, + "loss": 0.5386, + "step": 6774 + }, + { + "epoch": 0.85, + "learning_rate": 5.838014105486412e-07, + "loss": 0.5294, + "step": 6775 + }, + { + "epoch": 0.85, + "learning_rate": 5.828497511750486e-07, + "loss": 0.4839, + "step": 6776 + }, + { + "epoch": 0.85, + "learning_rate": 5.818988200786624e-07, + "loss": 0.5523, + "step": 6777 + }, + { + "epoch": 0.85, + "learning_rate": 5.809486174162677e-07, + "loss": 0.4873, + "step": 6778 + }, + { + "epoch": 0.85, + "learning_rate": 5.79999143344529e-07, + "loss": 0.5049, + "step": 6779 + }, + { + "epoch": 0.85, + "learning_rate": 5.790503980199913e-07, + "loss": 0.4896, + "step": 6780 + }, + { + "epoch": 0.85, + "learning_rate": 5.781023815990766e-07, + "loss": 0.5343, + "step": 6781 + }, + { + "epoch": 0.85, + "learning_rate": 5.77155094238091e-07, + "loss": 0.5144, + "step": 6782 + }, + { + "epoch": 0.85, + "learning_rate": 5.762085360932179e-07, + "loss": 0.5569, + "step": 6783 + }, + { + "epoch": 0.85, + "learning_rate": 5.752627073205208e-07, + "loss": 0.5162, + "step": 6784 + }, + { + "epoch": 0.85, + "learning_rate": 5.743176080759444e-07, + "loss": 0.4418, + "step": 6785 + }, + { + "epoch": 0.85, + "learning_rate": 5.733732385153095e-07, + "loss": 0.4459, + "step": 6786 + }, + { + "epoch": 0.85, + "learning_rate": 5.724295987943201e-07, + "loss": 0.5137, + "step": 6787 + }, + { + "epoch": 0.85, + "learning_rate": 5.714866890685594e-07, + "loss": 0.5366, + "step": 6788 + }, + { + "epoch": 0.85, + "learning_rate": 5.70544509493488e-07, + "loss": 0.5408, + "step": 6789 + }, + { + "epoch": 0.85, + "learning_rate": 5.696030602244495e-07, + "loss": 0.4945, + "step": 6790 + }, + { + "epoch": 0.85, + "learning_rate": 5.686623414166648e-07, + "loss": 0.5218, + "step": 6791 + }, + { + "epoch": 0.85, + "learning_rate": 5.677223532252335e-07, + "loss": 0.524, + "step": 6792 + }, + { + "epoch": 0.85, + "learning_rate": 5.667830958051373e-07, + "loss": 0.2188, + "step": 6793 + }, + { + "epoch": 0.85, + "learning_rate": 5.658445693112352e-07, + "loss": 0.5038, + "step": 6794 + }, + { + "epoch": 0.85, + "learning_rate": 5.649067738982677e-07, + "loss": 0.5525, + "step": 6795 + }, + { + "epoch": 0.85, + "learning_rate": 5.639697097208529e-07, + "loss": 0.4994, + "step": 6796 + }, + { + "epoch": 0.85, + "learning_rate": 5.6303337693349e-07, + "loss": 0.4929, + "step": 6797 + }, + { + "epoch": 0.85, + "learning_rate": 5.620977756905571e-07, + "loss": 0.5725, + "step": 6798 + }, + { + "epoch": 0.85, + "learning_rate": 5.611629061463097e-07, + "loss": 0.2252, + "step": 6799 + }, + { + "epoch": 0.85, + "learning_rate": 5.602287684548852e-07, + "loss": 0.5519, + "step": 6800 + }, + { + "epoch": 0.85, + "learning_rate": 5.592953627702991e-07, + "loss": 0.4622, + "step": 6801 + }, + { + "epoch": 0.85, + "learning_rate": 5.583626892464472e-07, + "loss": 0.5034, + "step": 6802 + }, + { + "epoch": 0.85, + "learning_rate": 5.574307480371039e-07, + "loss": 0.486, + "step": 6803 + }, + { + "epoch": 0.85, + "learning_rate": 5.564995392959222e-07, + "loss": 0.5304, + "step": 6804 + }, + { + "epoch": 0.85, + "learning_rate": 5.555690631764371e-07, + "loss": 0.4758, + "step": 6805 + }, + { + "epoch": 0.85, + "learning_rate": 5.546393198320577e-07, + "loss": 0.5458, + "step": 6806 + }, + { + "epoch": 0.85, + "learning_rate": 5.537103094160773e-07, + "loss": 0.5505, + "step": 6807 + }, + { + "epoch": 0.85, + "learning_rate": 5.527820320816657e-07, + "loss": 0.559, + "step": 6808 + }, + { + "epoch": 0.85, + "learning_rate": 5.518544879818727e-07, + "loss": 0.5573, + "step": 6809 + }, + { + "epoch": 0.85, + "learning_rate": 5.509276772696275e-07, + "loss": 0.5244, + "step": 6810 + }, + { + "epoch": 0.85, + "learning_rate": 5.50001600097737e-07, + "loss": 0.4804, + "step": 6811 + }, + { + "epoch": 0.85, + "learning_rate": 5.4907625661889e-07, + "loss": 0.5341, + "step": 6812 + }, + { + "epoch": 0.85, + "learning_rate": 5.481516469856501e-07, + "loss": 0.5033, + "step": 6813 + }, + { + "epoch": 0.85, + "learning_rate": 5.472277713504631e-07, + "loss": 0.5105, + "step": 6814 + }, + { + "epoch": 0.85, + "learning_rate": 5.463046298656533e-07, + "loss": 0.2036, + "step": 6815 + }, + { + "epoch": 0.85, + "learning_rate": 5.453822226834227e-07, + "loss": 0.4983, + "step": 6816 + }, + { + "epoch": 0.85, + "learning_rate": 5.444605499558547e-07, + "loss": 0.4891, + "step": 6817 + }, + { + "epoch": 0.85, + "learning_rate": 5.435396118349079e-07, + "loss": 0.5794, + "step": 6818 + }, + { + "epoch": 0.85, + "learning_rate": 5.426194084724229e-07, + "loss": 0.5028, + "step": 6819 + }, + { + "epoch": 0.85, + "learning_rate": 5.416999400201183e-07, + "loss": 0.4779, + "step": 6820 + }, + { + "epoch": 0.86, + "learning_rate": 5.407812066295914e-07, + "loss": 0.497, + "step": 6821 + }, + { + "epoch": 0.86, + "learning_rate": 5.398632084523176e-07, + "loss": 0.5568, + "step": 6822 + }, + { + "epoch": 0.86, + "learning_rate": 5.389459456396534e-07, + "loss": 0.5332, + "step": 6823 + }, + { + "epoch": 0.86, + "learning_rate": 5.380294183428303e-07, + "loss": 0.5289, + "step": 6824 + }, + { + "epoch": 0.86, + "learning_rate": 5.371136267129617e-07, + "loss": 0.4748, + "step": 6825 + }, + { + "epoch": 0.86, + "learning_rate": 5.361985709010387e-07, + "loss": 0.4783, + "step": 6826 + }, + { + "epoch": 0.86, + "learning_rate": 5.352842510579304e-07, + "loss": 0.5192, + "step": 6827 + }, + { + "epoch": 0.86, + "learning_rate": 5.343706673343868e-07, + "loss": 0.5398, + "step": 6828 + }, + { + "epoch": 0.86, + "learning_rate": 5.334578198810331e-07, + "loss": 0.5737, + "step": 6829 + }, + { + "epoch": 0.86, + "learning_rate": 5.325457088483754e-07, + "loss": 0.4162, + "step": 6830 + }, + { + "epoch": 0.86, + "learning_rate": 5.316343343867986e-07, + "loss": 0.4955, + "step": 6831 + }, + { + "epoch": 0.86, + "learning_rate": 5.307236966465645e-07, + "loss": 0.4895, + "step": 6832 + }, + { + "epoch": 0.86, + "learning_rate": 5.298137957778155e-07, + "loss": 0.5092, + "step": 6833 + }, + { + "epoch": 0.86, + "learning_rate": 5.289046319305712e-07, + "loss": 0.4273, + "step": 6834 + }, + { + "epoch": 0.86, + "learning_rate": 5.279962052547288e-07, + "loss": 0.5384, + "step": 6835 + }, + { + "epoch": 0.86, + "learning_rate": 5.270885159000661e-07, + "loss": 0.4362, + "step": 6836 + }, + { + "epoch": 0.86, + "learning_rate": 5.261815640162377e-07, + "loss": 0.5754, + "step": 6837 + }, + { + "epoch": 0.86, + "learning_rate": 5.252753497527774e-07, + "loss": 0.4617, + "step": 6838 + }, + { + "epoch": 0.86, + "learning_rate": 5.243698732590985e-07, + "loss": 0.5259, + "step": 6839 + }, + { + "epoch": 0.86, + "learning_rate": 5.234651346844888e-07, + "loss": 0.5338, + "step": 6840 + }, + { + "epoch": 0.86, + "learning_rate": 5.22561134178119e-07, + "loss": 0.5401, + "step": 6841 + }, + { + "epoch": 0.86, + "learning_rate": 5.216578718890348e-07, + "loss": 0.537, + "step": 6842 + }, + { + "epoch": 0.86, + "learning_rate": 5.207553479661626e-07, + "loss": 0.5141, + "step": 6843 + }, + { + "epoch": 0.86, + "learning_rate": 5.198535625583057e-07, + "loss": 0.5277, + "step": 6844 + }, + { + "epoch": 0.86, + "learning_rate": 5.18952515814145e-07, + "loss": 0.51, + "step": 6845 + }, + { + "epoch": 0.86, + "learning_rate": 5.180522078822414e-07, + "loss": 0.4776, + "step": 6846 + }, + { + "epoch": 0.86, + "learning_rate": 5.171526389110327e-07, + "loss": 0.4498, + "step": 6847 + }, + { + "epoch": 0.86, + "learning_rate": 5.16253809048835e-07, + "loss": 0.5096, + "step": 6848 + }, + { + "epoch": 0.86, + "learning_rate": 5.153557184438434e-07, + "loss": 0.4929, + "step": 6849 + }, + { + "epoch": 0.86, + "learning_rate": 5.144583672441312e-07, + "loss": 0.4515, + "step": 6850 + }, + { + "epoch": 0.86, + "learning_rate": 5.135617555976469e-07, + "loss": 0.4891, + "step": 6851 + }, + { + "epoch": 0.86, + "learning_rate": 5.126658836522202e-07, + "loss": 0.5137, + "step": 6852 + }, + { + "epoch": 0.86, + "learning_rate": 5.117707515555586e-07, + "loss": 0.5114, + "step": 6853 + }, + { + "epoch": 0.86, + "learning_rate": 5.108763594552463e-07, + "loss": 0.5132, + "step": 6854 + }, + { + "epoch": 0.86, + "learning_rate": 5.099827074987473e-07, + "loss": 0.5123, + "step": 6855 + }, + { + "epoch": 0.86, + "learning_rate": 5.090897958334007e-07, + "loss": 0.5274, + "step": 6856 + }, + { + "epoch": 0.86, + "learning_rate": 5.081976246064252e-07, + "loss": 0.467, + "step": 6857 + }, + { + "epoch": 0.86, + "learning_rate": 5.073061939649188e-07, + "loss": 0.5365, + "step": 6858 + }, + { + "epoch": 0.86, + "learning_rate": 5.064155040558549e-07, + "loss": 0.4804, + "step": 6859 + }, + { + "epoch": 0.86, + "learning_rate": 5.055255550260874e-07, + "loss": 0.4888, + "step": 6860 + }, + { + "epoch": 0.86, + "learning_rate": 5.046363470223448e-07, + "loss": 0.4407, + "step": 6861 + }, + { + "epoch": 0.86, + "learning_rate": 5.037478801912355e-07, + "loss": 0.4484, + "step": 6862 + }, + { + "epoch": 0.86, + "learning_rate": 5.028601546792461e-07, + "loss": 0.5328, + "step": 6863 + }, + { + "epoch": 0.86, + "learning_rate": 5.019731706327396e-07, + "loss": 0.5176, + "step": 6864 + }, + { + "epoch": 0.86, + "learning_rate": 5.01086928197958e-07, + "loss": 0.5515, + "step": 6865 + }, + { + "epoch": 0.86, + "learning_rate": 5.002014275210215e-07, + "loss": 0.189, + "step": 6866 + }, + { + "epoch": 0.86, + "learning_rate": 4.99316668747924e-07, + "loss": 0.5044, + "step": 6867 + }, + { + "epoch": 0.86, + "learning_rate": 4.98432652024542e-07, + "loss": 0.499, + "step": 6868 + }, + { + "epoch": 0.86, + "learning_rate": 4.975493774966272e-07, + "loss": 0.5929, + "step": 6869 + }, + { + "epoch": 0.86, + "learning_rate": 4.966668453098095e-07, + "loss": 0.5514, + "step": 6870 + }, + { + "epoch": 0.86, + "learning_rate": 4.957850556095972e-07, + "loss": 0.5056, + "step": 6871 + }, + { + "epoch": 0.86, + "learning_rate": 4.949040085413736e-07, + "loss": 0.5032, + "step": 6872 + }, + { + "epoch": 0.86, + "learning_rate": 4.940237042504021e-07, + "loss": 0.5667, + "step": 6873 + }, + { + "epoch": 0.86, + "learning_rate": 4.931441428818223e-07, + "loss": 0.5543, + "step": 6874 + }, + { + "epoch": 0.86, + "learning_rate": 4.922653245806524e-07, + "loss": 0.5163, + "step": 6875 + }, + { + "epoch": 0.86, + "learning_rate": 4.913872494917887e-07, + "loss": 0.552, + "step": 6876 + }, + { + "epoch": 0.86, + "learning_rate": 4.905099177600009e-07, + "loss": 0.5258, + "step": 6877 + }, + { + "epoch": 0.86, + "learning_rate": 4.896333295299405e-07, + "loss": 0.517, + "step": 6878 + }, + { + "epoch": 0.86, + "learning_rate": 4.887574849461352e-07, + "loss": 0.569, + "step": 6879 + }, + { + "epoch": 0.86, + "learning_rate": 4.878823841529889e-07, + "loss": 0.5293, + "step": 6880 + }, + { + "epoch": 0.86, + "learning_rate": 4.870080272947841e-07, + "loss": 0.5287, + "step": 6881 + }, + { + "epoch": 0.86, + "learning_rate": 4.861344145156821e-07, + "loss": 0.4799, + "step": 6882 + }, + { + "epoch": 0.86, + "learning_rate": 4.852615459597165e-07, + "loss": 0.5211, + "step": 6883 + }, + { + "epoch": 0.86, + "learning_rate": 4.843894217708034e-07, + "loss": 0.51, + "step": 6884 + }, + { + "epoch": 0.86, + "learning_rate": 4.835180420927338e-07, + "loss": 0.5186, + "step": 6885 + }, + { + "epoch": 0.86, + "learning_rate": 4.826474070691761e-07, + "loss": 0.4857, + "step": 6886 + }, + { + "epoch": 0.86, + "learning_rate": 4.817775168436773e-07, + "loss": 0.4581, + "step": 6887 + }, + { + "epoch": 0.86, + "learning_rate": 4.809083715596585e-07, + "loss": 0.1969, + "step": 6888 + }, + { + "epoch": 0.86, + "learning_rate": 4.800399713604214e-07, + "loss": 0.4675, + "step": 6889 + }, + { + "epoch": 0.86, + "learning_rate": 4.791723163891427e-07, + "loss": 0.4805, + "step": 6890 + }, + { + "epoch": 0.86, + "learning_rate": 4.783054067888776e-07, + "loss": 0.4712, + "step": 6891 + }, + { + "epoch": 0.86, + "learning_rate": 4.774392427025576e-07, + "loss": 0.5644, + "step": 6892 + }, + { + "epoch": 0.86, + "learning_rate": 4.765738242729906e-07, + "loss": 0.4862, + "step": 6893 + }, + { + "epoch": 0.86, + "learning_rate": 4.75709151642863e-07, + "loss": 0.5523, + "step": 6894 + }, + { + "epoch": 0.86, + "learning_rate": 4.748452249547375e-07, + "loss": 0.2178, + "step": 6895 + }, + { + "epoch": 0.86, + "learning_rate": 4.73982044351054e-07, + "loss": 0.5491, + "step": 6896 + }, + { + "epoch": 0.86, + "learning_rate": 4.731196099741292e-07, + "loss": 0.478, + "step": 6897 + }, + { + "epoch": 0.86, + "learning_rate": 4.722579219661583e-07, + "loss": 0.5251, + "step": 6898 + }, + { + "epoch": 0.86, + "learning_rate": 4.7139698046920925e-07, + "loss": 0.5655, + "step": 6899 + }, + { + "epoch": 0.86, + "learning_rate": 4.705367856252313e-07, + "loss": 0.5194, + "step": 6900 + }, + { + "epoch": 0.87, + "learning_rate": 4.696773375760494e-07, + "loss": 0.5059, + "step": 6901 + }, + { + "epoch": 0.87, + "learning_rate": 4.6881863646336346e-07, + "loss": 0.5085, + "step": 6902 + }, + { + "epoch": 0.87, + "learning_rate": 4.679606824287536e-07, + "loss": 0.48, + "step": 6903 + }, + { + "epoch": 0.87, + "learning_rate": 4.671034756136744e-07, + "loss": 0.5323, + "step": 6904 + }, + { + "epoch": 0.87, + "learning_rate": 4.6624701615945624e-07, + "loss": 0.5802, + "step": 6905 + }, + { + "epoch": 0.87, + "learning_rate": 4.653913042073088e-07, + "loss": 0.5767, + "step": 6906 + }, + { + "epoch": 0.87, + "learning_rate": 4.6453633989831826e-07, + "loss": 0.4934, + "step": 6907 + }, + { + "epoch": 0.87, + "learning_rate": 4.6368212337344566e-07, + "loss": 0.5143, + "step": 6908 + }, + { + "epoch": 0.87, + "learning_rate": 4.6282865477353003e-07, + "loss": 0.4271, + "step": 6909 + }, + { + "epoch": 0.87, + "learning_rate": 4.6197593423928763e-07, + "loss": 0.4837, + "step": 6910 + }, + { + "epoch": 0.87, + "learning_rate": 4.6112396191131117e-07, + "loss": 0.5458, + "step": 6911 + }, + { + "epoch": 0.87, + "learning_rate": 4.6027273793006754e-07, + "loss": 0.452, + "step": 6912 + }, + { + "epoch": 0.87, + "learning_rate": 4.594222624359029e-07, + "loss": 0.4738, + "step": 6913 + }, + { + "epoch": 0.87, + "learning_rate": 4.585725355690401e-07, + "loss": 0.2581, + "step": 6914 + }, + { + "epoch": 0.87, + "learning_rate": 4.577235574695771e-07, + "loss": 0.503, + "step": 6915 + }, + { + "epoch": 0.87, + "learning_rate": 4.568753282774896e-07, + "loss": 0.5002, + "step": 6916 + }, + { + "epoch": 0.87, + "learning_rate": 4.560278481326286e-07, + "loss": 0.5498, + "step": 6917 + }, + { + "epoch": 0.87, + "learning_rate": 4.551811171747239e-07, + "loss": 0.5112, + "step": 6918 + }, + { + "epoch": 0.87, + "learning_rate": 4.5433513554337784e-07, + "loss": 0.4422, + "step": 6919 + }, + { + "epoch": 0.87, + "learning_rate": 4.534899033780732e-07, + "loss": 0.4951, + "step": 6920 + }, + { + "epoch": 0.87, + "learning_rate": 4.526454208181663e-07, + "loss": 0.4796, + "step": 6921 + }, + { + "epoch": 0.87, + "learning_rate": 4.518016880028925e-07, + "loss": 0.4837, + "step": 6922 + }, + { + "epoch": 0.87, + "learning_rate": 4.5095870507136096e-07, + "loss": 0.555, + "step": 6923 + }, + { + "epoch": 0.87, + "learning_rate": 4.501164721625595e-07, + "loss": 0.5424, + "step": 6924 + }, + { + "epoch": 0.87, + "learning_rate": 4.4927498941535144e-07, + "loss": 0.574, + "step": 6925 + }, + { + "epoch": 0.87, + "learning_rate": 4.4843425696847463e-07, + "loss": 0.5754, + "step": 6926 + }, + { + "epoch": 0.87, + "learning_rate": 4.47594274960545e-07, + "loss": 0.5147, + "step": 6927 + }, + { + "epoch": 0.87, + "learning_rate": 4.467550435300555e-07, + "loss": 0.4421, + "step": 6928 + }, + { + "epoch": 0.87, + "learning_rate": 4.4591656281537385e-07, + "loss": 0.499, + "step": 6929 + }, + { + "epoch": 0.87, + "learning_rate": 4.4507883295474506e-07, + "loss": 0.4787, + "step": 6930 + }, + { + "epoch": 0.87, + "learning_rate": 4.4424185408628864e-07, + "loss": 0.4789, + "step": 6931 + }, + { + "epoch": 0.87, + "learning_rate": 4.434056263480013e-07, + "loss": 0.5441, + "step": 6932 + }, + { + "epoch": 0.87, + "learning_rate": 4.4257014987775736e-07, + "loss": 0.4914, + "step": 6933 + }, + { + "epoch": 0.87, + "learning_rate": 4.417354248133049e-07, + "loss": 0.4666, + "step": 6934 + }, + { + "epoch": 0.87, + "learning_rate": 4.409014512922699e-07, + "loss": 0.5281, + "step": 6935 + }, + { + "epoch": 0.87, + "learning_rate": 4.40068229452153e-07, + "loss": 0.4786, + "step": 6936 + }, + { + "epoch": 0.87, + "learning_rate": 4.3923575943033146e-07, + "loss": 0.5834, + "step": 6937 + }, + { + "epoch": 0.87, + "learning_rate": 4.384040413640589e-07, + "loss": 0.595, + "step": 6938 + }, + { + "epoch": 0.87, + "learning_rate": 4.3757307539046446e-07, + "loss": 0.5312, + "step": 6939 + }, + { + "epoch": 0.87, + "learning_rate": 4.367428616465547e-07, + "loss": 0.546, + "step": 6940 + }, + { + "epoch": 0.87, + "learning_rate": 4.3591340026921056e-07, + "loss": 0.4768, + "step": 6941 + }, + { + "epoch": 0.87, + "learning_rate": 4.350846913951884e-07, + "loss": 0.457, + "step": 6942 + }, + { + "epoch": 0.87, + "learning_rate": 4.342567351611221e-07, + "loss": 0.4681, + "step": 6943 + }, + { + "epoch": 0.87, + "learning_rate": 4.334295317035214e-07, + "loss": 0.2239, + "step": 6944 + }, + { + "epoch": 0.87, + "learning_rate": 4.326030811587706e-07, + "loss": 0.5373, + "step": 6945 + }, + { + "epoch": 0.87, + "learning_rate": 4.3177738366313225e-07, + "loss": 0.5513, + "step": 6946 + }, + { + "epoch": 0.87, + "learning_rate": 4.3095243935274087e-07, + "loss": 0.5319, + "step": 6947 + }, + { + "epoch": 0.87, + "learning_rate": 4.301282483636099e-07, + "loss": 0.4697, + "step": 6948 + }, + { + "epoch": 0.87, + "learning_rate": 4.293048108316283e-07, + "loss": 0.4976, + "step": 6949 + }, + { + "epoch": 0.87, + "learning_rate": 4.284821268925599e-07, + "loss": 0.4657, + "step": 6950 + }, + { + "epoch": 0.87, + "learning_rate": 4.276601966820454e-07, + "loss": 0.5035, + "step": 6951 + }, + { + "epoch": 0.87, + "learning_rate": 4.2683902033559876e-07, + "loss": 0.4986, + "step": 6952 + }, + { + "epoch": 0.87, + "learning_rate": 4.260185979886122e-07, + "loss": 0.5223, + "step": 6953 + }, + { + "epoch": 0.87, + "learning_rate": 4.2519892977635313e-07, + "loss": 0.5055, + "step": 6954 + }, + { + "epoch": 0.87, + "learning_rate": 4.243800158339639e-07, + "loss": 0.4933, + "step": 6955 + }, + { + "epoch": 0.87, + "learning_rate": 4.235618562964627e-07, + "loss": 0.5328, + "step": 6956 + }, + { + "epoch": 0.87, + "learning_rate": 4.2274445129874496e-07, + "loss": 0.5045, + "step": 6957 + }, + { + "epoch": 0.87, + "learning_rate": 4.219278009755784e-07, + "loss": 0.5459, + "step": 6958 + }, + { + "epoch": 0.87, + "learning_rate": 4.211119054616086e-07, + "loss": 0.4877, + "step": 6959 + }, + { + "epoch": 0.87, + "learning_rate": 4.202967648913564e-07, + "loss": 0.5501, + "step": 6960 + }, + { + "epoch": 0.87, + "learning_rate": 4.194823793992181e-07, + "loss": 0.5193, + "step": 6961 + }, + { + "epoch": 0.87, + "learning_rate": 4.186687491194663e-07, + "loss": 0.481, + "step": 6962 + }, + { + "epoch": 0.87, + "learning_rate": 4.1785587418624653e-07, + "loss": 0.5378, + "step": 6963 + }, + { + "epoch": 0.87, + "learning_rate": 4.1704375473358204e-07, + "loss": 0.4933, + "step": 6964 + }, + { + "epoch": 0.87, + "learning_rate": 4.162323908953714e-07, + "loss": 0.5035, + "step": 6965 + }, + { + "epoch": 0.87, + "learning_rate": 4.1542178280538746e-07, + "loss": 0.4824, + "step": 6966 + }, + { + "epoch": 0.87, + "learning_rate": 4.146119305972801e-07, + "loss": 0.5098, + "step": 6967 + }, + { + "epoch": 0.87, + "learning_rate": 4.1380283440457414e-07, + "loss": 0.5243, + "step": 6968 + }, + { + "epoch": 0.87, + "learning_rate": 4.129944943606667e-07, + "loss": 0.4868, + "step": 6969 + }, + { + "epoch": 0.87, + "learning_rate": 4.1218691059883467e-07, + "loss": 0.4698, + "step": 6970 + }, + { + "epoch": 0.87, + "learning_rate": 4.11380083252228e-07, + "loss": 0.5124, + "step": 6971 + }, + { + "epoch": 0.87, + "learning_rate": 4.1057401245387207e-07, + "loss": 0.4668, + "step": 6972 + }, + { + "epoch": 0.87, + "learning_rate": 4.097686983366689e-07, + "loss": 0.4849, + "step": 6973 + }, + { + "epoch": 0.87, + "learning_rate": 4.0896414103339276e-07, + "loss": 0.52, + "step": 6974 + }, + { + "epoch": 0.87, + "learning_rate": 4.081603406766954e-07, + "loss": 0.5222, + "step": 6975 + }, + { + "epoch": 0.87, + "learning_rate": 4.0735729739910414e-07, + "loss": 0.5373, + "step": 6976 + }, + { + "epoch": 0.87, + "learning_rate": 4.0655501133301965e-07, + "loss": 0.4825, + "step": 6977 + }, + { + "epoch": 0.87, + "learning_rate": 4.057534826107207e-07, + "loss": 0.5278, + "step": 6978 + }, + { + "epoch": 0.87, + "learning_rate": 4.0495271136435643e-07, + "loss": 0.544, + "step": 6979 + }, + { + "epoch": 0.87, + "learning_rate": 4.041526977259558e-07, + "loss": 0.5637, + "step": 6980 + }, + { + "epoch": 0.88, + "learning_rate": 4.0335344182741986e-07, + "loss": 0.519, + "step": 6981 + }, + { + "epoch": 0.88, + "learning_rate": 4.025549438005272e-07, + "loss": 0.5712, + "step": 6982 + }, + { + "epoch": 0.88, + "learning_rate": 4.0175720377692904e-07, + "loss": 0.4962, + "step": 6983 + }, + { + "epoch": 0.88, + "learning_rate": 4.009602218881542e-07, + "loss": 0.5142, + "step": 6984 + }, + { + "epoch": 0.88, + "learning_rate": 4.00163998265603e-07, + "loss": 0.538, + "step": 6985 + }, + { + "epoch": 0.88, + "learning_rate": 3.993685330405533e-07, + "loss": 0.496, + "step": 6986 + }, + { + "epoch": 0.88, + "learning_rate": 3.9857382634415797e-07, + "loss": 0.4831, + "step": 6987 + }, + { + "epoch": 0.88, + "learning_rate": 3.977798783074438e-07, + "loss": 0.5348, + "step": 6988 + }, + { + "epoch": 0.88, + "learning_rate": 3.9698668906131334e-07, + "loss": 0.4371, + "step": 6989 + }, + { + "epoch": 0.88, + "learning_rate": 3.9619425873654247e-07, + "loss": 0.4986, + "step": 6990 + }, + { + "epoch": 0.88, + "learning_rate": 3.95402587463784e-07, + "loss": 0.4893, + "step": 6991 + }, + { + "epoch": 0.88, + "learning_rate": 3.946116753735646e-07, + "loss": 0.509, + "step": 6992 + }, + { + "epoch": 0.88, + "learning_rate": 3.938215225962855e-07, + "loss": 0.5422, + "step": 6993 + }, + { + "epoch": 0.88, + "learning_rate": 3.930321292622241e-07, + "loss": 0.5223, + "step": 6994 + }, + { + "epoch": 0.88, + "learning_rate": 3.9224349550152974e-07, + "loss": 0.6017, + "step": 6995 + }, + { + "epoch": 0.88, + "learning_rate": 3.9145562144422943e-07, + "loss": 0.497, + "step": 6996 + }, + { + "epoch": 0.88, + "learning_rate": 3.906685072202238e-07, + "loss": 0.5493, + "step": 6997 + }, + { + "epoch": 0.88, + "learning_rate": 3.8988215295928775e-07, + "loss": 0.4998, + "step": 6998 + }, + { + "epoch": 0.88, + "learning_rate": 3.8909655879107167e-07, + "loss": 0.4848, + "step": 6999 + }, + { + "epoch": 0.88, + "learning_rate": 3.8831172484510126e-07, + "loss": 0.505, + "step": 7000 + }, + { + "epoch": 0.88, + "learning_rate": 3.875276512507742e-07, + "loss": 0.5404, + "step": 7001 + }, + { + "epoch": 0.88, + "learning_rate": 3.8674433813736534e-07, + "loss": 0.5306, + "step": 7002 + }, + { + "epoch": 0.88, + "learning_rate": 3.8596178563402366e-07, + "loss": 0.5044, + "step": 7003 + }, + { + "epoch": 0.88, + "learning_rate": 3.8517999386977143e-07, + "loss": 0.5307, + "step": 7004 + }, + { + "epoch": 0.88, + "learning_rate": 3.8439896297350896e-07, + "loss": 0.5183, + "step": 7005 + }, + { + "epoch": 0.88, + "learning_rate": 3.8361869307400544e-07, + "loss": 0.5248, + "step": 7006 + }, + { + "epoch": 0.88, + "learning_rate": 3.828391842999096e-07, + "loss": 0.472, + "step": 7007 + }, + { + "epoch": 0.88, + "learning_rate": 3.82060436779742e-07, + "loss": 0.4499, + "step": 7008 + }, + { + "epoch": 0.88, + "learning_rate": 3.8128245064189997e-07, + "loss": 0.494, + "step": 7009 + }, + { + "epoch": 0.88, + "learning_rate": 3.80505226014653e-07, + "loss": 0.4154, + "step": 7010 + }, + { + "epoch": 0.88, + "learning_rate": 3.7972876302614535e-07, + "loss": 0.521, + "step": 7011 + }, + { + "epoch": 0.88, + "learning_rate": 3.7895306180439683e-07, + "loss": 0.4624, + "step": 7012 + }, + { + "epoch": 0.88, + "learning_rate": 3.7817812247730125e-07, + "loss": 0.4796, + "step": 7013 + }, + { + "epoch": 0.88, + "learning_rate": 3.7740394517262637e-07, + "loss": 0.519, + "step": 7014 + }, + { + "epoch": 0.88, + "learning_rate": 3.766305300180151e-07, + "loss": 0.5091, + "step": 7015 + }, + { + "epoch": 0.88, + "learning_rate": 3.7585787714098377e-07, + "loss": 0.2161, + "step": 7016 + }, + { + "epoch": 0.88, + "learning_rate": 3.7508598666892384e-07, + "loss": 0.5314, + "step": 7017 + }, + { + "epoch": 0.88, + "learning_rate": 3.7431485872910014e-07, + "loss": 0.5465, + "step": 7018 + }, + { + "epoch": 0.88, + "learning_rate": 3.7354449344865206e-07, + "loss": 0.4995, + "step": 7019 + }, + { + "epoch": 0.88, + "learning_rate": 3.727748909545942e-07, + "loss": 0.5524, + "step": 7020 + }, + { + "epoch": 0.88, + "learning_rate": 3.720060513738144e-07, + "loss": 0.5206, + "step": 7021 + }, + { + "epoch": 0.88, + "learning_rate": 3.712379748330747e-07, + "loss": 0.5527, + "step": 7022 + }, + { + "epoch": 0.88, + "learning_rate": 3.704706614590126e-07, + "loss": 0.4747, + "step": 7023 + }, + { + "epoch": 0.88, + "learning_rate": 3.6970411137813855e-07, + "loss": 0.5144, + "step": 7024 + }, + { + "epoch": 0.88, + "learning_rate": 3.6893832471683655e-07, + "loss": 0.4829, + "step": 7025 + }, + { + "epoch": 0.88, + "learning_rate": 3.6817330160136613e-07, + "loss": 0.4863, + "step": 7026 + }, + { + "epoch": 0.88, + "learning_rate": 3.674090421578608e-07, + "loss": 0.5272, + "step": 7027 + }, + { + "epoch": 0.88, + "learning_rate": 3.666455465123264e-07, + "loss": 0.5459, + "step": 7028 + }, + { + "epoch": 0.88, + "learning_rate": 3.6588281479064626e-07, + "loss": 0.4887, + "step": 7029 + }, + { + "epoch": 0.88, + "learning_rate": 3.6512084711857464e-07, + "loss": 0.5082, + "step": 7030 + }, + { + "epoch": 0.88, + "learning_rate": 3.6435964362174e-07, + "loss": 0.4824, + "step": 7031 + }, + { + "epoch": 0.88, + "learning_rate": 3.63599204425647e-07, + "loss": 0.4546, + "step": 7032 + }, + { + "epoch": 0.88, + "learning_rate": 3.628395296556725e-07, + "loss": 0.5315, + "step": 7033 + }, + { + "epoch": 0.88, + "learning_rate": 3.620806194370674e-07, + "loss": 0.5152, + "step": 7034 + }, + { + "epoch": 0.88, + "learning_rate": 3.613224738949578e-07, + "loss": 0.5112, + "step": 7035 + }, + { + "epoch": 0.88, + "learning_rate": 3.605650931543425e-07, + "loss": 0.5492, + "step": 7036 + }, + { + "epoch": 0.88, + "learning_rate": 3.5980847734009503e-07, + "loss": 0.4351, + "step": 7037 + }, + { + "epoch": 0.88, + "learning_rate": 3.590526265769606e-07, + "loss": 0.5021, + "step": 7038 + }, + { + "epoch": 0.88, + "learning_rate": 3.582975409895617e-07, + "loss": 0.5851, + "step": 7039 + }, + { + "epoch": 0.88, + "learning_rate": 3.575432207023927e-07, + "loss": 0.4896, + "step": 7040 + }, + { + "epoch": 0.88, + "learning_rate": 3.567896658398218e-07, + "loss": 0.5812, + "step": 7041 + }, + { + "epoch": 0.88, + "learning_rate": 3.560368765260913e-07, + "loss": 0.51, + "step": 7042 + }, + { + "epoch": 0.88, + "learning_rate": 3.552848528853187e-07, + "loss": 0.4547, + "step": 7043 + }, + { + "epoch": 0.88, + "learning_rate": 3.5453359504149133e-07, + "loss": 0.5501, + "step": 7044 + }, + { + "epoch": 0.88, + "learning_rate": 3.537831031184735e-07, + "loss": 0.589, + "step": 7045 + }, + { + "epoch": 0.88, + "learning_rate": 3.530333772400035e-07, + "loss": 0.2054, + "step": 7046 + }, + { + "epoch": 0.88, + "learning_rate": 3.522844175296913e-07, + "loss": 0.5236, + "step": 7047 + }, + { + "epoch": 0.88, + "learning_rate": 3.5153622411102315e-07, + "loss": 0.5803, + "step": 7048 + }, + { + "epoch": 0.88, + "learning_rate": 3.5078879710735535e-07, + "loss": 0.4964, + "step": 7049 + }, + { + "epoch": 0.88, + "learning_rate": 3.500421366419204e-07, + "loss": 0.4952, + "step": 7050 + }, + { + "epoch": 0.88, + "learning_rate": 3.492962428378244e-07, + "loss": 0.502, + "step": 7051 + }, + { + "epoch": 0.88, + "learning_rate": 3.48551115818046e-07, + "loss": 0.4825, + "step": 7052 + }, + { + "epoch": 0.88, + "learning_rate": 3.478067557054393e-07, + "loss": 0.5544, + "step": 7053 + }, + { + "epoch": 0.88, + "learning_rate": 3.470631626227283e-07, + "loss": 0.5388, + "step": 7054 + }, + { + "epoch": 0.88, + "learning_rate": 3.4632033669251444e-07, + "loss": 0.5494, + "step": 7055 + }, + { + "epoch": 0.88, + "learning_rate": 3.455782780372707e-07, + "loss": 0.5828, + "step": 7056 + }, + { + "epoch": 0.88, + "learning_rate": 3.448369867793438e-07, + "loss": 0.535, + "step": 7057 + }, + { + "epoch": 0.88, + "learning_rate": 3.4409646304095436e-07, + "loss": 0.4995, + "step": 7058 + }, + { + "epoch": 0.88, + "learning_rate": 3.4335670694419677e-07, + "loss": 0.48, + "step": 7059 + }, + { + "epoch": 0.88, + "learning_rate": 3.4261771861103635e-07, + "loss": 0.4989, + "step": 7060 + }, + { + "epoch": 0.89, + "learning_rate": 3.41879498163315e-07, + "loss": 0.5875, + "step": 7061 + }, + { + "epoch": 0.89, + "learning_rate": 3.411420457227466e-07, + "loss": 0.5622, + "step": 7062 + }, + { + "epoch": 0.89, + "learning_rate": 3.4040536141091873e-07, + "loss": 0.5334, + "step": 7063 + }, + { + "epoch": 0.89, + "learning_rate": 3.396694453492927e-07, + "loss": 0.5155, + "step": 7064 + }, + { + "epoch": 0.89, + "learning_rate": 3.3893429765920085e-07, + "loss": 0.4781, + "step": 7065 + }, + { + "epoch": 0.89, + "learning_rate": 3.381999184618512e-07, + "loss": 0.4939, + "step": 7066 + }, + { + "epoch": 0.89, + "learning_rate": 3.374663078783252e-07, + "loss": 0.5559, + "step": 7067 + }, + { + "epoch": 0.89, + "learning_rate": 3.367334660295762e-07, + "loss": 0.4306, + "step": 7068 + }, + { + "epoch": 0.89, + "learning_rate": 3.3600139303643254e-07, + "loss": 0.4804, + "step": 7069 + }, + { + "epoch": 0.89, + "learning_rate": 3.352700890195931e-07, + "loss": 0.2183, + "step": 7070 + }, + { + "epoch": 0.89, + "learning_rate": 3.345395540996327e-07, + "loss": 0.2329, + "step": 7071 + }, + { + "epoch": 0.89, + "learning_rate": 3.3380978839699706e-07, + "loss": 0.5103, + "step": 7072 + }, + { + "epoch": 0.89, + "learning_rate": 3.330807920320073e-07, + "loss": 0.5169, + "step": 7073 + }, + { + "epoch": 0.89, + "learning_rate": 3.32352565124856e-07, + "loss": 0.5387, + "step": 7074 + }, + { + "epoch": 0.89, + "learning_rate": 3.3162510779561063e-07, + "loss": 0.5051, + "step": 7075 + }, + { + "epoch": 0.89, + "learning_rate": 3.3089842016420905e-07, + "loss": 0.5396, + "step": 7076 + }, + { + "epoch": 0.89, + "learning_rate": 3.301725023504643e-07, + "loss": 0.4984, + "step": 7077 + }, + { + "epoch": 0.89, + "learning_rate": 3.294473544740623e-07, + "loss": 0.4558, + "step": 7078 + }, + { + "epoch": 0.89, + "learning_rate": 3.2872297665456185e-07, + "loss": 0.531, + "step": 7079 + }, + { + "epoch": 0.89, + "learning_rate": 3.279993690113953e-07, + "loss": 0.5276, + "step": 7080 + }, + { + "epoch": 0.89, + "learning_rate": 3.2727653166386596e-07, + "loss": 0.5115, + "step": 7081 + }, + { + "epoch": 0.89, + "learning_rate": 3.265544647311519e-07, + "loss": 0.4501, + "step": 7082 + }, + { + "epoch": 0.89, + "learning_rate": 3.2583316833230405e-07, + "loss": 0.5349, + "step": 7083 + }, + { + "epoch": 0.89, + "learning_rate": 3.251126425862461e-07, + "loss": 0.5475, + "step": 7084 + }, + { + "epoch": 0.89, + "learning_rate": 3.243928876117758e-07, + "loss": 0.5002, + "step": 7085 + }, + { + "epoch": 0.89, + "learning_rate": 3.2367390352756045e-07, + "loss": 0.5181, + "step": 7086 + }, + { + "epoch": 0.89, + "learning_rate": 3.2295569045214413e-07, + "loss": 0.6282, + "step": 7087 + }, + { + "epoch": 0.89, + "learning_rate": 3.2223824850394104e-07, + "loss": 0.5251, + "step": 7088 + }, + { + "epoch": 0.89, + "learning_rate": 3.215215778012404e-07, + "loss": 0.5032, + "step": 7089 + }, + { + "epoch": 0.89, + "learning_rate": 3.2080567846220266e-07, + "loss": 0.4389, + "step": 7090 + }, + { + "epoch": 0.89, + "learning_rate": 3.200905506048624e-07, + "loss": 0.5139, + "step": 7091 + }, + { + "epoch": 0.89, + "learning_rate": 3.1937619434712464e-07, + "loss": 0.4686, + "step": 7092 + }, + { + "epoch": 0.89, + "learning_rate": 3.186626098067702e-07, + "loss": 0.4401, + "step": 7093 + }, + { + "epoch": 0.89, + "learning_rate": 3.179497971014506e-07, + "loss": 0.496, + "step": 7094 + }, + { + "epoch": 0.89, + "learning_rate": 3.1723775634869113e-07, + "loss": 0.5506, + "step": 7095 + }, + { + "epoch": 0.89, + "learning_rate": 3.1652648766588966e-07, + "loss": 0.531, + "step": 7096 + }, + { + "epoch": 0.89, + "learning_rate": 3.1581599117031624e-07, + "loss": 0.4919, + "step": 7097 + }, + { + "epoch": 0.89, + "learning_rate": 3.151062669791133e-07, + "loss": 0.5359, + "step": 7098 + }, + { + "epoch": 0.89, + "learning_rate": 3.143973152092972e-07, + "loss": 0.4982, + "step": 7099 + }, + { + "epoch": 0.89, + "learning_rate": 3.1368913597775606e-07, + "loss": 0.4953, + "step": 7100 + }, + { + "epoch": 0.89, + "learning_rate": 3.129817294012516e-07, + "loss": 0.5117, + "step": 7101 + }, + { + "epoch": 0.89, + "learning_rate": 3.122750955964166e-07, + "loss": 0.5342, + "step": 7102 + }, + { + "epoch": 0.89, + "learning_rate": 3.1156923467975665e-07, + "loss": 0.5237, + "step": 7103 + }, + { + "epoch": 0.89, + "learning_rate": 3.1086414676765206e-07, + "loss": 0.4895, + "step": 7104 + }, + { + "epoch": 0.89, + "learning_rate": 3.1015983197635327e-07, + "loss": 0.4903, + "step": 7105 + }, + { + "epoch": 0.89, + "learning_rate": 3.094562904219839e-07, + "loss": 0.5082, + "step": 7106 + }, + { + "epoch": 0.89, + "learning_rate": 3.087535222205412e-07, + "loss": 0.4965, + "step": 7107 + }, + { + "epoch": 0.89, + "learning_rate": 3.0805152748789314e-07, + "loss": 0.5, + "step": 7108 + }, + { + "epoch": 0.89, + "learning_rate": 3.073503063397809e-07, + "loss": 0.5176, + "step": 7109 + }, + { + "epoch": 0.89, + "learning_rate": 3.0664985889181873e-07, + "loss": 0.4558, + "step": 7110 + }, + { + "epoch": 0.89, + "learning_rate": 3.059501852594926e-07, + "loss": 0.4982, + "step": 7111 + }, + { + "epoch": 0.89, + "learning_rate": 3.0525128555816243e-07, + "loss": 0.5225, + "step": 7112 + }, + { + "epoch": 0.89, + "learning_rate": 3.045531599030571e-07, + "loss": 0.5626, + "step": 7113 + }, + { + "epoch": 0.89, + "learning_rate": 3.0385580840928077e-07, + "loss": 0.4755, + "step": 7114 + }, + { + "epoch": 0.89, + "learning_rate": 3.031592311918097e-07, + "loss": 0.5212, + "step": 7115 + }, + { + "epoch": 0.89, + "learning_rate": 3.0246342836549145e-07, + "loss": 0.4802, + "step": 7116 + }, + { + "epoch": 0.89, + "learning_rate": 3.017684000450466e-07, + "loss": 0.4554, + "step": 7117 + }, + { + "epoch": 0.89, + "learning_rate": 3.0107414634506893e-07, + "loss": 0.5337, + "step": 7118 + }, + { + "epoch": 0.89, + "learning_rate": 3.003806673800219e-07, + "loss": 0.5063, + "step": 7119 + }, + { + "epoch": 0.89, + "learning_rate": 2.9968796326424355e-07, + "loss": 0.461, + "step": 7120 + }, + { + "epoch": 0.89, + "learning_rate": 2.98996034111943e-07, + "loss": 0.5198, + "step": 7121 + }, + { + "epoch": 0.89, + "learning_rate": 2.9830488003720183e-07, + "loss": 0.5402, + "step": 7122 + }, + { + "epoch": 0.89, + "learning_rate": 2.976145011539755e-07, + "loss": 0.4595, + "step": 7123 + }, + { + "epoch": 0.89, + "learning_rate": 2.96924897576088e-07, + "loss": 0.5419, + "step": 7124 + }, + { + "epoch": 0.89, + "learning_rate": 2.962360694172395e-07, + "loss": 0.5366, + "step": 7125 + }, + { + "epoch": 0.89, + "learning_rate": 2.955480167909991e-07, + "loss": 0.4934, + "step": 7126 + }, + { + "epoch": 0.89, + "learning_rate": 2.9486073981080997e-07, + "loss": 0.5455, + "step": 7127 + }, + { + "epoch": 0.89, + "learning_rate": 2.941742385899865e-07, + "loss": 0.5738, + "step": 7128 + }, + { + "epoch": 0.89, + "learning_rate": 2.93488513241717e-07, + "loss": 0.4601, + "step": 7129 + }, + { + "epoch": 0.89, + "learning_rate": 2.928035638790588e-07, + "loss": 0.4794, + "step": 7130 + }, + { + "epoch": 0.89, + "learning_rate": 2.9211939061494275e-07, + "loss": 0.4846, + "step": 7131 + }, + { + "epoch": 0.89, + "learning_rate": 2.91435993562173e-07, + "loss": 0.4756, + "step": 7132 + }, + { + "epoch": 0.89, + "learning_rate": 2.9075337283342333e-07, + "loss": 0.473, + "step": 7133 + }, + { + "epoch": 0.89, + "learning_rate": 2.9007152854124197e-07, + "loss": 0.4762, + "step": 7134 + }, + { + "epoch": 0.89, + "learning_rate": 2.893904607980469e-07, + "loss": 0.5351, + "step": 7135 + }, + { + "epoch": 0.89, + "learning_rate": 2.8871016971613044e-07, + "loss": 0.5538, + "step": 7136 + }, + { + "epoch": 0.89, + "learning_rate": 2.8803065540765394e-07, + "loss": 0.6044, + "step": 7137 + }, + { + "epoch": 0.89, + "learning_rate": 2.8735191798465236e-07, + "loss": 0.4835, + "step": 7138 + }, + { + "epoch": 0.89, + "learning_rate": 2.866739575590338e-07, + "loss": 0.433, + "step": 7139 + }, + { + "epoch": 0.89, + "learning_rate": 2.8599677424257567e-07, + "loss": 0.5212, + "step": 7140 + }, + { + "epoch": 0.9, + "learning_rate": 2.853203681469291e-07, + "loss": 0.5695, + "step": 7141 + }, + { + "epoch": 0.9, + "learning_rate": 2.846447393836166e-07, + "loss": 0.2074, + "step": 7142 + }, + { + "epoch": 0.9, + "learning_rate": 2.839698880640329e-07, + "loss": 0.5364, + "step": 7143 + }, + { + "epoch": 0.9, + "learning_rate": 2.832958142994424e-07, + "loss": 0.4348, + "step": 7144 + }, + { + "epoch": 0.9, + "learning_rate": 2.82622518200984e-07, + "loss": 0.5416, + "step": 7145 + }, + { + "epoch": 0.9, + "learning_rate": 2.8194999987966707e-07, + "loss": 0.5285, + "step": 7146 + }, + { + "epoch": 0.9, + "learning_rate": 2.812782594463731e-07, + "loss": 0.6028, + "step": 7147 + }, + { + "epoch": 0.9, + "learning_rate": 2.806072970118551e-07, + "loss": 0.2268, + "step": 7148 + }, + { + "epoch": 0.9, + "learning_rate": 2.799371126867384e-07, + "loss": 0.5405, + "step": 7149 + }, + { + "epoch": 0.9, + "learning_rate": 2.792677065815197e-07, + "loss": 0.5742, + "step": 7150 + }, + { + "epoch": 0.9, + "learning_rate": 2.785990788065662e-07, + "loss": 0.4822, + "step": 7151 + }, + { + "epoch": 0.9, + "learning_rate": 2.779312294721181e-07, + "loss": 0.5169, + "step": 7152 + }, + { + "epoch": 0.9, + "learning_rate": 2.7726415868828794e-07, + "loss": 0.4911, + "step": 7153 + }, + { + "epoch": 0.9, + "learning_rate": 2.765978665650576e-07, + "loss": 0.5217, + "step": 7154 + }, + { + "epoch": 0.9, + "learning_rate": 2.7593235321228427e-07, + "loss": 0.4833, + "step": 7155 + }, + { + "epoch": 0.9, + "learning_rate": 2.752676187396913e-07, + "loss": 0.4745, + "step": 7156 + }, + { + "epoch": 0.9, + "learning_rate": 2.746036632568788e-07, + "loss": 0.5461, + "step": 7157 + }, + { + "epoch": 0.9, + "learning_rate": 2.739404868733153e-07, + "loss": 0.489, + "step": 7158 + }, + { + "epoch": 0.9, + "learning_rate": 2.732780896983428e-07, + "loss": 0.5055, + "step": 7159 + }, + { + "epoch": 0.9, + "learning_rate": 2.7261647184117447e-07, + "loss": 0.5218, + "step": 7160 + }, + { + "epoch": 0.9, + "learning_rate": 2.719556334108925e-07, + "loss": 0.4918, + "step": 7161 + }, + { + "epoch": 0.9, + "learning_rate": 2.712955745164536e-07, + "loss": 0.5387, + "step": 7162 + }, + { + "epoch": 0.9, + "learning_rate": 2.706362952666852e-07, + "loss": 0.4867, + "step": 7163 + }, + { + "epoch": 0.9, + "learning_rate": 2.699777957702859e-07, + "loss": 0.4782, + "step": 7164 + }, + { + "epoch": 0.9, + "learning_rate": 2.69320076135825e-07, + "loss": 0.5229, + "step": 7165 + }, + { + "epoch": 0.9, + "learning_rate": 2.686631364717457e-07, + "loss": 0.5457, + "step": 7166 + }, + { + "epoch": 0.9, + "learning_rate": 2.680069768863586e-07, + "loss": 0.4839, + "step": 7167 + }, + { + "epoch": 0.9, + "learning_rate": 2.6735159748784936e-07, + "loss": 0.5375, + "step": 7168 + }, + { + "epoch": 0.9, + "learning_rate": 2.6669699838427263e-07, + "loss": 0.5008, + "step": 7169 + }, + { + "epoch": 0.9, + "learning_rate": 2.6604317968355666e-07, + "loss": 0.506, + "step": 7170 + }, + { + "epoch": 0.9, + "learning_rate": 2.65390141493499e-07, + "loss": 0.5069, + "step": 7171 + }, + { + "epoch": 0.9, + "learning_rate": 2.64737883921769e-07, + "loss": 0.4675, + "step": 7172 + }, + { + "epoch": 0.9, + "learning_rate": 2.6408640707590804e-07, + "loss": 0.5444, + "step": 7173 + }, + { + "epoch": 0.9, + "learning_rate": 2.634357110633279e-07, + "loss": 0.5898, + "step": 7174 + }, + { + "epoch": 0.9, + "learning_rate": 2.627857959913127e-07, + "loss": 0.472, + "step": 7175 + }, + { + "epoch": 0.9, + "learning_rate": 2.621366619670168e-07, + "loss": 0.4879, + "step": 7176 + }, + { + "epoch": 0.9, + "learning_rate": 2.614883090974657e-07, + "loss": 0.5139, + "step": 7177 + }, + { + "epoch": 0.9, + "learning_rate": 2.6084073748955606e-07, + "loss": 0.4929, + "step": 7178 + }, + { + "epoch": 0.9, + "learning_rate": 2.6019394725005755e-07, + "loss": 0.2104, + "step": 7179 + }, + { + "epoch": 0.9, + "learning_rate": 2.5954793848560923e-07, + "loss": 0.453, + "step": 7180 + }, + { + "epoch": 0.9, + "learning_rate": 2.5890271130272096e-07, + "loss": 0.4678, + "step": 7181 + }, + { + "epoch": 0.9, + "learning_rate": 2.582582658077759e-07, + "loss": 0.5259, + "step": 7182 + }, + { + "epoch": 0.9, + "learning_rate": 2.5761460210702585e-07, + "loss": 0.458, + "step": 7183 + }, + { + "epoch": 0.9, + "learning_rate": 2.569717203065941e-07, + "loss": 0.521, + "step": 7184 + }, + { + "epoch": 0.9, + "learning_rate": 2.5632962051247766e-07, + "loss": 0.5185, + "step": 7185 + }, + { + "epoch": 0.9, + "learning_rate": 2.5568830283054123e-07, + "loss": 0.5311, + "step": 7186 + }, + { + "epoch": 0.9, + "learning_rate": 2.55047767366523e-07, + "loss": 0.5345, + "step": 7187 + }, + { + "epoch": 0.9, + "learning_rate": 2.544080142260302e-07, + "loss": 0.5112, + "step": 7188 + }, + { + "epoch": 0.9, + "learning_rate": 2.5376904351454234e-07, + "loss": 0.2345, + "step": 7189 + }, + { + "epoch": 0.9, + "learning_rate": 2.531308553374101e-07, + "loss": 0.5649, + "step": 7190 + }, + { + "epoch": 0.9, + "learning_rate": 2.5249344979985435e-07, + "loss": 0.5541, + "step": 7191 + }, + { + "epoch": 0.9, + "learning_rate": 2.518568270069671e-07, + "loss": 0.539, + "step": 7192 + }, + { + "epoch": 0.9, + "learning_rate": 2.512209870637122e-07, + "loss": 0.5577, + "step": 7193 + }, + { + "epoch": 0.9, + "learning_rate": 2.5058593007492306e-07, + "loss": 0.538, + "step": 7194 + }, + { + "epoch": 0.9, + "learning_rate": 2.4995165614530417e-07, + "loss": 0.3688, + "step": 7195 + }, + { + "epoch": 0.9, + "learning_rate": 2.493181653794324e-07, + "loss": 0.5526, + "step": 7196 + }, + { + "epoch": 0.9, + "learning_rate": 2.486854578817538e-07, + "loss": 0.496, + "step": 7197 + }, + { + "epoch": 0.9, + "learning_rate": 2.4805353375658704e-07, + "loss": 0.472, + "step": 7198 + }, + { + "epoch": 0.9, + "learning_rate": 2.4742239310811934e-07, + "loss": 0.5587, + "step": 7199 + }, + { + "epoch": 0.9, + "learning_rate": 2.467920360404097e-07, + "loss": 0.5645, + "step": 7200 + }, + { + "epoch": 0.9, + "learning_rate": 2.4616246265738943e-07, + "loss": 0.4188, + "step": 7201 + }, + { + "epoch": 0.9, + "learning_rate": 2.4553367306285825e-07, + "loss": 0.5381, + "step": 7202 + }, + { + "epoch": 0.9, + "learning_rate": 2.449056673604894e-07, + "loss": 0.486, + "step": 7203 + }, + { + "epoch": 0.9, + "learning_rate": 2.442784456538233e-07, + "loss": 0.4938, + "step": 7204 + }, + { + "epoch": 0.9, + "learning_rate": 2.4365200804627343e-07, + "loss": 0.5006, + "step": 7205 + }, + { + "epoch": 0.9, + "learning_rate": 2.430263546411249e-07, + "loss": 0.4886, + "step": 7206 + }, + { + "epoch": 0.9, + "learning_rate": 2.4240148554153076e-07, + "loss": 0.4747, + "step": 7207 + }, + { + "epoch": 0.9, + "learning_rate": 2.4177740085051746e-07, + "loss": 0.4628, + "step": 7208 + }, + { + "epoch": 0.9, + "learning_rate": 2.4115410067098055e-07, + "loss": 0.4996, + "step": 7209 + }, + { + "epoch": 0.9, + "learning_rate": 2.405315851056861e-07, + "loss": 0.5369, + "step": 7210 + }, + { + "epoch": 0.9, + "learning_rate": 2.39909854257272e-07, + "loss": 0.5196, + "step": 7211 + }, + { + "epoch": 0.9, + "learning_rate": 2.3928890822824513e-07, + "loss": 0.5341, + "step": 7212 + }, + { + "epoch": 0.9, + "learning_rate": 2.3866874712098475e-07, + "loss": 0.5564, + "step": 7213 + }, + { + "epoch": 0.9, + "learning_rate": 2.3804937103773962e-07, + "loss": 0.5352, + "step": 7214 + }, + { + "epoch": 0.9, + "learning_rate": 2.3743078008062915e-07, + "loss": 0.5473, + "step": 7215 + }, + { + "epoch": 0.9, + "learning_rate": 2.368129743516434e-07, + "loss": 0.418, + "step": 7216 + }, + { + "epoch": 0.9, + "learning_rate": 2.361959539526437e-07, + "loss": 0.5401, + "step": 7217 + }, + { + "epoch": 0.9, + "learning_rate": 2.355797189853598e-07, + "loss": 0.5395, + "step": 7218 + }, + { + "epoch": 0.9, + "learning_rate": 2.3496426955139596e-07, + "loss": 0.5423, + "step": 7219 + }, + { + "epoch": 0.91, + "learning_rate": 2.3434960575222153e-07, + "loss": 0.5163, + "step": 7220 + }, + { + "epoch": 0.91, + "learning_rate": 2.3373572768917995e-07, + "loss": 0.4987, + "step": 7221 + }, + { + "epoch": 0.91, + "learning_rate": 2.3312263546348525e-07, + "loss": 0.5275, + "step": 7222 + }, + { + "epoch": 0.91, + "learning_rate": 2.3251032917621984e-07, + "loss": 0.5644, + "step": 7223 + }, + { + "epoch": 0.91, + "learning_rate": 2.318988089283386e-07, + "loss": 0.5447, + "step": 7224 + }, + { + "epoch": 0.91, + "learning_rate": 2.3128807482066585e-07, + "loss": 0.5174, + "step": 7225 + }, + { + "epoch": 0.91, + "learning_rate": 2.3067812695389547e-07, + "loss": 0.4567, + "step": 7226 + }, + { + "epoch": 0.91, + "learning_rate": 2.3006896542859258e-07, + "loss": 0.5452, + "step": 7227 + }, + { + "epoch": 0.91, + "learning_rate": 2.2946059034519352e-07, + "loss": 0.5143, + "step": 7228 + }, + { + "epoch": 0.91, + "learning_rate": 2.2885300180400306e-07, + "loss": 0.4981, + "step": 7229 + }, + { + "epoch": 0.91, + "learning_rate": 2.282461999051988e-07, + "loss": 0.4775, + "step": 7230 + }, + { + "epoch": 0.91, + "learning_rate": 2.2764018474882577e-07, + "loss": 0.5389, + "step": 7231 + }, + { + "epoch": 0.91, + "learning_rate": 2.2703495643480067e-07, + "loss": 0.2351, + "step": 7232 + }, + { + "epoch": 0.91, + "learning_rate": 2.2643051506291092e-07, + "loss": 0.5282, + "step": 7233 + }, + { + "epoch": 0.91, + "learning_rate": 2.2582686073281402e-07, + "loss": 0.5371, + "step": 7234 + }, + { + "epoch": 0.91, + "learning_rate": 2.2522399354403757e-07, + "loss": 0.4589, + "step": 7235 + }, + { + "epoch": 0.91, + "learning_rate": 2.2462191359597764e-07, + "loss": 0.5315, + "step": 7236 + }, + { + "epoch": 0.91, + "learning_rate": 2.2402062098790367e-07, + "loss": 0.4524, + "step": 7237 + }, + { + "epoch": 0.91, + "learning_rate": 2.234201158189525e-07, + "loss": 0.5451, + "step": 7238 + }, + { + "epoch": 0.91, + "learning_rate": 2.2282039818813385e-07, + "loss": 0.4599, + "step": 7239 + }, + { + "epoch": 0.91, + "learning_rate": 2.222214681943252e-07, + "loss": 0.529, + "step": 7240 + }, + { + "epoch": 0.91, + "learning_rate": 2.216233259362749e-07, + "loss": 0.5484, + "step": 7241 + }, + { + "epoch": 0.91, + "learning_rate": 2.2102597151260286e-07, + "loss": 0.5858, + "step": 7242 + }, + { + "epoch": 0.91, + "learning_rate": 2.2042940502179644e-07, + "loss": 0.5361, + "step": 7243 + }, + { + "epoch": 0.91, + "learning_rate": 2.1983362656221418e-07, + "loss": 0.4938, + "step": 7244 + }, + { + "epoch": 0.91, + "learning_rate": 2.1923863623208642e-07, + "loss": 0.5377, + "step": 7245 + }, + { + "epoch": 0.91, + "learning_rate": 2.186444341295113e-07, + "loss": 0.5327, + "step": 7246 + }, + { + "epoch": 0.91, + "learning_rate": 2.1805102035245773e-07, + "loss": 0.5035, + "step": 7247 + }, + { + "epoch": 0.91, + "learning_rate": 2.174583949987652e-07, + "loss": 0.5128, + "step": 7248 + }, + { + "epoch": 0.91, + "learning_rate": 2.1686655816614333e-07, + "loss": 0.516, + "step": 7249 + }, + { + "epoch": 0.91, + "learning_rate": 2.162755099521696e-07, + "loss": 0.5056, + "step": 7250 + }, + { + "epoch": 0.91, + "learning_rate": 2.156852504542939e-07, + "loss": 0.5446, + "step": 7251 + }, + { + "epoch": 0.91, + "learning_rate": 2.1509577976983497e-07, + "loss": 0.5633, + "step": 7252 + }, + { + "epoch": 0.91, + "learning_rate": 2.145070979959818e-07, + "loss": 0.5319, + "step": 7253 + }, + { + "epoch": 0.91, + "learning_rate": 2.1391920522979337e-07, + "loss": 0.5394, + "step": 7254 + }, + { + "epoch": 0.91, + "learning_rate": 2.1333210156819882e-07, + "loss": 0.5312, + "step": 7255 + }, + { + "epoch": 0.91, + "learning_rate": 2.1274578710799632e-07, + "loss": 0.4806, + "step": 7256 + }, + { + "epoch": 0.91, + "learning_rate": 2.121602619458546e-07, + "loss": 0.5772, + "step": 7257 + }, + { + "epoch": 0.91, + "learning_rate": 2.1157552617831146e-07, + "loss": 0.501, + "step": 7258 + }, + { + "epoch": 0.91, + "learning_rate": 2.1099157990177533e-07, + "loss": 0.5791, + "step": 7259 + }, + { + "epoch": 0.91, + "learning_rate": 2.1040842321252474e-07, + "loss": 0.5044, + "step": 7260 + }, + { + "epoch": 0.91, + "learning_rate": 2.098260562067078e-07, + "loss": 0.5796, + "step": 7261 + }, + { + "epoch": 0.91, + "learning_rate": 2.0924447898034217e-07, + "loss": 0.4977, + "step": 7262 + }, + { + "epoch": 0.91, + "learning_rate": 2.0866369162931444e-07, + "loss": 0.5016, + "step": 7263 + }, + { + "epoch": 0.91, + "learning_rate": 2.0808369424938246e-07, + "loss": 0.5656, + "step": 7264 + }, + { + "epoch": 0.91, + "learning_rate": 2.0750448693617365e-07, + "loss": 0.4922, + "step": 7265 + }, + { + "epoch": 0.91, + "learning_rate": 2.0692606978518381e-07, + "loss": 0.4464, + "step": 7266 + }, + { + "epoch": 0.91, + "learning_rate": 2.0634844289178e-07, + "loss": 0.5011, + "step": 7267 + }, + { + "epoch": 0.91, + "learning_rate": 2.0577160635119886e-07, + "loss": 0.5454, + "step": 7268 + }, + { + "epoch": 0.91, + "learning_rate": 2.0519556025854537e-07, + "loss": 0.4849, + "step": 7269 + }, + { + "epoch": 0.91, + "learning_rate": 2.0462030470879524e-07, + "loss": 0.4932, + "step": 7270 + }, + { + "epoch": 0.91, + "learning_rate": 2.040458397967937e-07, + "loss": 0.4989, + "step": 7271 + }, + { + "epoch": 0.91, + "learning_rate": 2.0347216561725612e-07, + "loss": 0.5182, + "step": 7272 + }, + { + "epoch": 0.91, + "learning_rate": 2.0289928226476684e-07, + "loss": 0.5467, + "step": 7273 + }, + { + "epoch": 0.91, + "learning_rate": 2.023271898337792e-07, + "loss": 0.5436, + "step": 7274 + }, + { + "epoch": 0.91, + "learning_rate": 2.017558884186177e-07, + "loss": 0.5549, + "step": 7275 + }, + { + "epoch": 0.91, + "learning_rate": 2.0118537811347484e-07, + "loss": 0.5258, + "step": 7276 + }, + { + "epoch": 0.91, + "learning_rate": 2.006156590124142e-07, + "loss": 0.5082, + "step": 7277 + }, + { + "epoch": 0.91, + "learning_rate": 2.0004673120936846e-07, + "loss": 0.5168, + "step": 7278 + }, + { + "epoch": 0.91, + "learning_rate": 1.9947859479813815e-07, + "loss": 0.494, + "step": 7279 + }, + { + "epoch": 0.91, + "learning_rate": 1.9891124987239553e-07, + "loss": 0.5055, + "step": 7280 + }, + { + "epoch": 0.91, + "learning_rate": 1.9834469652568188e-07, + "loss": 0.466, + "step": 7281 + }, + { + "epoch": 0.91, + "learning_rate": 1.9777893485140698e-07, + "loss": 0.5049, + "step": 7282 + }, + { + "epoch": 0.91, + "learning_rate": 1.9721396494285062e-07, + "loss": 0.5279, + "step": 7283 + }, + { + "epoch": 0.91, + "learning_rate": 1.9664978689316384e-07, + "loss": 0.5326, + "step": 7284 + }, + { + "epoch": 0.91, + "learning_rate": 1.960864007953628e-07, + "loss": 0.4828, + "step": 7285 + }, + { + "epoch": 0.91, + "learning_rate": 1.9552380674233762e-07, + "loss": 0.5318, + "step": 7286 + }, + { + "epoch": 0.91, + "learning_rate": 1.9496200482684525e-07, + "loss": 0.4822, + "step": 7287 + }, + { + "epoch": 0.91, + "learning_rate": 1.9440099514151266e-07, + "loss": 0.5387, + "step": 7288 + }, + { + "epoch": 0.91, + "learning_rate": 1.9384077777883703e-07, + "loss": 0.5134, + "step": 7289 + }, + { + "epoch": 0.91, + "learning_rate": 1.9328135283118332e-07, + "loss": 0.5218, + "step": 7290 + }, + { + "epoch": 0.91, + "learning_rate": 1.9272272039078666e-07, + "loss": 0.4488, + "step": 7291 + }, + { + "epoch": 0.91, + "learning_rate": 1.921648805497517e-07, + "loss": 0.5144, + "step": 7292 + }, + { + "epoch": 0.91, + "learning_rate": 1.9160783340005264e-07, + "loss": 0.5527, + "step": 7293 + }, + { + "epoch": 0.91, + "learning_rate": 1.9105157903353268e-07, + "loss": 0.5144, + "step": 7294 + }, + { + "epoch": 0.91, + "learning_rate": 1.904961175419029e-07, + "loss": 0.5073, + "step": 7295 + }, + { + "epoch": 0.91, + "learning_rate": 1.8994144901674617e-07, + "loss": 0.5373, + "step": 7296 + }, + { + "epoch": 0.91, + "learning_rate": 1.8938757354951264e-07, + "loss": 0.5338, + "step": 7297 + }, + { + "epoch": 0.91, + "learning_rate": 1.888344912315232e-07, + "loss": 0.5224, + "step": 7298 + }, + { + "epoch": 0.91, + "learning_rate": 1.8828220215396654e-07, + "loss": 0.5029, + "step": 7299 + }, + { + "epoch": 0.92, + "learning_rate": 1.8773070640790258e-07, + "loss": 0.5288, + "step": 7300 + }, + { + "epoch": 0.92, + "learning_rate": 1.8718000408425752e-07, + "loss": 0.4639, + "step": 7301 + }, + { + "epoch": 0.92, + "learning_rate": 1.8663009527382926e-07, + "loss": 0.5242, + "step": 7302 + }, + { + "epoch": 0.92, + "learning_rate": 1.860809800672836e-07, + "loss": 0.5025, + "step": 7303 + }, + { + "epoch": 0.92, + "learning_rate": 1.855326585551559e-07, + "loss": 0.5259, + "step": 7304 + }, + { + "epoch": 0.92, + "learning_rate": 1.8498513082785107e-07, + "loss": 0.503, + "step": 7305 + }, + { + "epoch": 0.92, + "learning_rate": 1.8443839697564247e-07, + "loss": 0.4866, + "step": 7306 + }, + { + "epoch": 0.92, + "learning_rate": 1.8389245708867186e-07, + "loss": 0.4844, + "step": 7307 + }, + { + "epoch": 0.92, + "learning_rate": 1.8334731125695226e-07, + "loss": 0.5168, + "step": 7308 + }, + { + "epoch": 0.92, + "learning_rate": 1.82802959570364e-07, + "loss": 0.5485, + "step": 7309 + }, + { + "epoch": 0.92, + "learning_rate": 1.8225940211865745e-07, + "loss": 0.4502, + "step": 7310 + }, + { + "epoch": 0.92, + "learning_rate": 1.8171663899145042e-07, + "loss": 0.5347, + "step": 7311 + }, + { + "epoch": 0.92, + "learning_rate": 1.8117467027823243e-07, + "loss": 0.5315, + "step": 7312 + }, + { + "epoch": 0.92, + "learning_rate": 1.8063349606835924e-07, + "loss": 0.5155, + "step": 7313 + }, + { + "epoch": 0.92, + "learning_rate": 1.8009311645105777e-07, + "loss": 0.5372, + "step": 7314 + }, + { + "epoch": 0.92, + "learning_rate": 1.7955353151542287e-07, + "loss": 0.4691, + "step": 7315 + }, + { + "epoch": 0.92, + "learning_rate": 1.790147413504184e-07, + "loss": 0.5093, + "step": 7316 + }, + { + "epoch": 0.92, + "learning_rate": 1.784767460448772e-07, + "loss": 0.4647, + "step": 7317 + }, + { + "epoch": 0.92, + "learning_rate": 1.779395456875016e-07, + "loss": 0.4486, + "step": 7318 + }, + { + "epoch": 0.92, + "learning_rate": 1.774031403668619e-07, + "loss": 0.5402, + "step": 7319 + }, + { + "epoch": 0.92, + "learning_rate": 1.7686753017139845e-07, + "loss": 0.5031, + "step": 7320 + }, + { + "epoch": 0.92, + "learning_rate": 1.7633271518942007e-07, + "loss": 0.4884, + "step": 7321 + }, + { + "epoch": 0.92, + "learning_rate": 1.7579869550910344e-07, + "loss": 0.5241, + "step": 7322 + }, + { + "epoch": 0.92, + "learning_rate": 1.7526547121849534e-07, + "loss": 0.4982, + "step": 7323 + }, + { + "epoch": 0.92, + "learning_rate": 1.747330424055116e-07, + "loss": 0.4752, + "step": 7324 + }, + { + "epoch": 0.92, + "learning_rate": 1.7420140915793582e-07, + "loss": 0.5939, + "step": 7325 + }, + { + "epoch": 0.92, + "learning_rate": 1.7367057156342128e-07, + "loss": 0.2113, + "step": 7326 + }, + { + "epoch": 0.92, + "learning_rate": 1.7314052970948958e-07, + "loss": 0.4395, + "step": 7327 + }, + { + "epoch": 0.92, + "learning_rate": 1.7261128368353143e-07, + "loss": 0.5059, + "step": 7328 + }, + { + "epoch": 0.92, + "learning_rate": 1.7208283357280587e-07, + "loss": 0.5072, + "step": 7329 + }, + { + "epoch": 0.92, + "learning_rate": 1.7155517946444155e-07, + "loss": 0.4849, + "step": 7330 + }, + { + "epoch": 0.92, + "learning_rate": 1.7102832144543503e-07, + "loss": 0.5506, + "step": 7331 + }, + { + "epoch": 0.92, + "learning_rate": 1.7050225960265286e-07, + "loss": 0.4336, + "step": 7332 + }, + { + "epoch": 0.92, + "learning_rate": 1.6997699402282798e-07, + "loss": 0.5773, + "step": 7333 + }, + { + "epoch": 0.92, + "learning_rate": 1.6945252479256436e-07, + "loss": 0.5083, + "step": 7334 + }, + { + "epoch": 0.92, + "learning_rate": 1.6892885199833342e-07, + "loss": 0.5398, + "step": 7335 + }, + { + "epoch": 0.92, + "learning_rate": 1.6840597572647665e-07, + "loss": 0.5263, + "step": 7336 + }, + { + "epoch": 0.92, + "learning_rate": 1.6788389606320233e-07, + "loss": 0.4594, + "step": 7337 + }, + { + "epoch": 0.92, + "learning_rate": 1.6736261309458824e-07, + "loss": 0.5286, + "step": 7338 + }, + { + "epoch": 0.92, + "learning_rate": 1.668421269065812e-07, + "loss": 0.5475, + "step": 7339 + }, + { + "epoch": 0.92, + "learning_rate": 1.663224375849959e-07, + "loss": 0.4999, + "step": 7340 + }, + { + "epoch": 0.92, + "learning_rate": 1.65803545215516e-07, + "loss": 0.5671, + "step": 7341 + }, + { + "epoch": 0.92, + "learning_rate": 1.6528544988369476e-07, + "loss": 0.568, + "step": 7342 + }, + { + "epoch": 0.92, + "learning_rate": 1.647681516749533e-07, + "loss": 0.4778, + "step": 7343 + }, + { + "epoch": 0.92, + "learning_rate": 1.6425165067457948e-07, + "loss": 0.5457, + "step": 7344 + }, + { + "epoch": 0.92, + "learning_rate": 1.6373594696773188e-07, + "loss": 0.5029, + "step": 7345 + }, + { + "epoch": 0.92, + "learning_rate": 1.63221040639438e-07, + "loss": 0.5117, + "step": 7346 + }, + { + "epoch": 0.92, + "learning_rate": 1.627069317745922e-07, + "loss": 0.5574, + "step": 7347 + }, + { + "epoch": 0.92, + "learning_rate": 1.6219362045795885e-07, + "loss": 0.4822, + "step": 7348 + }, + { + "epoch": 0.92, + "learning_rate": 1.616811067741686e-07, + "loss": 0.4516, + "step": 7349 + }, + { + "epoch": 0.92, + "learning_rate": 1.611693908077233e-07, + "loss": 0.4599, + "step": 7350 + }, + { + "epoch": 0.92, + "learning_rate": 1.6065847264299216e-07, + "loss": 0.5258, + "step": 7351 + }, + { + "epoch": 0.92, + "learning_rate": 1.6014835236421222e-07, + "loss": 0.5134, + "step": 7352 + }, + { + "epoch": 0.92, + "learning_rate": 1.5963903005549008e-07, + "loss": 0.4808, + "step": 7353 + }, + { + "epoch": 0.92, + "learning_rate": 1.5913050580079914e-07, + "loss": 0.4836, + "step": 7354 + }, + { + "epoch": 0.92, + "learning_rate": 1.58622779683984e-07, + "loss": 0.5606, + "step": 7355 + }, + { + "epoch": 0.92, + "learning_rate": 1.5811585178875433e-07, + "loss": 0.4484, + "step": 7356 + }, + { + "epoch": 0.92, + "learning_rate": 1.5760972219869052e-07, + "loss": 0.512, + "step": 7357 + }, + { + "epoch": 0.92, + "learning_rate": 1.5710439099724028e-07, + "loss": 0.5394, + "step": 7358 + }, + { + "epoch": 0.92, + "learning_rate": 1.5659985826772083e-07, + "loss": 0.5007, + "step": 7359 + }, + { + "epoch": 0.92, + "learning_rate": 1.5609612409331621e-07, + "loss": 0.4619, + "step": 7360 + }, + { + "epoch": 0.92, + "learning_rate": 1.5559318855707994e-07, + "loss": 0.535, + "step": 7361 + }, + { + "epoch": 0.92, + "learning_rate": 1.5509105174193296e-07, + "loss": 0.4755, + "step": 7362 + }, + { + "epoch": 0.92, + "learning_rate": 1.5458971373066566e-07, + "loss": 0.5227, + "step": 7363 + }, + { + "epoch": 0.92, + "learning_rate": 1.540891746059353e-07, + "loss": 0.5065, + "step": 7364 + }, + { + "epoch": 0.92, + "learning_rate": 1.535894344502692e-07, + "loss": 0.4782, + "step": 7365 + }, + { + "epoch": 0.92, + "learning_rate": 1.5309049334606142e-07, + "loss": 0.2279, + "step": 7366 + }, + { + "epoch": 0.92, + "learning_rate": 1.5259235137557449e-07, + "loss": 0.2114, + "step": 7367 + }, + { + "epoch": 0.92, + "learning_rate": 1.5209500862094108e-07, + "loss": 0.4997, + "step": 7368 + }, + { + "epoch": 0.92, + "learning_rate": 1.5159846516415832e-07, + "loss": 0.4888, + "step": 7369 + }, + { + "epoch": 0.92, + "learning_rate": 1.5110272108709523e-07, + "loss": 0.5406, + "step": 7370 + }, + { + "epoch": 0.92, + "learning_rate": 1.506077764714864e-07, + "loss": 0.4456, + "step": 7371 + }, + { + "epoch": 0.92, + "learning_rate": 1.501136313989371e-07, + "loss": 0.4894, + "step": 7372 + }, + { + "epoch": 0.92, + "learning_rate": 1.4962028595091883e-07, + "loss": 0.2124, + "step": 7373 + }, + { + "epoch": 0.92, + "learning_rate": 1.4912774020877207e-07, + "loss": 0.5459, + "step": 7374 + }, + { + "epoch": 0.92, + "learning_rate": 1.4863599425370523e-07, + "loss": 0.5647, + "step": 7375 + }, + { + "epoch": 0.92, + "learning_rate": 1.48145048166794e-07, + "loss": 0.4452, + "step": 7376 + }, + { + "epoch": 0.92, + "learning_rate": 1.4765490202898414e-07, + "loss": 0.5162, + "step": 7377 + }, + { + "epoch": 0.92, + "learning_rate": 1.4716555592108828e-07, + "loss": 0.5073, + "step": 7378 + }, + { + "epoch": 0.92, + "learning_rate": 1.4667700992378685e-07, + "loss": 0.483, + "step": 7379 + }, + { + "epoch": 0.93, + "learning_rate": 1.461892641176299e-07, + "loss": 0.477, + "step": 7380 + }, + { + "epoch": 0.93, + "learning_rate": 1.4570231858303309e-07, + "loss": 0.4845, + "step": 7381 + }, + { + "epoch": 0.93, + "learning_rate": 1.4521617340028216e-07, + "loss": 0.5871, + "step": 7382 + }, + { + "epoch": 0.93, + "learning_rate": 1.447308286495297e-07, + "loss": 0.464, + "step": 7383 + }, + { + "epoch": 0.93, + "learning_rate": 1.442462844107978e-07, + "loss": 0.5697, + "step": 7384 + }, + { + "epoch": 0.93, + "learning_rate": 1.437625407639759e-07, + "loss": 0.5536, + "step": 7385 + }, + { + "epoch": 0.93, + "learning_rate": 1.4327959778881962e-07, + "loss": 0.4809, + "step": 7386 + }, + { + "epoch": 0.93, + "learning_rate": 1.4279745556495527e-07, + "loss": 0.4829, + "step": 7387 + }, + { + "epoch": 0.93, + "learning_rate": 1.423161141718754e-07, + "loss": 0.5698, + "step": 7388 + }, + { + "epoch": 0.93, + "learning_rate": 1.4183557368894151e-07, + "loss": 0.5207, + "step": 7389 + }, + { + "epoch": 0.93, + "learning_rate": 1.4135583419538302e-07, + "loss": 0.5636, + "step": 7390 + }, + { + "epoch": 0.93, + "learning_rate": 1.4087689577029662e-07, + "loss": 0.4538, + "step": 7391 + }, + { + "epoch": 0.93, + "learning_rate": 1.4039875849264694e-07, + "loss": 0.4817, + "step": 7392 + }, + { + "epoch": 0.93, + "learning_rate": 1.3992142244126705e-07, + "loss": 0.4692, + "step": 7393 + }, + { + "epoch": 0.93, + "learning_rate": 1.3944488769485731e-07, + "loss": 0.509, + "step": 7394 + }, + { + "epoch": 0.93, + "learning_rate": 1.389691543319871e-07, + "loss": 0.5133, + "step": 7395 + }, + { + "epoch": 0.93, + "learning_rate": 1.384942224310931e-07, + "loss": 0.4657, + "step": 7396 + }, + { + "epoch": 0.93, + "learning_rate": 1.3802009207047828e-07, + "loss": 0.4578, + "step": 7397 + }, + { + "epoch": 0.93, + "learning_rate": 1.3754676332831563e-07, + "loss": 0.5805, + "step": 7398 + }, + { + "epoch": 0.93, + "learning_rate": 1.3707423628264492e-07, + "loss": 0.5205, + "step": 7399 + }, + { + "epoch": 0.93, + "learning_rate": 1.3660251101137446e-07, + "loss": 0.5609, + "step": 7400 + }, + { + "epoch": 0.93, + "learning_rate": 1.3613158759227974e-07, + "loss": 0.5303, + "step": 7401 + }, + { + "epoch": 0.93, + "learning_rate": 1.356614661030048e-07, + "loss": 0.5247, + "step": 7402 + }, + { + "epoch": 0.93, + "learning_rate": 1.3519214662105928e-07, + "loss": 0.5007, + "step": 7403 + }, + { + "epoch": 0.93, + "learning_rate": 1.3472362922382355e-07, + "loss": 0.5333, + "step": 7404 + }, + { + "epoch": 0.93, + "learning_rate": 1.3425591398854355e-07, + "loss": 0.5284, + "step": 7405 + }, + { + "epoch": 0.93, + "learning_rate": 1.3378900099233484e-07, + "loss": 0.4881, + "step": 7406 + }, + { + "epoch": 0.93, + "learning_rate": 1.3332289031217916e-07, + "loss": 0.5315, + "step": 7407 + }, + { + "epoch": 0.93, + "learning_rate": 1.328575820249256e-07, + "loss": 0.5155, + "step": 7408 + }, + { + "epoch": 0.93, + "learning_rate": 1.323930762072928e-07, + "loss": 0.4979, + "step": 7409 + }, + { + "epoch": 0.93, + "learning_rate": 1.319293729358656e-07, + "loss": 0.5165, + "step": 7410 + }, + { + "epoch": 0.93, + "learning_rate": 1.314664722870973e-07, + "loss": 0.4659, + "step": 7411 + }, + { + "epoch": 0.93, + "learning_rate": 1.3100437433730962e-07, + "loss": 0.4813, + "step": 7412 + }, + { + "epoch": 0.93, + "learning_rate": 1.3054307916268883e-07, + "loss": 0.5032, + "step": 7413 + }, + { + "epoch": 0.93, + "learning_rate": 1.3008258683929188e-07, + "loss": 0.4814, + "step": 7414 + }, + { + "epoch": 0.93, + "learning_rate": 1.2962289744304245e-07, + "loss": 0.2227, + "step": 7415 + }, + { + "epoch": 0.93, + "learning_rate": 1.291640110497322e-07, + "loss": 0.5221, + "step": 7416 + }, + { + "epoch": 0.93, + "learning_rate": 1.2870592773501945e-07, + "loss": 0.537, + "step": 7417 + }, + { + "epoch": 0.93, + "learning_rate": 1.28248647574431e-07, + "loss": 0.5387, + "step": 7418 + }, + { + "epoch": 0.93, + "learning_rate": 1.2779217064336048e-07, + "loss": 0.4997, + "step": 7419 + }, + { + "epoch": 0.93, + "learning_rate": 1.273364970170693e-07, + "loss": 0.502, + "step": 7420 + }, + { + "epoch": 0.93, + "learning_rate": 1.2688162677068738e-07, + "loss": 0.5206, + "step": 7421 + }, + { + "epoch": 0.93, + "learning_rate": 1.264275599792103e-07, + "loss": 0.4211, + "step": 7422 + }, + { + "epoch": 0.93, + "learning_rate": 1.2597429671750427e-07, + "loss": 0.5512, + "step": 7423 + }, + { + "epoch": 0.93, + "learning_rate": 1.2552183706029842e-07, + "loss": 0.5215, + "step": 7424 + }, + { + "epoch": 0.93, + "learning_rate": 1.250701810821936e-07, + "loss": 0.4868, + "step": 7425 + }, + { + "epoch": 0.93, + "learning_rate": 1.2461932885765637e-07, + "loss": 0.5511, + "step": 7426 + }, + { + "epoch": 0.93, + "learning_rate": 1.2416928046102062e-07, + "loss": 0.561, + "step": 7427 + }, + { + "epoch": 0.93, + "learning_rate": 1.2372003596648918e-07, + "loss": 0.4949, + "step": 7428 + }, + { + "epoch": 0.93, + "learning_rate": 1.2327159544812896e-07, + "loss": 0.5181, + "step": 7429 + }, + { + "epoch": 0.93, + "learning_rate": 1.2282395897987855e-07, + "loss": 0.5067, + "step": 7430 + }, + { + "epoch": 0.93, + "learning_rate": 1.2237712663554056e-07, + "loss": 0.4874, + "step": 7431 + }, + { + "epoch": 0.93, + "learning_rate": 1.2193109848878726e-07, + "loss": 0.5593, + "step": 7432 + }, + { + "epoch": 0.93, + "learning_rate": 1.2148587461315754e-07, + "loss": 0.5064, + "step": 7433 + }, + { + "epoch": 0.93, + "learning_rate": 1.2104145508205768e-07, + "loss": 0.5453, + "step": 7434 + }, + { + "epoch": 0.93, + "learning_rate": 1.2059783996876074e-07, + "loss": 0.5078, + "step": 7435 + }, + { + "epoch": 0.93, + "learning_rate": 1.201550293464082e-07, + "loss": 0.5421, + "step": 7436 + }, + { + "epoch": 0.93, + "learning_rate": 1.197130232880078e-07, + "loss": 0.5378, + "step": 7437 + }, + { + "epoch": 0.93, + "learning_rate": 1.1927182186643627e-07, + "loss": 0.4916, + "step": 7438 + }, + { + "epoch": 0.93, + "learning_rate": 1.1883142515443591e-07, + "loss": 0.5016, + "step": 7439 + }, + { + "epoch": 0.93, + "learning_rate": 1.18391833224617e-07, + "loss": 0.4616, + "step": 7440 + }, + { + "epoch": 0.93, + "learning_rate": 1.1795304614945769e-07, + "loss": 0.5563, + "step": 7441 + }, + { + "epoch": 0.93, + "learning_rate": 1.1751506400130285e-07, + "loss": 0.4829, + "step": 7442 + }, + { + "epoch": 0.93, + "learning_rate": 1.1707788685236421e-07, + "loss": 0.4868, + "step": 7443 + }, + { + "epoch": 0.93, + "learning_rate": 1.1664151477472241e-07, + "loss": 0.4746, + "step": 7444 + }, + { + "epoch": 0.93, + "learning_rate": 1.1620594784032268e-07, + "loss": 0.5044, + "step": 7445 + }, + { + "epoch": 0.93, + "learning_rate": 1.1577118612098038e-07, + "loss": 0.4732, + "step": 7446 + }, + { + "epoch": 0.93, + "learning_rate": 1.1533722968837646e-07, + "loss": 0.4584, + "step": 7447 + }, + { + "epoch": 0.93, + "learning_rate": 1.1490407861405927e-07, + "loss": 0.5802, + "step": 7448 + }, + { + "epoch": 0.93, + "learning_rate": 1.1447173296944447e-07, + "loss": 0.5259, + "step": 7449 + }, + { + "epoch": 0.93, + "learning_rate": 1.1404019282581558e-07, + "loss": 0.5451, + "step": 7450 + }, + { + "epoch": 0.93, + "learning_rate": 1.1360945825432235e-07, + "loss": 0.5776, + "step": 7451 + }, + { + "epoch": 0.93, + "learning_rate": 1.1317952932598186e-07, + "loss": 0.5447, + "step": 7452 + }, + { + "epoch": 0.93, + "learning_rate": 1.1275040611167853e-07, + "loss": 0.5027, + "step": 7453 + }, + { + "epoch": 0.93, + "learning_rate": 1.1232208868216465e-07, + "loss": 0.4563, + "step": 7454 + }, + { + "epoch": 0.93, + "learning_rate": 1.1189457710805873e-07, + "loss": 0.4795, + "step": 7455 + }, + { + "epoch": 0.93, + "learning_rate": 1.1146787145984661e-07, + "loss": 0.5322, + "step": 7456 + }, + { + "epoch": 0.93, + "learning_rate": 1.1104197180788145e-07, + "loss": 0.4786, + "step": 7457 + }, + { + "epoch": 0.93, + "learning_rate": 1.106168782223832e-07, + "loss": 0.5608, + "step": 7458 + }, + { + "epoch": 0.93, + "learning_rate": 1.1019259077343913e-07, + "loss": 0.498, + "step": 7459 + }, + { + "epoch": 0.94, + "learning_rate": 1.0976910953100384e-07, + "loss": 0.4971, + "step": 7460 + }, + { + "epoch": 0.94, + "learning_rate": 1.0934643456489868e-07, + "loss": 0.5453, + "step": 7461 + }, + { + "epoch": 0.94, + "learning_rate": 1.0892456594481182e-07, + "loss": 0.5323, + "step": 7462 + }, + { + "epoch": 0.94, + "learning_rate": 1.0850350374029928e-07, + "loss": 0.545, + "step": 7463 + }, + { + "epoch": 0.94, + "learning_rate": 1.0808324802078329e-07, + "loss": 0.4704, + "step": 7464 + }, + { + "epoch": 0.94, + "learning_rate": 1.07663798855554e-07, + "loss": 0.4688, + "step": 7465 + }, + { + "epoch": 0.94, + "learning_rate": 1.072451563137672e-07, + "loss": 0.5274, + "step": 7466 + }, + { + "epoch": 0.94, + "learning_rate": 1.0682732046444766e-07, + "loss": 0.4971, + "step": 7467 + }, + { + "epoch": 0.94, + "learning_rate": 1.0641029137648528e-07, + "loss": 0.4957, + "step": 7468 + }, + { + "epoch": 0.94, + "learning_rate": 1.059940691186373e-07, + "loss": 0.5088, + "step": 7469 + }, + { + "epoch": 0.94, + "learning_rate": 1.0557865375952936e-07, + "loss": 0.5244, + "step": 7470 + }, + { + "epoch": 0.94, + "learning_rate": 1.0516404536765168e-07, + "loss": 0.4792, + "step": 7471 + }, + { + "epoch": 0.94, + "learning_rate": 1.0475024401136403e-07, + "loss": 0.4918, + "step": 7472 + }, + { + "epoch": 0.94, + "learning_rate": 1.0433724975889126e-07, + "loss": 0.5506, + "step": 7473 + }, + { + "epoch": 0.94, + "learning_rate": 1.039250626783267e-07, + "loss": 0.4929, + "step": 7474 + }, + { + "epoch": 0.94, + "learning_rate": 1.0351368283762819e-07, + "loss": 0.5654, + "step": 7475 + }, + { + "epoch": 0.94, + "learning_rate": 1.0310311030462261e-07, + "loss": 0.5024, + "step": 7476 + }, + { + "epoch": 0.94, + "learning_rate": 1.0269334514700302e-07, + "loss": 0.2336, + "step": 7477 + }, + { + "epoch": 0.94, + "learning_rate": 1.0228438743232982e-07, + "loss": 0.5298, + "step": 7478 + }, + { + "epoch": 0.94, + "learning_rate": 1.0187623722802908e-07, + "loss": 0.5234, + "step": 7479 + }, + { + "epoch": 0.94, + "learning_rate": 1.014688946013953e-07, + "loss": 0.2553, + "step": 7480 + }, + { + "epoch": 0.94, + "learning_rate": 1.0106235961958865e-07, + "loss": 0.4534, + "step": 7481 + }, + { + "epoch": 0.94, + "learning_rate": 1.0065663234963663e-07, + "loss": 0.5318, + "step": 7482 + }, + { + "epoch": 0.94, + "learning_rate": 1.0025171285843294e-07, + "loss": 0.5253, + "step": 7483 + }, + { + "epoch": 0.94, + "learning_rate": 9.984760121273973e-08, + "loss": 0.4969, + "step": 7484 + }, + { + "epoch": 0.94, + "learning_rate": 9.944429747918427e-08, + "loss": 0.5497, + "step": 7485 + }, + { + "epoch": 0.94, + "learning_rate": 9.904180172426059e-08, + "loss": 0.5437, + "step": 7486 + }, + { + "epoch": 0.94, + "learning_rate": 9.864011401433171e-08, + "loss": 0.5278, + "step": 7487 + }, + { + "epoch": 0.94, + "learning_rate": 9.823923441562467e-08, + "loss": 0.5221, + "step": 7488 + }, + { + "epoch": 0.94, + "learning_rate": 9.783916299423436e-08, + "loss": 0.5106, + "step": 7489 + }, + { + "epoch": 0.94, + "learning_rate": 9.743989981612356e-08, + "loss": 0.5292, + "step": 7490 + }, + { + "epoch": 0.94, + "learning_rate": 9.704144494711909e-08, + "loss": 0.5203, + "step": 7491 + }, + { + "epoch": 0.94, + "learning_rate": 9.664379845291783e-08, + "loss": 0.5302, + "step": 7492 + }, + { + "epoch": 0.94, + "learning_rate": 9.624696039908122e-08, + "loss": 0.536, + "step": 7493 + }, + { + "epoch": 0.94, + "learning_rate": 9.585093085103747e-08, + "loss": 0.5497, + "step": 7494 + }, + { + "epoch": 0.94, + "learning_rate": 9.545570987408159e-08, + "loss": 0.4948, + "step": 7495 + }, + { + "epoch": 0.94, + "learning_rate": 9.506129753337645e-08, + "loss": 0.4765, + "step": 7496 + }, + { + "epoch": 0.94, + "learning_rate": 9.466769389395059e-08, + "loss": 0.5233, + "step": 7497 + }, + { + "epoch": 0.94, + "learning_rate": 9.427489902069931e-08, + "loss": 0.5535, + "step": 7498 + }, + { + "epoch": 0.94, + "learning_rate": 9.388291297838414e-08, + "loss": 0.1891, + "step": 7499 + }, + { + "epoch": 0.94, + "learning_rate": 9.349173583163452e-08, + "loss": 0.5107, + "step": 7500 + }, + { + "epoch": 0.94, + "learning_rate": 9.310136764494493e-08, + "loss": 0.4374, + "step": 7501 + }, + { + "epoch": 0.94, + "learning_rate": 9.27118084826778e-08, + "loss": 0.5198, + "step": 7502 + }, + { + "epoch": 0.94, + "learning_rate": 9.23230584090623e-08, + "loss": 0.4988, + "step": 7503 + }, + { + "epoch": 0.94, + "learning_rate": 9.193511748819162e-08, + "loss": 0.5749, + "step": 7504 + }, + { + "epoch": 0.94, + "learning_rate": 9.154798578402957e-08, + "loss": 0.4407, + "step": 7505 + }, + { + "epoch": 0.94, + "learning_rate": 9.116166336040289e-08, + "loss": 0.4739, + "step": 7506 + }, + { + "epoch": 0.94, + "learning_rate": 9.077615028100783e-08, + "loss": 0.5596, + "step": 7507 + }, + { + "epoch": 0.94, + "learning_rate": 9.039144660940469e-08, + "loss": 0.5501, + "step": 7508 + }, + { + "epoch": 0.94, + "learning_rate": 9.000755240902215e-08, + "loss": 0.5234, + "step": 7509 + }, + { + "epoch": 0.94, + "learning_rate": 8.962446774315403e-08, + "loss": 0.4719, + "step": 7510 + }, + { + "epoch": 0.94, + "learning_rate": 8.924219267496204e-08, + "loss": 0.5462, + "step": 7511 + }, + { + "epoch": 0.94, + "learning_rate": 8.886072726747353e-08, + "loss": 0.4695, + "step": 7512 + }, + { + "epoch": 0.94, + "learning_rate": 8.848007158358208e-08, + "loss": 0.5509, + "step": 7513 + }, + { + "epoch": 0.94, + "learning_rate": 8.81002256860497e-08, + "loss": 0.5548, + "step": 7514 + }, + { + "epoch": 0.94, + "learning_rate": 8.772118963750187e-08, + "loss": 0.553, + "step": 7515 + }, + { + "epoch": 0.94, + "learning_rate": 8.734296350043248e-08, + "loss": 0.5656, + "step": 7516 + }, + { + "epoch": 0.94, + "learning_rate": 8.69655473372022e-08, + "loss": 0.4411, + "step": 7517 + }, + { + "epoch": 0.94, + "learning_rate": 8.658894121003681e-08, + "loss": 0.5415, + "step": 7518 + }, + { + "epoch": 0.94, + "learning_rate": 8.62131451810294e-08, + "loss": 0.5582, + "step": 7519 + }, + { + "epoch": 0.94, + "learning_rate": 8.583815931213934e-08, + "loss": 0.5054, + "step": 7520 + }, + { + "epoch": 0.94, + "learning_rate": 8.54639836651927e-08, + "loss": 0.5028, + "step": 7521 + }, + { + "epoch": 0.94, + "learning_rate": 8.509061830188125e-08, + "loss": 0.4497, + "step": 7522 + }, + { + "epoch": 0.94, + "learning_rate": 8.471806328376298e-08, + "loss": 0.4338, + "step": 7523 + }, + { + "epoch": 0.94, + "learning_rate": 8.434631867226428e-08, + "loss": 0.5063, + "step": 7524 + }, + { + "epoch": 0.94, + "learning_rate": 8.397538452867616e-08, + "loss": 0.5263, + "step": 7525 + }, + { + "epoch": 0.94, + "learning_rate": 8.360526091415522e-08, + "loss": 0.5557, + "step": 7526 + }, + { + "epoch": 0.94, + "learning_rate": 8.323594788972656e-08, + "loss": 0.5518, + "step": 7527 + }, + { + "epoch": 0.94, + "learning_rate": 8.286744551627979e-08, + "loss": 0.5556, + "step": 7528 + }, + { + "epoch": 0.94, + "learning_rate": 8.249975385457298e-08, + "loss": 0.4634, + "step": 7529 + }, + { + "epoch": 0.94, + "learning_rate": 8.21328729652282e-08, + "loss": 0.4922, + "step": 7530 + }, + { + "epoch": 0.94, + "learning_rate": 8.17668029087354e-08, + "loss": 0.5725, + "step": 7531 + }, + { + "epoch": 0.94, + "learning_rate": 8.140154374545018e-08, + "loss": 0.4376, + "step": 7532 + }, + { + "epoch": 0.94, + "learning_rate": 8.103709553559435e-08, + "loss": 0.5175, + "step": 7533 + }, + { + "epoch": 0.94, + "learning_rate": 8.067345833925711e-08, + "loss": 0.5631, + "step": 7534 + }, + { + "epoch": 0.94, + "learning_rate": 8.031063221639268e-08, + "loss": 0.4942, + "step": 7535 + }, + { + "epoch": 0.94, + "learning_rate": 7.994861722682101e-08, + "loss": 0.572, + "step": 7536 + }, + { + "epoch": 0.94, + "learning_rate": 7.958741343023047e-08, + "loss": 0.5322, + "step": 7537 + }, + { + "epoch": 0.94, + "learning_rate": 7.922702088617396e-08, + "loss": 0.2192, + "step": 7538 + }, + { + "epoch": 0.95, + "learning_rate": 7.886743965407173e-08, + "loss": 0.5592, + "step": 7539 + }, + { + "epoch": 0.95, + "learning_rate": 7.850866979320915e-08, + "loss": 0.5017, + "step": 7540 + }, + { + "epoch": 0.95, + "learning_rate": 7.815071136273889e-08, + "loss": 0.5003, + "step": 7541 + }, + { + "epoch": 0.95, + "learning_rate": 7.779356442167818e-08, + "loss": 0.5071, + "step": 7542 + }, + { + "epoch": 0.95, + "learning_rate": 7.74372290289127e-08, + "loss": 0.4707, + "step": 7543 + }, + { + "epoch": 0.95, + "learning_rate": 7.708170524319325e-08, + "loss": 0.454, + "step": 7544 + }, + { + "epoch": 0.95, + "learning_rate": 7.672699312313569e-08, + "loss": 0.4734, + "step": 7545 + }, + { + "epoch": 0.95, + "learning_rate": 7.637309272722492e-08, + "loss": 0.5087, + "step": 7546 + }, + { + "epoch": 0.95, + "learning_rate": 7.602000411380817e-08, + "loss": 0.5414, + "step": 7547 + }, + { + "epoch": 0.95, + "learning_rate": 7.566772734110217e-08, + "loss": 0.5187, + "step": 7548 + }, + { + "epoch": 0.95, + "learning_rate": 7.531626246718826e-08, + "loss": 0.5022, + "step": 7549 + }, + { + "epoch": 0.95, + "learning_rate": 7.496560955001453e-08, + "loss": 0.4856, + "step": 7550 + }, + { + "epoch": 0.95, + "learning_rate": 7.461576864739361e-08, + "loss": 0.5471, + "step": 7551 + }, + { + "epoch": 0.95, + "learning_rate": 7.426673981700716e-08, + "loss": 0.5186, + "step": 7552 + }, + { + "epoch": 0.95, + "learning_rate": 7.391852311640024e-08, + "loss": 0.5488, + "step": 7553 + }, + { + "epoch": 0.95, + "learning_rate": 7.357111860298527e-08, + "loss": 0.6044, + "step": 7554 + }, + { + "epoch": 0.95, + "learning_rate": 7.322452633404032e-08, + "loss": 0.534, + "step": 7555 + }, + { + "epoch": 0.95, + "learning_rate": 7.287874636671021e-08, + "loss": 0.5283, + "step": 7556 + }, + { + "epoch": 0.95, + "learning_rate": 7.253377875800494e-08, + "loss": 0.5315, + "step": 7557 + }, + { + "epoch": 0.95, + "learning_rate": 7.21896235648012e-08, + "loss": 0.4461, + "step": 7558 + }, + { + "epoch": 0.95, + "learning_rate": 7.184628084384193e-08, + "loss": 0.4952, + "step": 7559 + }, + { + "epoch": 0.95, + "learning_rate": 7.150375065173465e-08, + "loss": 0.4911, + "step": 7560 + }, + { + "epoch": 0.95, + "learning_rate": 7.116203304495528e-08, + "loss": 0.4756, + "step": 7561 + }, + { + "epoch": 0.95, + "learning_rate": 7.082112807984376e-08, + "loss": 0.5265, + "step": 7562 + }, + { + "epoch": 0.95, + "learning_rate": 7.048103581260679e-08, + "loss": 0.5253, + "step": 7563 + }, + { + "epoch": 0.95, + "learning_rate": 7.014175629931674e-08, + "loss": 0.1987, + "step": 7564 + }, + { + "epoch": 0.95, + "learning_rate": 6.980328959591331e-08, + "loss": 0.4927, + "step": 7565 + }, + { + "epoch": 0.95, + "learning_rate": 6.94656357582002e-08, + "loss": 0.5362, + "step": 7566 + }, + { + "epoch": 0.95, + "learning_rate": 6.912879484184842e-08, + "loss": 0.467, + "step": 7567 + }, + { + "epoch": 0.95, + "learning_rate": 6.879276690239467e-08, + "loss": 0.4967, + "step": 7568 + }, + { + "epoch": 0.95, + "learning_rate": 6.84575519952413e-08, + "loss": 0.5817, + "step": 7569 + }, + { + "epoch": 0.95, + "learning_rate": 6.812315017565685e-08, + "loss": 0.5571, + "step": 7570 + }, + { + "epoch": 0.95, + "learning_rate": 6.778956149877614e-08, + "loss": 0.4882, + "step": 7571 + }, + { + "epoch": 0.95, + "learning_rate": 6.745678601959904e-08, + "loss": 0.4948, + "step": 7572 + }, + { + "epoch": 0.95, + "learning_rate": 6.712482379299278e-08, + "loss": 0.5052, + "step": 7573 + }, + { + "epoch": 0.95, + "learning_rate": 6.679367487368805e-08, + "loss": 0.515, + "step": 7574 + }, + { + "epoch": 0.95, + "learning_rate": 6.646333931628446e-08, + "loss": 0.4694, + "step": 7575 + }, + { + "epoch": 0.95, + "learning_rate": 6.613381717524513e-08, + "loss": 0.5159, + "step": 7576 + }, + { + "epoch": 0.95, + "learning_rate": 6.580510850490052e-08, + "loss": 0.4699, + "step": 7577 + }, + { + "epoch": 0.95, + "learning_rate": 6.547721335944612e-08, + "loss": 0.548, + "step": 7578 + }, + { + "epoch": 0.95, + "learning_rate": 6.515013179294427e-08, + "loss": 0.4896, + "step": 7579 + }, + { + "epoch": 0.95, + "learning_rate": 6.482386385932182e-08, + "loss": 0.5213, + "step": 7580 + }, + { + "epoch": 0.95, + "learning_rate": 6.449840961237241e-08, + "loss": 0.5242, + "step": 7581 + }, + { + "epoch": 0.95, + "learning_rate": 6.417376910575479e-08, + "loss": 0.5243, + "step": 7582 + }, + { + "epoch": 0.95, + "learning_rate": 6.384994239299447e-08, + "loss": 0.457, + "step": 7583 + }, + { + "epoch": 0.95, + "learning_rate": 6.352692952748263e-08, + "loss": 0.4867, + "step": 7584 + }, + { + "epoch": 0.95, + "learning_rate": 6.320473056247555e-08, + "loss": 0.5041, + "step": 7585 + }, + { + "epoch": 0.95, + "learning_rate": 6.288334555109577e-08, + "loss": 0.5809, + "step": 7586 + }, + { + "epoch": 0.95, + "learning_rate": 6.256277454633197e-08, + "loss": 0.4394, + "step": 7587 + }, + { + "epoch": 0.95, + "learning_rate": 6.224301760103801e-08, + "loss": 0.5155, + "step": 7588 + }, + { + "epoch": 0.95, + "learning_rate": 6.192407476793394e-08, + "loss": 0.4693, + "step": 7589 + }, + { + "epoch": 0.95, + "learning_rate": 6.160594609960491e-08, + "loss": 0.4936, + "step": 7590 + }, + { + "epoch": 0.95, + "learning_rate": 6.128863164850285e-08, + "loss": 0.4997, + "step": 7591 + }, + { + "epoch": 0.95, + "learning_rate": 6.09721314669448e-08, + "loss": 0.4946, + "step": 7592 + }, + { + "epoch": 0.95, + "learning_rate": 6.065644560711404e-08, + "loss": 0.5325, + "step": 7593 + }, + { + "epoch": 0.95, + "learning_rate": 6.03415741210589e-08, + "loss": 0.499, + "step": 7594 + }, + { + "epoch": 0.95, + "learning_rate": 6.002751706069343e-08, + "loss": 0.5964, + "step": 7595 + }, + { + "epoch": 0.95, + "learning_rate": 5.971427447779842e-08, + "loss": 0.2167, + "step": 7596 + }, + { + "epoch": 0.95, + "learning_rate": 5.940184642401925e-08, + "loss": 0.5816, + "step": 7597 + }, + { + "epoch": 0.95, + "learning_rate": 5.9090232950867463e-08, + "loss": 0.5508, + "step": 7598 + }, + { + "epoch": 0.95, + "learning_rate": 5.877943410972087e-08, + "loss": 0.5332, + "step": 7599 + }, + { + "epoch": 0.95, + "learning_rate": 5.84694499518218e-08, + "loss": 0.4648, + "step": 7600 + }, + { + "epoch": 0.95, + "learning_rate": 5.816028052827938e-08, + "loss": 0.4599, + "step": 7601 + }, + { + "epoch": 0.95, + "learning_rate": 5.785192589006727e-08, + "loss": 0.4738, + "step": 7602 + }, + { + "epoch": 0.95, + "learning_rate": 5.754438608802537e-08, + "loss": 0.4928, + "step": 7603 + }, + { + "epoch": 0.95, + "learning_rate": 5.723766117286034e-08, + "loss": 0.5436, + "step": 7604 + }, + { + "epoch": 0.95, + "learning_rate": 5.693175119514227e-08, + "loss": 0.4836, + "step": 7605 + }, + { + "epoch": 0.95, + "learning_rate": 5.662665620530861e-08, + "loss": 0.497, + "step": 7606 + }, + { + "epoch": 0.95, + "learning_rate": 5.6322376253661884e-08, + "loss": 0.5447, + "step": 7607 + }, + { + "epoch": 0.95, + "learning_rate": 5.6018911390369747e-08, + "loss": 0.4234, + "step": 7608 + }, + { + "epoch": 0.95, + "learning_rate": 5.571626166546606e-08, + "loss": 0.4989, + "step": 7609 + }, + { + "epoch": 0.95, + "learning_rate": 5.541442712885092e-08, + "loss": 0.4326, + "step": 7610 + }, + { + "epoch": 0.95, + "learning_rate": 5.51134078302884e-08, + "loss": 0.4831, + "step": 7611 + }, + { + "epoch": 0.95, + "learning_rate": 5.4813203819409354e-08, + "loss": 0.493, + "step": 7612 + }, + { + "epoch": 0.95, + "learning_rate": 5.451381514570975e-08, + "loss": 0.556, + "step": 7613 + }, + { + "epoch": 0.95, + "learning_rate": 5.4215241858551206e-08, + "loss": 0.5148, + "step": 7614 + }, + { + "epoch": 0.95, + "learning_rate": 5.391748400716157e-08, + "loss": 0.563, + "step": 7615 + }, + { + "epoch": 0.95, + "learning_rate": 5.3620541640633236e-08, + "loss": 0.4748, + "step": 7616 + }, + { + "epoch": 0.95, + "learning_rate": 5.332441480792427e-08, + "loss": 0.4908, + "step": 7617 + }, + { + "epoch": 0.95, + "learning_rate": 5.302910355785895e-08, + "loss": 0.4318, + "step": 7618 + }, + { + "epoch": 0.96, + "learning_rate": 5.27346079391261e-08, + "loss": 0.4639, + "step": 7619 + }, + { + "epoch": 0.96, + "learning_rate": 5.2440928000281886e-08, + "loss": 0.471, + "step": 7620 + }, + { + "epoch": 0.96, + "learning_rate": 5.214806378974535e-08, + "loss": 0.5525, + "step": 7621 + }, + { + "epoch": 0.96, + "learning_rate": 5.185601535580342e-08, + "loss": 0.4032, + "step": 7622 + }, + { + "epoch": 0.96, + "learning_rate": 5.1564782746607036e-08, + "loss": 0.4278, + "step": 7623 + }, + { + "epoch": 0.96, + "learning_rate": 5.127436601017277e-08, + "loss": 0.5237, + "step": 7624 + }, + { + "epoch": 0.96, + "learning_rate": 5.0984765194384e-08, + "loss": 0.5016, + "step": 7625 + }, + { + "epoch": 0.96, + "learning_rate": 5.0695980346988635e-08, + "loss": 0.5302, + "step": 7626 + }, + { + "epoch": 0.96, + "learning_rate": 5.0408011515599706e-08, + "loss": 0.5164, + "step": 7627 + }, + { + "epoch": 0.96, + "learning_rate": 5.012085874769534e-08, + "loss": 0.5074, + "step": 7628 + }, + { + "epoch": 0.96, + "learning_rate": 4.983452209062045e-08, + "loss": 0.5748, + "step": 7629 + }, + { + "epoch": 0.96, + "learning_rate": 4.954900159158504e-08, + "loss": 0.5315, + "step": 7630 + }, + { + "epoch": 0.96, + "learning_rate": 4.9264297297663685e-08, + "loss": 0.2175, + "step": 7631 + }, + { + "epoch": 0.96, + "learning_rate": 4.898040925579828e-08, + "loss": 0.5323, + "step": 7632 + }, + { + "epoch": 0.96, + "learning_rate": 4.869733751279304e-08, + "loss": 0.5006, + "step": 7633 + }, + { + "epoch": 0.96, + "learning_rate": 4.841508211532009e-08, + "loss": 0.6332, + "step": 7634 + }, + { + "epoch": 0.96, + "learning_rate": 4.813364310991664e-08, + "loss": 0.4977, + "step": 7635 + }, + { + "epoch": 0.96, + "learning_rate": 4.785302054298446e-08, + "loss": 0.5462, + "step": 7636 + }, + { + "epoch": 0.96, + "learning_rate": 4.757321446079155e-08, + "loss": 0.4531, + "step": 7637 + }, + { + "epoch": 0.96, + "learning_rate": 4.7294224909470444e-08, + "loss": 0.4811, + "step": 7638 + }, + { + "epoch": 0.96, + "learning_rate": 4.70160519350199e-08, + "loss": 0.5233, + "step": 7639 + }, + { + "epoch": 0.96, + "learning_rate": 4.67386955833038e-08, + "loss": 0.4394, + "step": 7640 + }, + { + "epoch": 0.96, + "learning_rate": 4.646215590005054e-08, + "loss": 0.4975, + "step": 7641 + }, + { + "epoch": 0.96, + "learning_rate": 4.618643293085534e-08, + "loss": 0.4969, + "step": 7642 + }, + { + "epoch": 0.96, + "learning_rate": 4.5911526721177935e-08, + "loss": 0.52, + "step": 7643 + }, + { + "epoch": 0.96, + "learning_rate": 4.563743731634318e-08, + "loss": 0.215, + "step": 7644 + }, + { + "epoch": 0.96, + "learning_rate": 4.536416476154104e-08, + "loss": 0.4983, + "step": 7645 + }, + { + "epoch": 0.96, + "learning_rate": 4.509170910182825e-08, + "loss": 0.4545, + "step": 7646 + }, + { + "epoch": 0.96, + "learning_rate": 4.4820070382124994e-08, + "loss": 0.5148, + "step": 7647 + }, + { + "epoch": 0.96, + "learning_rate": 4.4549248647218765e-08, + "loss": 0.4839, + "step": 7648 + }, + { + "epoch": 0.96, + "learning_rate": 4.427924394176053e-08, + "loss": 0.4819, + "step": 7649 + }, + { + "epoch": 0.96, + "learning_rate": 4.401005631026745e-08, + "loss": 0.4932, + "step": 7650 + }, + { + "epoch": 0.96, + "learning_rate": 4.374168579712124e-08, + "loss": 0.5247, + "step": 7651 + }, + { + "epoch": 0.96, + "learning_rate": 4.347413244657039e-08, + "loss": 0.5392, + "step": 7652 + }, + { + "epoch": 0.96, + "learning_rate": 4.3207396302727966e-08, + "loss": 0.4874, + "step": 7653 + }, + { + "epoch": 0.96, + "learning_rate": 4.2941477409570444e-08, + "loss": 0.516, + "step": 7654 + }, + { + "epoch": 0.96, + "learning_rate": 4.267637581094275e-08, + "loss": 0.4986, + "step": 7655 + }, + { + "epoch": 0.96, + "learning_rate": 4.2412091550552704e-08, + "loss": 0.5373, + "step": 7656 + }, + { + "epoch": 0.96, + "learning_rate": 4.2148624671974334e-08, + "loss": 0.4966, + "step": 7657 + }, + { + "epoch": 0.96, + "learning_rate": 4.188597521864679e-08, + "loss": 0.4847, + "step": 7658 + }, + { + "epoch": 0.96, + "learning_rate": 4.162414323387487e-08, + "loss": 0.4987, + "step": 7659 + }, + { + "epoch": 0.96, + "learning_rate": 4.136312876082682e-08, + "loss": 0.4884, + "step": 7660 + }, + { + "epoch": 0.96, + "learning_rate": 4.1102931842538215e-08, + "loss": 0.4991, + "step": 7661 + }, + { + "epoch": 0.96, + "learning_rate": 4.084355252190919e-08, + "loss": 0.584, + "step": 7662 + }, + { + "epoch": 0.96, + "learning_rate": 4.058499084170442e-08, + "loss": 0.5162, + "step": 7663 + }, + { + "epoch": 0.96, + "learning_rate": 4.0327246844554244e-08, + "loss": 0.4787, + "step": 7664 + }, + { + "epoch": 0.96, + "learning_rate": 4.007032057295468e-08, + "loss": 0.4953, + "step": 7665 + }, + { + "epoch": 0.96, + "learning_rate": 3.9814212069265725e-08, + "loss": 0.4884, + "step": 7666 + }, + { + "epoch": 0.96, + "learning_rate": 3.95589213757136e-08, + "loss": 0.5242, + "step": 7667 + }, + { + "epoch": 0.96, + "learning_rate": 3.930444853438964e-08, + "loss": 0.4916, + "step": 7668 + }, + { + "epoch": 0.96, + "learning_rate": 3.905079358724972e-08, + "loss": 0.4972, + "step": 7669 + }, + { + "epoch": 0.96, + "learning_rate": 3.8797956576114846e-08, + "loss": 0.4873, + "step": 7670 + }, + { + "epoch": 0.96, + "learning_rate": 3.8545937542671665e-08, + "loss": 0.4984, + "step": 7671 + }, + { + "epoch": 0.96, + "learning_rate": 3.8294736528472487e-08, + "loss": 0.4582, + "step": 7672 + }, + { + "epoch": 0.96, + "learning_rate": 3.804435357493308e-08, + "loss": 0.451, + "step": 7673 + }, + { + "epoch": 0.96, + "learning_rate": 3.7794788723335975e-08, + "loss": 0.5103, + "step": 7674 + }, + { + "epoch": 0.96, + "learning_rate": 3.75460420148277e-08, + "loss": 0.5073, + "step": 7675 + }, + { + "epoch": 0.96, + "learning_rate": 3.7298113490421006e-08, + "loss": 0.5063, + "step": 7676 + }, + { + "epoch": 0.96, + "learning_rate": 3.705100319099208e-08, + "loss": 0.4824, + "step": 7677 + }, + { + "epoch": 0.96, + "learning_rate": 3.68047111572839e-08, + "loss": 0.5474, + "step": 7678 + }, + { + "epoch": 0.96, + "learning_rate": 3.655923742990397e-08, + "loss": 0.5717, + "step": 7679 + }, + { + "epoch": 0.96, + "learning_rate": 3.631458204932437e-08, + "loss": 0.4494, + "step": 7680 + }, + { + "epoch": 0.96, + "learning_rate": 3.6070745055882814e-08, + "loss": 0.5651, + "step": 7681 + }, + { + "epoch": 0.96, + "learning_rate": 3.582772648978161e-08, + "loss": 0.5357, + "step": 7682 + }, + { + "epoch": 0.96, + "learning_rate": 3.55855263910887e-08, + "loss": 0.2261, + "step": 7683 + }, + { + "epoch": 0.96, + "learning_rate": 3.534414479973658e-08, + "loss": 0.4777, + "step": 7684 + }, + { + "epoch": 0.96, + "learning_rate": 3.510358175552342e-08, + "loss": 0.5021, + "step": 7685 + }, + { + "epoch": 0.96, + "learning_rate": 3.4863837298111934e-08, + "loss": 0.5082, + "step": 7686 + }, + { + "epoch": 0.96, + "learning_rate": 3.46249114670294e-08, + "loss": 0.4906, + "step": 7687 + }, + { + "epoch": 0.96, + "learning_rate": 3.4386804301669294e-08, + "loss": 0.5295, + "step": 7688 + }, + { + "epoch": 0.96, + "learning_rate": 3.414951584128967e-08, + "loss": 0.5506, + "step": 7689 + }, + { + "epoch": 0.96, + "learning_rate": 3.391304612501256e-08, + "loss": 0.5019, + "step": 7690 + }, + { + "epoch": 0.96, + "learning_rate": 3.367739519182678e-08, + "loss": 0.5511, + "step": 7691 + }, + { + "epoch": 0.96, + "learning_rate": 3.344256308058458e-08, + "loss": 0.4987, + "step": 7692 + }, + { + "epoch": 0.96, + "learning_rate": 3.320854983000443e-08, + "loss": 0.48, + "step": 7693 + }, + { + "epoch": 0.96, + "learning_rate": 3.2975355478668814e-08, + "loss": 0.5003, + "step": 7694 + }, + { + "epoch": 0.96, + "learning_rate": 3.274298006502585e-08, + "loss": 0.523, + "step": 7695 + }, + { + "epoch": 0.96, + "learning_rate": 3.251142362738879e-08, + "loss": 0.4686, + "step": 7696 + }, + { + "epoch": 0.96, + "learning_rate": 3.228068620393432e-08, + "loss": 0.601, + "step": 7697 + }, + { + "epoch": 0.96, + "learning_rate": 3.205076783270644e-08, + "loss": 0.2239, + "step": 7698 + }, + { + "epoch": 0.97, + "learning_rate": 3.1821668551612616e-08, + "loss": 0.4518, + "step": 7699 + }, + { + "epoch": 0.97, + "learning_rate": 3.159338839842541e-08, + "loss": 0.492, + "step": 7700 + }, + { + "epoch": 0.97, + "learning_rate": 3.136592741078248e-08, + "loss": 0.4851, + "step": 7701 + }, + { + "epoch": 0.97, + "learning_rate": 3.113928562618607e-08, + "loss": 0.4598, + "step": 7702 + }, + { + "epoch": 0.97, + "learning_rate": 3.09134630820046e-08, + "loss": 0.506, + "step": 7703 + }, + { + "epoch": 0.97, + "learning_rate": 3.0688459815469417e-08, + "loss": 0.4771, + "step": 7704 + }, + { + "epoch": 0.97, + "learning_rate": 3.046427586367917e-08, + "loss": 0.5209, + "step": 7705 + }, + { + "epoch": 0.97, + "learning_rate": 3.024091126359541e-08, + "loss": 0.4807, + "step": 7706 + }, + { + "epoch": 0.97, + "learning_rate": 3.001836605204533e-08, + "loss": 0.5067, + "step": 7707 + }, + { + "epoch": 0.97, + "learning_rate": 2.9796640265721266e-08, + "loss": 0.5519, + "step": 7708 + }, + { + "epoch": 0.97, + "learning_rate": 2.9575733941180074e-08, + "loss": 0.5099, + "step": 7709 + }, + { + "epoch": 0.97, + "learning_rate": 2.935564711484429e-08, + "loss": 0.5383, + "step": 7710 + }, + { + "epoch": 0.97, + "learning_rate": 2.913637982299933e-08, + "loss": 0.5467, + "step": 7711 + }, + { + "epoch": 0.97, + "learning_rate": 2.8917932101798497e-08, + "loss": 0.536, + "step": 7712 + }, + { + "epoch": 0.97, + "learning_rate": 2.8700303987257428e-08, + "loss": 0.4633, + "step": 7713 + }, + { + "epoch": 0.97, + "learning_rate": 2.848349551525742e-08, + "loss": 0.468, + "step": 7714 + }, + { + "epoch": 0.97, + "learning_rate": 2.826750672154488e-08, + "loss": 0.4814, + "step": 7715 + }, + { + "epoch": 0.97, + "learning_rate": 2.805233764173132e-08, + "loss": 0.5114, + "step": 7716 + }, + { + "epoch": 0.97, + "learning_rate": 2.783798831129225e-08, + "loss": 0.5238, + "step": 7717 + }, + { + "epoch": 0.97, + "learning_rate": 2.7624458765568852e-08, + "loss": 0.573, + "step": 7718 + }, + { + "epoch": 0.97, + "learning_rate": 2.7411749039766288e-08, + "loss": 0.4944, + "step": 7719 + }, + { + "epoch": 0.97, + "learning_rate": 2.7199859168955955e-08, + "loss": 0.4858, + "step": 7720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6988789188072128e-08, + "loss": 0.4952, + "step": 7721 + }, + { + "epoch": 0.97, + "learning_rate": 2.677853913191586e-08, + "loss": 0.4951, + "step": 7722 + }, + { + "epoch": 0.97, + "learning_rate": 2.6569109035151642e-08, + "loss": 0.4767, + "step": 7723 + }, + { + "epoch": 0.97, + "learning_rate": 2.6360498932309077e-08, + "loss": 0.5249, + "step": 7724 + }, + { + "epoch": 0.97, + "learning_rate": 2.6152708857782873e-08, + "loss": 0.2223, + "step": 7725 + }, + { + "epoch": 0.97, + "learning_rate": 2.5945738845832846e-08, + "loss": 0.5209, + "step": 7726 + }, + { + "epoch": 0.97, + "learning_rate": 2.573958893058226e-08, + "loss": 0.5086, + "step": 7727 + }, + { + "epoch": 0.97, + "learning_rate": 2.5534259146021147e-08, + "loss": 0.2113, + "step": 7728 + }, + { + "epoch": 0.97, + "learning_rate": 2.5329749526002423e-08, + "loss": 0.5051, + "step": 7729 + }, + { + "epoch": 0.97, + "learning_rate": 2.5126060104244678e-08, + "loss": 0.5245, + "step": 7730 + }, + { + "epoch": 0.97, + "learning_rate": 2.492319091433104e-08, + "loss": 0.496, + "step": 7731 + }, + { + "epoch": 0.97, + "learning_rate": 2.472114198971032e-08, + "loss": 0.5369, + "step": 7732 + }, + { + "epoch": 0.97, + "learning_rate": 2.4519913363694748e-08, + "loss": 0.4967, + "step": 7733 + }, + { + "epoch": 0.97, + "learning_rate": 2.4319505069462234e-08, + "loss": 0.4912, + "step": 7734 + }, + { + "epoch": 0.97, + "learning_rate": 2.411991714005468e-08, + "loss": 0.4954, + "step": 7735 + }, + { + "epoch": 0.97, + "learning_rate": 2.3921149608379102e-08, + "loss": 0.4987, + "step": 7736 + }, + { + "epoch": 0.97, + "learning_rate": 2.372320250720761e-08, + "loss": 0.5696, + "step": 7737 + }, + { + "epoch": 0.97, + "learning_rate": 2.3526075869176323e-08, + "loss": 0.4559, + "step": 7738 + }, + { + "epoch": 0.97, + "learning_rate": 2.3329769726787022e-08, + "loss": 0.5144, + "step": 7739 + }, + { + "epoch": 0.97, + "learning_rate": 2.3134284112404927e-08, + "loss": 0.4911, + "step": 7740 + }, + { + "epoch": 0.97, + "learning_rate": 2.2939619058261474e-08, + "loss": 0.5178, + "step": 7741 + }, + { + "epoch": 0.97, + "learning_rate": 2.2745774596450996e-08, + "loss": 0.5062, + "step": 7742 + }, + { + "epoch": 0.97, + "learning_rate": 2.2552750758935148e-08, + "loss": 0.5249, + "step": 7743 + }, + { + "epoch": 0.97, + "learning_rate": 2.236054757753736e-08, + "loss": 0.5545, + "step": 7744 + }, + { + "epoch": 0.97, + "learning_rate": 2.2169165083947842e-08, + "loss": 0.5241, + "step": 7745 + }, + { + "epoch": 0.97, + "learning_rate": 2.1978603309720238e-08, + "loss": 0.4891, + "step": 7746 + }, + { + "epoch": 0.97, + "learning_rate": 2.1788862286273303e-08, + "loss": 0.5332, + "step": 7747 + }, + { + "epoch": 0.97, + "learning_rate": 2.1599942044891465e-08, + "loss": 0.4912, + "step": 7748 + }, + { + "epoch": 0.97, + "learning_rate": 2.1411842616722577e-08, + "loss": 0.4908, + "step": 7749 + }, + { + "epoch": 0.97, + "learning_rate": 2.122456403277906e-08, + "loss": 0.4434, + "step": 7750 + }, + { + "epoch": 0.97, + "learning_rate": 2.103810632393899e-08, + "loss": 0.5274, + "step": 7751 + }, + { + "epoch": 0.97, + "learning_rate": 2.0852469520943885e-08, + "loss": 0.4986, + "step": 7752 + }, + { + "epoch": 0.97, + "learning_rate": 2.066765365440093e-08, + "loss": 0.533, + "step": 7753 + }, + { + "epoch": 0.97, + "learning_rate": 2.0483658754781865e-08, + "loss": 0.4907, + "step": 7754 + }, + { + "epoch": 0.97, + "learning_rate": 2.0300484852422974e-08, + "loss": 0.5172, + "step": 7755 + }, + { + "epoch": 0.97, + "learning_rate": 2.0118131977524547e-08, + "loss": 0.5922, + "step": 7756 + }, + { + "epoch": 0.97, + "learning_rate": 1.9936600160151977e-08, + "loss": 0.4661, + "step": 7757 + }, + { + "epoch": 0.97, + "learning_rate": 1.9755889430235764e-08, + "loss": 0.5358, + "step": 7758 + }, + { + "epoch": 0.97, + "learning_rate": 1.9575999817570414e-08, + "loss": 0.4573, + "step": 7759 + }, + { + "epoch": 0.97, + "learning_rate": 1.9396931351814975e-08, + "loss": 0.4209, + "step": 7760 + }, + { + "epoch": 0.97, + "learning_rate": 1.9218684062493608e-08, + "loss": 0.5147, + "step": 7761 + }, + { + "epoch": 0.97, + "learning_rate": 1.9041257978994477e-08, + "loss": 0.5143, + "step": 7762 + }, + { + "epoch": 0.97, + "learning_rate": 1.8864653130570843e-08, + "loss": 0.4947, + "step": 7763 + }, + { + "epoch": 0.97, + "learning_rate": 1.8688869546340527e-08, + "loss": 0.5381, + "step": 7764 + }, + { + "epoch": 0.97, + "learning_rate": 1.85139072552859e-08, + "loss": 0.5804, + "step": 7765 + }, + { + "epoch": 0.97, + "learning_rate": 1.833976628625389e-08, + "loss": 0.5105, + "step": 7766 + }, + { + "epoch": 0.97, + "learning_rate": 1.8166446667955417e-08, + "loss": 0.4906, + "step": 7767 + }, + { + "epoch": 0.97, + "learning_rate": 1.7993948428967067e-08, + "loss": 0.4679, + "step": 7768 + }, + { + "epoch": 0.97, + "learning_rate": 1.782227159772887e-08, + "loss": 0.5042, + "step": 7769 + }, + { + "epoch": 0.97, + "learning_rate": 1.765141620254707e-08, + "loss": 0.617, + "step": 7770 + }, + { + "epoch": 0.97, + "learning_rate": 1.7481382271590797e-08, + "loss": 0.5406, + "step": 7771 + }, + { + "epoch": 0.97, + "learning_rate": 1.73121698328943e-08, + "loss": 0.5127, + "step": 7772 + }, + { + "epoch": 0.97, + "learning_rate": 1.7143778914356922e-08, + "loss": 0.5176, + "step": 7773 + }, + { + "epoch": 0.97, + "learning_rate": 1.697620954374146e-08, + "loss": 0.4499, + "step": 7774 + }, + { + "epoch": 0.97, + "learning_rate": 1.680946174867637e-08, + "loss": 0.6188, + "step": 7775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6643535556653546e-08, + "loss": 0.4892, + "step": 7776 + }, + { + "epoch": 0.97, + "learning_rate": 1.6478430995031102e-08, + "loss": 0.5337, + "step": 7777 + }, + { + "epoch": 0.97, + "learning_rate": 1.631414809102949e-08, + "loss": 0.5137, + "step": 7778 + }, + { + "epoch": 0.98, + "learning_rate": 1.615068687173593e-08, + "loss": 0.2253, + "step": 7779 + }, + { + "epoch": 0.98, + "learning_rate": 1.5988047364100533e-08, + "loss": 0.5183, + "step": 7780 + }, + { + "epoch": 0.98, + "learning_rate": 1.582622959493796e-08, + "loss": 0.4911, + "step": 7781 + }, + { + "epoch": 0.98, + "learning_rate": 1.5665233590929084e-08, + "loss": 0.5072, + "step": 7782 + }, + { + "epoch": 0.98, + "learning_rate": 1.5505059378617128e-08, + "loss": 0.49, + "step": 7783 + }, + { + "epoch": 0.98, + "learning_rate": 1.5345706984410957e-08, + "loss": 0.4838, + "step": 7784 + }, + { + "epoch": 0.98, + "learning_rate": 1.5187176434584007e-08, + "loss": 0.566, + "step": 7785 + }, + { + "epoch": 0.98, + "learning_rate": 1.502946775527425e-08, + "loss": 0.4856, + "step": 7786 + }, + { + "epoch": 0.98, + "learning_rate": 1.487258097248312e-08, + "loss": 0.4704, + "step": 7787 + }, + { + "epoch": 0.98, + "learning_rate": 1.471651611207825e-08, + "loss": 0.4331, + "step": 7788 + }, + { + "epoch": 0.98, + "learning_rate": 1.4561273199789616e-08, + "loss": 0.5068, + "step": 7789 + }, + { + "epoch": 0.98, + "learning_rate": 1.4406852261213966e-08, + "loss": 0.4446, + "step": 7790 + }, + { + "epoch": 0.98, + "learning_rate": 1.4253253321810933e-08, + "loss": 0.55, + "step": 7791 + }, + { + "epoch": 0.98, + "learning_rate": 1.4100476406905817e-08, + "loss": 0.491, + "step": 7792 + }, + { + "epoch": 0.98, + "learning_rate": 1.3948521541686799e-08, + "loss": 0.5171, + "step": 7793 + }, + { + "epoch": 0.98, + "learning_rate": 1.3797388751207174e-08, + "loss": 0.457, + "step": 7794 + }, + { + "epoch": 0.98, + "learning_rate": 1.3647078060385898e-08, + "loss": 0.4897, + "step": 7795 + }, + { + "epoch": 0.98, + "learning_rate": 1.349758949400537e-08, + "loss": 0.4969, + "step": 7796 + }, + { + "epoch": 0.98, + "learning_rate": 1.3348923076711983e-08, + "loss": 0.5098, + "step": 7797 + }, + { + "epoch": 0.98, + "learning_rate": 1.3201078833017244e-08, + "loss": 0.4766, + "step": 7798 + }, + { + "epoch": 0.98, + "learning_rate": 1.305405678729721e-08, + "loss": 0.5061, + "step": 7799 + }, + { + "epoch": 0.98, + "learning_rate": 1.2907856963791376e-08, + "loss": 0.4831, + "step": 7800 + }, + { + "epoch": 0.98, + "learning_rate": 1.2762479386605464e-08, + "loss": 0.5342, + "step": 7801 + }, + { + "epoch": 0.98, + "learning_rate": 1.2617924079708077e-08, + "loss": 0.509, + "step": 7802 + }, + { + "epoch": 0.98, + "learning_rate": 1.2474191066932372e-08, + "loss": 0.5216, + "step": 7803 + }, + { + "epoch": 0.98, + "learning_rate": 1.2331280371977172e-08, + "loss": 0.2053, + "step": 7804 + }, + { + "epoch": 0.98, + "learning_rate": 1.2189192018404184e-08, + "loss": 0.521, + "step": 7805 + }, + { + "epoch": 0.98, + "learning_rate": 1.2047926029640777e-08, + "loss": 0.4941, + "step": 7806 + }, + { + "epoch": 0.98, + "learning_rate": 1.1907482428977768e-08, + "loss": 0.5548, + "step": 7807 + }, + { + "epoch": 0.98, + "learning_rate": 1.1767861239570521e-08, + "loss": 0.4452, + "step": 7808 + }, + { + "epoch": 0.98, + "learning_rate": 1.1629062484439513e-08, + "loss": 0.4986, + "step": 7809 + }, + { + "epoch": 0.98, + "learning_rate": 1.1491086186469214e-08, + "loss": 0.5127, + "step": 7810 + }, + { + "epoch": 0.98, + "learning_rate": 1.1353932368408093e-08, + "loss": 0.5064, + "step": 7811 + }, + { + "epoch": 0.98, + "learning_rate": 1.121760105286973e-08, + "loss": 0.4262, + "step": 7812 + }, + { + "epoch": 0.98, + "learning_rate": 1.1082092262331145e-08, + "loss": 0.4634, + "step": 7813 + }, + { + "epoch": 0.98, + "learning_rate": 1.0947406019135021e-08, + "loss": 0.1927, + "step": 7814 + }, + { + "epoch": 0.98, + "learning_rate": 1.0813542345487482e-08, + "loss": 0.4278, + "step": 7815 + }, + { + "epoch": 0.98, + "learning_rate": 1.0680501263459764e-08, + "loss": 0.5286, + "step": 7816 + }, + { + "epoch": 0.98, + "learning_rate": 1.0548282794985987e-08, + "loss": 0.5411, + "step": 7817 + }, + { + "epoch": 0.98, + "learning_rate": 1.0416886961865935e-08, + "loss": 0.475, + "step": 7818 + }, + { + "epoch": 0.98, + "learning_rate": 1.02863137857645e-08, + "loss": 0.5437, + "step": 7819 + }, + { + "epoch": 0.98, + "learning_rate": 1.0156563288208909e-08, + "loss": 0.5184, + "step": 7820 + }, + { + "epoch": 0.98, + "learning_rate": 1.0027635490592047e-08, + "loss": 0.4988, + "step": 7821 + }, + { + "epoch": 0.98, + "learning_rate": 9.899530414170244e-09, + "loss": 0.4968, + "step": 7822 + }, + { + "epoch": 0.98, + "learning_rate": 9.772248080066048e-09, + "loss": 0.5212, + "step": 7823 + }, + { + "epoch": 0.98, + "learning_rate": 9.645788509264341e-09, + "loss": 0.4913, + "step": 7824 + }, + { + "epoch": 0.98, + "learning_rate": 9.520151722615668e-09, + "loss": 0.5347, + "step": 7825 + }, + { + "epoch": 0.98, + "learning_rate": 9.395337740833454e-09, + "loss": 0.555, + "step": 7826 + }, + { + "epoch": 0.98, + "learning_rate": 9.2713465844968e-09, + "loss": 0.4889, + "step": 7827 + }, + { + "epoch": 0.98, + "learning_rate": 9.148178274049347e-09, + "loss": 0.4554, + "step": 7828 + }, + { + "epoch": 0.98, + "learning_rate": 9.025832829797632e-09, + "loss": 0.5018, + "step": 7829 + }, + { + "epoch": 0.98, + "learning_rate": 8.904310271913851e-09, + "loss": 0.5577, + "step": 7830 + }, + { + "epoch": 0.98, + "learning_rate": 8.783610620434203e-09, + "loss": 0.5009, + "step": 7831 + }, + { + "epoch": 0.98, + "learning_rate": 8.66373389525832e-09, + "loss": 0.5045, + "step": 7832 + }, + { + "epoch": 0.98, + "learning_rate": 8.54468011615095e-09, + "loss": 0.5521, + "step": 7833 + }, + { + "epoch": 0.98, + "learning_rate": 8.426449302741946e-09, + "loss": 0.5186, + "step": 7834 + }, + { + "epoch": 0.98, + "learning_rate": 8.309041474523494e-09, + "loss": 0.4263, + "step": 7835 + }, + { + "epoch": 0.98, + "learning_rate": 8.192456650853997e-09, + "loss": 0.5185, + "step": 7836 + }, + { + "epoch": 0.98, + "learning_rate": 8.076694850955302e-09, + "loss": 0.4891, + "step": 7837 + }, + { + "epoch": 0.98, + "learning_rate": 7.961756093913253e-09, + "loss": 0.5275, + "step": 7838 + }, + { + "epoch": 0.98, + "learning_rate": 7.847640398678246e-09, + "loss": 0.4656, + "step": 7839 + }, + { + "epoch": 0.98, + "learning_rate": 7.734347784065788e-09, + "loss": 0.5317, + "step": 7840 + }, + { + "epoch": 0.98, + "learning_rate": 7.621878268754268e-09, + "loss": 0.4205, + "step": 7841 + }, + { + "epoch": 0.98, + "learning_rate": 7.510231871287188e-09, + "loss": 0.4936, + "step": 7842 + }, + { + "epoch": 0.98, + "learning_rate": 7.399408610073156e-09, + "loss": 0.4762, + "step": 7843 + }, + { + "epoch": 0.98, + "learning_rate": 7.289408503382556e-09, + "loss": 0.5382, + "step": 7844 + }, + { + "epoch": 0.98, + "learning_rate": 7.180231569353102e-09, + "loss": 0.2238, + "step": 7845 + }, + { + "epoch": 0.98, + "learning_rate": 7.071877825984841e-09, + "loss": 0.4762, + "step": 7846 + }, + { + "epoch": 0.98, + "learning_rate": 6.964347291142371e-09, + "loss": 0.5242, + "step": 7847 + }, + { + "epoch": 0.98, + "learning_rate": 6.857639982554842e-09, + "loss": 0.5215, + "step": 7848 + }, + { + "epoch": 0.98, + "learning_rate": 6.7517559178154055e-09, + "loss": 0.4655, + "step": 7849 + }, + { + "epoch": 0.98, + "learning_rate": 6.646695114382318e-09, + "loss": 0.5314, + "step": 7850 + }, + { + "epoch": 0.98, + "learning_rate": 6.542457589576723e-09, + "loss": 0.5375, + "step": 7851 + }, + { + "epoch": 0.98, + "learning_rate": 6.439043360585428e-09, + "loss": 0.4886, + "step": 7852 + }, + { + "epoch": 0.98, + "learning_rate": 6.336452444458685e-09, + "loss": 0.551, + "step": 7853 + }, + { + "epoch": 0.98, + "learning_rate": 6.23468485811074e-09, + "loss": 0.5649, + "step": 7854 + }, + { + "epoch": 0.98, + "learning_rate": 6.133740618320949e-09, + "loss": 0.5205, + "step": 7855 + }, + { + "epoch": 0.98, + "learning_rate": 6.033619741732666e-09, + "loss": 0.5369, + "step": 7856 + }, + { + "epoch": 0.98, + "learning_rate": 5.934322244852686e-09, + "loss": 0.5533, + "step": 7857 + }, + { + "epoch": 0.98, + "learning_rate": 5.8358481440529135e-09, + "loss": 0.5389, + "step": 7858 + }, + { + "epoch": 0.99, + "learning_rate": 5.73819745556925e-09, + "loss": 0.4982, + "step": 7859 + }, + { + "epoch": 0.99, + "learning_rate": 5.6413701955021496e-09, + "loss": 0.4704, + "step": 7860 + }, + { + "epoch": 0.99, + "learning_rate": 5.545366379815509e-09, + "loss": 0.4431, + "step": 7861 + }, + { + "epoch": 0.99, + "learning_rate": 5.4501860243383334e-09, + "loss": 0.488, + "step": 7862 + }, + { + "epoch": 0.99, + "learning_rate": 5.355829144763624e-09, + "loss": 0.5509, + "step": 7863 + }, + { + "epoch": 0.99, + "learning_rate": 5.262295756647828e-09, + "loss": 0.457, + "step": 7864 + }, + { + "epoch": 0.99, + "learning_rate": 5.169585875412497e-09, + "loss": 0.4752, + "step": 7865 + }, + { + "epoch": 0.99, + "learning_rate": 5.077699516343737e-09, + "loss": 0.221, + "step": 7866 + }, + { + "epoch": 0.99, + "learning_rate": 4.986636694590541e-09, + "loss": 0.4783, + "step": 7867 + }, + { + "epoch": 0.99, + "learning_rate": 4.89639742516701e-09, + "loss": 0.5211, + "step": 7868 + }, + { + "epoch": 0.99, + "learning_rate": 4.806981722951798e-09, + "loss": 0.52, + "step": 7869 + }, + { + "epoch": 0.99, + "learning_rate": 4.718389602687002e-09, + "loss": 0.5041, + "step": 7870 + }, + { + "epoch": 0.99, + "learning_rate": 4.63062107897927e-09, + "loss": 0.5148, + "step": 7871 + }, + { + "epoch": 0.99, + "learning_rate": 4.543676166299249e-09, + "loss": 0.4927, + "step": 7872 + }, + { + "epoch": 0.99, + "learning_rate": 4.45755487898214e-09, + "loss": 0.5203, + "step": 7873 + }, + { + "epoch": 0.99, + "learning_rate": 4.37225723122714e-09, + "loss": 0.5037, + "step": 7874 + }, + { + "epoch": 0.99, + "learning_rate": 4.287783237097998e-09, + "loss": 0.5628, + "step": 7875 + }, + { + "epoch": 0.99, + "learning_rate": 4.204132910521908e-09, + "loss": 0.2178, + "step": 7876 + }, + { + "epoch": 0.99, + "learning_rate": 4.121306265291169e-09, + "loss": 0.4886, + "step": 7877 + }, + { + "epoch": 0.99, + "learning_rate": 4.039303315060972e-09, + "loss": 0.5641, + "step": 7878 + }, + { + "epoch": 0.99, + "learning_rate": 3.95812407335272e-09, + "loss": 0.4823, + "step": 7879 + }, + { + "epoch": 0.99, + "learning_rate": 3.877768553549599e-09, + "loss": 0.5291, + "step": 7880 + }, + { + "epoch": 0.99, + "learning_rate": 3.798236768901564e-09, + "loss": 0.4953, + "step": 7881 + }, + { + "epoch": 0.99, + "learning_rate": 3.719528732519795e-09, + "loss": 0.1997, + "step": 7882 + }, + { + "epoch": 0.99, + "learning_rate": 3.641644457382798e-09, + "loss": 0.5338, + "step": 7883 + }, + { + "epoch": 0.99, + "learning_rate": 3.5645839563308584e-09, + "loss": 0.5091, + "step": 7884 + }, + { + "epoch": 0.99, + "learning_rate": 3.4883472420699227e-09, + "loss": 0.5838, + "step": 7885 + }, + { + "epoch": 0.99, + "learning_rate": 3.4129343271688265e-09, + "loss": 0.5185, + "step": 7886 + }, + { + "epoch": 0.99, + "learning_rate": 3.338345224061512e-09, + "loss": 0.4555, + "step": 7887 + }, + { + "epoch": 0.99, + "learning_rate": 3.2645799450459205e-09, + "loss": 0.5427, + "step": 7888 + }, + { + "epoch": 0.99, + "learning_rate": 3.191638502284544e-09, + "loss": 0.5301, + "step": 7889 + }, + { + "epoch": 0.99, + "learning_rate": 3.1195209078027644e-09, + "loss": 0.5817, + "step": 7890 + }, + { + "epoch": 0.99, + "learning_rate": 3.0482271734910694e-09, + "loss": 0.5378, + "step": 7891 + }, + { + "epoch": 0.99, + "learning_rate": 2.9777573111050563e-09, + "loss": 0.5289, + "step": 7892 + }, + { + "epoch": 0.99, + "learning_rate": 2.908111332262098e-09, + "loss": 0.4886, + "step": 7893 + }, + { + "epoch": 0.99, + "learning_rate": 2.839289248445787e-09, + "loss": 0.5179, + "step": 7894 + }, + { + "epoch": 0.99, + "learning_rate": 2.771291071002602e-09, + "loss": 0.5674, + "step": 7895 + }, + { + "epoch": 0.99, + "learning_rate": 2.7041168111446856e-09, + "loss": 0.4946, + "step": 7896 + }, + { + "epoch": 0.99, + "learning_rate": 2.637766479946513e-09, + "loss": 0.5179, + "step": 7897 + }, + { + "epoch": 0.99, + "learning_rate": 2.5722400883482212e-09, + "loss": 0.2128, + "step": 7898 + }, + { + "epoch": 0.99, + "learning_rate": 2.5075376471533906e-09, + "loss": 0.503, + "step": 7899 + }, + { + "epoch": 0.99, + "learning_rate": 2.4436591670290444e-09, + "loss": 0.4765, + "step": 7900 + }, + { + "epoch": 0.99, + "learning_rate": 2.380604658507868e-09, + "loss": 0.4947, + "step": 7901 + }, + { + "epoch": 0.99, + "learning_rate": 2.3183741319859896e-09, + "loss": 0.4763, + "step": 7902 + }, + { + "epoch": 0.99, + "learning_rate": 2.256967597722981e-09, + "loss": 0.4426, + "step": 7903 + }, + { + "epoch": 0.99, + "learning_rate": 2.1963850658440757e-09, + "loss": 0.4559, + "step": 7904 + }, + { + "epoch": 0.99, + "learning_rate": 2.13662654633795e-09, + "loss": 0.503, + "step": 7905 + }, + { + "epoch": 0.99, + "learning_rate": 2.0776920490561682e-09, + "loss": 0.5267, + "step": 7906 + }, + { + "epoch": 0.99, + "learning_rate": 2.0195815837159573e-09, + "loss": 0.5508, + "step": 7907 + }, + { + "epoch": 0.99, + "learning_rate": 1.962295159899097e-09, + "loss": 0.5026, + "step": 7908 + }, + { + "epoch": 0.99, + "learning_rate": 1.9058327870496995e-09, + "loss": 0.472, + "step": 7909 + }, + { + "epoch": 0.99, + "learning_rate": 1.8501944744775403e-09, + "loss": 0.5524, + "step": 7910 + }, + { + "epoch": 0.99, + "learning_rate": 1.7953802313558365e-09, + "loss": 0.4558, + "step": 7911 + }, + { + "epoch": 0.99, + "learning_rate": 1.7413900667218043e-09, + "loss": 0.5568, + "step": 7912 + }, + { + "epoch": 0.99, + "learning_rate": 1.6882239894777663e-09, + "loss": 0.513, + "step": 7913 + }, + { + "epoch": 0.99, + "learning_rate": 1.6358820083889338e-09, + "loss": 0.5144, + "step": 7914 + }, + { + "epoch": 0.99, + "learning_rate": 1.58436413208507e-09, + "loss": 0.5108, + "step": 7915 + }, + { + "epoch": 0.99, + "learning_rate": 1.5336703690604915e-09, + "loss": 0.4665, + "step": 7916 + }, + { + "epoch": 0.99, + "learning_rate": 1.4838007276729572e-09, + "loss": 0.4855, + "step": 7917 + }, + { + "epoch": 0.99, + "learning_rate": 1.4347552161453337e-09, + "loss": 0.5138, + "step": 7918 + }, + { + "epoch": 0.99, + "learning_rate": 1.3865338425633756e-09, + "loss": 0.5487, + "step": 7919 + }, + { + "epoch": 0.99, + "learning_rate": 1.339136614877945e-09, + "loss": 0.4346, + "step": 7920 + }, + { + "epoch": 0.99, + "learning_rate": 1.2925635409039016e-09, + "loss": 0.4745, + "step": 7921 + }, + { + "epoch": 0.99, + "learning_rate": 1.2468146283195482e-09, + "loss": 0.4957, + "step": 7922 + }, + { + "epoch": 0.99, + "learning_rate": 1.2018898846671846e-09, + "loss": 0.5473, + "step": 7923 + }, + { + "epoch": 0.99, + "learning_rate": 1.157789317355329e-09, + "loss": 0.5175, + "step": 7924 + }, + { + "epoch": 0.99, + "learning_rate": 1.1145129336537218e-09, + "loss": 0.4702, + "step": 7925 + }, + { + "epoch": 0.99, + "learning_rate": 1.0720607406977669e-09, + "loss": 0.5378, + "step": 7926 + }, + { + "epoch": 0.99, + "learning_rate": 1.0304327454868645e-09, + "loss": 0.5727, + "step": 7927 + }, + { + "epoch": 0.99, + "learning_rate": 9.896289548849692e-10, + "loss": 0.469, + "step": 7928 + }, + { + "epoch": 0.99, + "learning_rate": 9.496493756189217e-10, + "loss": 0.5498, + "step": 7929 + }, + { + "epoch": 0.99, + "learning_rate": 9.104940142806718e-10, + "loss": 0.2261, + "step": 7930 + }, + { + "epoch": 0.99, + "learning_rate": 8.721628773256107e-10, + "loss": 0.206, + "step": 7931 + }, + { + "epoch": 0.99, + "learning_rate": 8.346559710742386e-10, + "loss": 0.5162, + "step": 7932 + }, + { + "epoch": 0.99, + "learning_rate": 7.979733017099423e-10, + "loss": 0.4785, + "step": 7933 + }, + { + "epoch": 0.99, + "learning_rate": 7.621148752812169e-10, + "loss": 0.5056, + "step": 7934 + }, + { + "epoch": 0.99, + "learning_rate": 7.270806976994449e-10, + "loss": 0.475, + "step": 7935 + }, + { + "epoch": 0.99, + "learning_rate": 6.928707747416719e-10, + "loss": 0.4922, + "step": 7936 + }, + { + "epoch": 0.99, + "learning_rate": 6.594851120478308e-10, + "loss": 0.4847, + "step": 7937 + }, + { + "epoch": 1.0, + "learning_rate": 6.269237151229623e-10, + "loss": 0.4571, + "step": 7938 + }, + { + "epoch": 1.0, + "learning_rate": 5.951865893344399e-10, + "loss": 0.5177, + "step": 7939 + }, + { + "epoch": 1.0, + "learning_rate": 5.6427373991641e-10, + "loss": 0.518, + "step": 7940 + }, + { + "epoch": 1.0, + "learning_rate": 5.341851719642411e-10, + "loss": 0.5174, + "step": 7941 + }, + { + "epoch": 1.0, + "learning_rate": 5.049208904395197e-10, + "loss": 0.5473, + "step": 7942 + }, + { + "epoch": 1.0, + "learning_rate": 4.764809001672754e-10, + "loss": 0.4632, + "step": 7943 + }, + { + "epoch": 1.0, + "learning_rate": 4.4886520583653506e-10, + "loss": 0.4665, + "step": 7944 + }, + { + "epoch": 1.0, + "learning_rate": 4.2207381199976805e-10, + "loss": 0.488, + "step": 7945 + }, + { + "epoch": 1.0, + "learning_rate": 3.96106723075107e-10, + "loss": 0.5007, + "step": 7946 + }, + { + "epoch": 1.0, + "learning_rate": 3.709639433430168e-10, + "loss": 0.577, + "step": 7947 + }, + { + "epoch": 1.0, + "learning_rate": 3.4664547694962525e-10, + "loss": 0.5103, + "step": 7948 + }, + { + "epoch": 1.0, + "learning_rate": 3.2315132790394774e-10, + "loss": 0.509, + "step": 7949 + }, + { + "epoch": 1.0, + "learning_rate": 3.004815000801076e-10, + "loss": 0.5217, + "step": 7950 + }, + { + "epoch": 1.0, + "learning_rate": 2.7863599721511537e-10, + "loss": 0.4999, + "step": 7951 + }, + { + "epoch": 1.0, + "learning_rate": 2.5761482291164486e-10, + "loss": 0.532, + "step": 7952 + }, + { + "epoch": 1.0, + "learning_rate": 2.3741798063470214e-10, + "loss": 0.4913, + "step": 7953 + }, + { + "epoch": 1.0, + "learning_rate": 2.180454737144011e-10, + "loss": 0.4639, + "step": 7954 + }, + { + "epoch": 1.0, + "learning_rate": 1.9949730534485346e-10, + "loss": 0.4991, + "step": 7955 + }, + { + "epoch": 1.0, + "learning_rate": 1.817734785847236e-10, + "loss": 0.4715, + "step": 7956 + }, + { + "epoch": 1.0, + "learning_rate": 1.6487399635556346e-10, + "loss": 0.522, + "step": 7957 + }, + { + "epoch": 1.0, + "learning_rate": 1.4879886144403278e-10, + "loss": 0.4492, + "step": 7958 + }, + { + "epoch": 1.0, + "learning_rate": 1.3354807649967881e-10, + "loss": 0.543, + "step": 7959 + }, + { + "epoch": 1.0, + "learning_rate": 1.1912164403826697e-10, + "loss": 0.495, + "step": 7960 + }, + { + "epoch": 1.0, + "learning_rate": 1.0551956643789496e-10, + "loss": 0.5413, + "step": 7961 + }, + { + "epoch": 1.0, + "learning_rate": 9.274184594065815e-11, + "loss": 0.5302, + "step": 7962 + }, + { + "epoch": 1.0, + "learning_rate": 8.078848465375987e-11, + "loss": 0.5132, + "step": 7963 + }, + { + "epoch": 1.0, + "learning_rate": 6.965948454840111e-11, + "loss": 0.5534, + "step": 7964 + }, + { + "epoch": 1.0, + "learning_rate": 5.935484745867026e-11, + "loss": 0.5195, + "step": 7965 + }, + { + "epoch": 1.0, + "learning_rate": 4.98745750837637e-11, + "loss": 0.5225, + "step": 7966 + }, + { + "epoch": 1.0, + "learning_rate": 4.121866898687543e-11, + "loss": 0.5203, + "step": 7967 + }, + { + "epoch": 1.0, + "learning_rate": 3.338713059519716e-11, + "loss": 0.5818, + "step": 7968 + }, + { + "epoch": 1.0, + "learning_rate": 2.6379961199918258e-11, + "loss": 0.5039, + "step": 7969 + }, + { + "epoch": 1.0, + "learning_rate": 2.0197161956225785e-11, + "loss": 0.5394, + "step": 7970 + }, + { + "epoch": 1.0, + "learning_rate": 1.483873388330448e-11, + "loss": 0.5233, + "step": 7971 + }, + { + "epoch": 1.0, + "learning_rate": 1.0304677865446978e-11, + "loss": 0.5466, + "step": 7972 + }, + { + "epoch": 1.0, + "learning_rate": 6.594994649278263e-12, + "loss": 0.5142, + "step": 7973 + }, + { + "epoch": 1.0, + "learning_rate": 3.709684847086337e-12, + "loss": 0.5591, + "step": 7974 + }, + { + "epoch": 1.0, + "learning_rate": 1.6487489340466512e-12, + "loss": 0.5043, + "step": 7975 + } + ], + "logging_steps": 1.0, + "max_steps": 7977, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1595, + "total_flos": 2.4284796557040026e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}